Commit | Line | Data |
---|---|---|
c9099d02 | 1 | /* |
d639fa7f | 2 | * (C) Gražvydas "notaz" Ignotas, 2011,2012,2022 |
c9099d02 | 3 | * |
4 | * This work is licensed under the terms of any of these licenses | |
5 | * (at your option): | |
6 | * - GNU GPL, version 2 or later. | |
7 | * - GNU LGPL, version 2.1 or later. | |
8 | * See the COPYING file in the top-level directory. | |
9 | */ | |
10 | ||
f189413e | 11 | #include <stdint.h> |
a80ae4a0 | 12 | #include "cspace.h" |
b06f78f1 | 13 | #include "compiler_features.h" |
a80ae4a0 | 14 | |
4ea7de6a | 15 | /* |
16 | * note: these are intended for testing and should be avoided | |
17 | * in favor of NEON version or platform-specific conversion | |
18 | */ | |
19 | ||
ae8f89db PC |
/*
 * LE16TOHx2(x): x is a 32-bit word holding two packed 16-bit values that
 * are stored little-endian; the result holds the same two values in host
 * byte order. On little-endian hosts this is the identity.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define SWAP16(x) __builtin_bswap16(x)
/*
 * The swapped upper halfword is widened to uint32_t before shifting:
 * SWAP16() yields a 16-bit value that would otherwise be promoted to a
 * signed int, and shifting a value >= 0x8000 left by 16 overflows it.
 */
#define LE16TOHx2(x) (((uint32_t)SWAP16((x) >> 16) << 16) | SWAP16(x))
#else
#define LE16TOHx2(x) (x)
#endif
26 | ||
d639fa7f | 27 | #if defined(HAVE_bgr555_to_rgb565) |
28 | ||
29 | /* have bgr555_to_rgb565 somewhere else */ | |
30 | ||
31 | #elif ((defined(__clang_major__) && __clang_major__ >= 4) \ | |
32 | || (defined(__GNUC__) && __GNUC__ >= 5)) \ | |
33 | && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ | |
34 | ||
#include <assert.h>

/*
 * gsli/gsri: merge a left/right shifted copy of s_ into d_.
 * With NEON these are the shift-and-insert instructions; the generic
 * fallback ORs the shifted value in, which gives the same result here
 * because the target bits of d_ are always zero at the point of use.
 */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define gsli(d_, s_, n_) d_ = vsliq_n_u16(d_, s_, n_)
#define gsri(d_, s_, n_) d_ = vsriq_n_u16(d_, s_, n_)
#else
#define gsli(d_, s_, n_) d_ |= s_ << n_
#define gsri(d_, s_, n_) d_ |= s_ >> n_
#endif

/* 8 x uint16_t vector, 16-byte aligned */
typedef uint16_t gvu16 __attribute__((vector_size(16),aligned(16)));
/* same vector, but only 2-byte alignment is assumed (unaligned access) */
typedef uint16_t gvu16u __attribute__((vector_size(16),aligned(2)));
#define gdup(v_) {v_, v_, v_, v_, v_, v_, v_, v_}
/* convert a single pixel; evaluates to the converted uint16_t */
#define do_one(s) ({ \
  uint16_t d_ = (s) << 1; d_ = (d_ & 0x07c0) | (d_ << 10) | (d_ >> 11); d_; \
})
/* convert 8 pixels: d_ = converted s_, c0x07c0_ is 0x07c0 in every lane */
#define do_one_simd(d_, s_, c0x07c0_) do { \
  gvu16 s1 = s_ << 1; \
  d_ = s1 & c0x07c0_;   /* middle 5 bits, moved up by one */ \
  gsli(d_, s_, 11);     /* low 5 bits -> top 5 bits */ \
  gsri(d_, s1, 11);     /* bits 10..14 -> low 5 bits */ \
} while (0)

/*
 * Convert 16-bit pixels by swapping the 5-bit fields at bits 0..4 and
 * 10..14 and moving the 5-bit field at bits 5..9 up to bits 6..10
 * (bit 15 of the source is discarded, bit 5 of the result is zero).
 *
 * dst_  - destination buffer, 2-byte aligned, must not overlap src_
 * src_  - source buffer, 2-byte aligned
 * bytes - number of bytes to convert, must be even
 */
void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
{
  const uint16_t * __restrict__ src = src_;
  uint16_t * __restrict__ dst = dst_;
  gvu16 c0x07c0 = gdup(0x07c0);

  assert(!(((uintptr_t)dst | (uintptr_t)src | bytes) & 1));

  // align the destination
  // This step always transfers a whole 16-byte vector, so it is only
  // allowed when at least that much data remains; shorter runs are left
  // to the scalar loop below, which has no alignment requirement.
  if (((uintptr_t)dst & 0x0e) && bytes >= 16)
  {
    uintptr_t left = 0x10 - ((uintptr_t)dst & 0x0e);
    gvu16 d, s = *(const gvu16u *)src;
    do_one_simd(d, s, c0x07c0);
    *(gvu16u *)dst = d;
    // only advance to the alignment boundary; the pixels past it get
    // converted again (to the same values) by the aligned loop
    dst += left / 2;
    src += left / 2;
    bytes -= left;
  }
  // go
  for (; bytes >= 16; dst += 8, src += 8, bytes -= 16)
  {
    gvu16 d, s = *(const gvu16u *)src;
    do_one_simd(d, s, c0x07c0);
    *(gvu16 *)dst = d;
    __builtin_prefetch(src + 128/2);
  }
  // finish it
  for (; bytes > 0; dst++, src++, bytes -= 2)
    *dst = do_one(*src);
}
#undef do_one
#undef do_one_simd
92 | ||
93 | #else | |
c9099d02 | 94 | |
/*
 * Portable fallback: convert 16-bit pixels by swapping the 5-bit fields
 * at bits 0..4 and 10..14 and moving the 5-bit field at bits 5..9 up to
 * bits 6..10 (bit 15 of the source is discarded).
 *
 * dst_  - destination buffer of host-order 16-bit pixels, 2-byte aligned
 * src_  - source buffer of little-endian 16-bit pixels, any alignment
 * bytes - number of source bytes; a trailing odd byte is ignored and
 *         non-positive values convert nothing
 */
void bgr555_to_rgb565(void *dst_, const void *src_, int bytes)
{
	// the source is read byte by byte, so a misaligned source is fine
	// and no byte swapping is needed on big-endian hosts
	const unsigned char *src = src_;
	uint16_t *dst = dst_;

	for (; bytes >= 2; bytes -= 2, src += 2, dst++) {
		unsigned int p = src[0] | ((unsigned int)src[1] << 8);
		unsigned int r = (p & 0x001f) << 11;
		unsigned int g = (p & 0x03e0) << 1;
		unsigned int b = (p & 0x7c00) >> 10;

		*dst = (uint16_t)(r | g | b);
	}
}
112 | ||
d57557c0 | 113 | #endif |
114 | ||
9b592b3f | 115 | #ifndef HAVE_bgr888_to_x |
116 | ||
b06f78f1 | 117 | void attr_weak bgr888_to_rgb565(void *dst_, const void *src_, int bytes) |
4ea7de6a | 118 | { |
119 | const unsigned char *src = src_; | |
120 | unsigned int *dst = dst_; | |
121 | unsigned int r1, g1, b1, r2, g2, b2; | |
122 | ||
123 | for (; bytes >= 6; bytes -= 6, src += 6, dst++) { | |
124 | r1 = src[0] & 0xf8; | |
125 | g1 = src[1] & 0xfc; | |
126 | b1 = src[2] & 0xf8; | |
127 | r2 = src[3] & 0xf8; | |
128 | g2 = src[4] & 0xfc; | |
129 | b2 = src[5] & 0xf8; | |
ae8f89db PC |
130 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
131 | *dst = (r1 << 24) | (g1 << 19) | (b1 << 13) | | |
132 | (r2 << 8) | (g2 << 3) | (b2 >> 3); | |
133 | #else | |
4ea7de6a | 134 | *dst = (r2 << 24) | (g2 << 19) | (b2 << 13) | |
135 | (r1 << 8) | (g1 << 3) | (b1 >> 3); | |
ae8f89db | 136 | #endif |
4ea7de6a | 137 | } |
138 | } | |
139 | ||
/*
 * TODO? The two conversions below are not implemented: they are empty
 * placeholders that leave the destination buffer untouched.
 */
void rgb888_to_rgb565(void *dst, const void *src, int bytes)
{
	(void)dst;
	(void)src;
	(void)bytes;
}

void bgr888_to_rgb888(void *dst, const void *src, int bytes)
{
	(void)dst;
	(void)src;
	(void)bytes;
}
a80ae4a0 | 143 | |
9b592b3f | 144 | #endif // HAVE_bgr888_to_x |
145 | ||
/*
 * Expand 16-bit 5:5:5 pixels to 32-bit pixels with 8 bits per channel.
 * The field at source bits 0..4 ends up in result bits 16..23, bits 5..9
 * in bits 8..15 and bits 10..14 in bits 0..7; the top 3 bits of each
 * channel are replicated into its low 3 bits so that full intensity maps
 * to 0xff. Source bit 15 is ignored and result bits 24..31 are zero.
 *
 * dst_  - destination buffer, one uint32_t per pixel, must not overlap src_
 * src_  - source buffer, one uint16_t per pixel
 * bytes - number of source bytes; a trailing odd byte is ignored
 */
void bgr555_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
{
  const uint16_t * __restrict__ src = src_;
  uint32_t * __restrict__ dst = dst_;

  for (; bytes >= 2; bytes -= 2, src++, dst++)
  {
    // widen first: shifting the int-promoted uint16_t left by 19 would
    // overflow a signed int for most pixel values
    const uint32_t p = *src;
    uint32_t t = ((p << 19) | (p >> 7)) & 0xf800f8;
    t |= (p << 6) & 0xf800;
    *dst = t | ((t >> 5) & 0x070707);
  }
}
158 | ||
/*
 * Expand 3-byte pixels to 32-bit pixels: byte 0 goes to bits 16..23,
 * byte 1 to bits 8..15 and byte 2 to bits 0..7; bits 24..31 are zero.
 *
 * dst_  - destination buffer, one uint32_t per pixel, must not overlap src_
 * src_  - source buffer, 3 bytes per pixel
 * bytes - number of source bytes; an incomplete trailing pixel is ignored
 */
void bgr888_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
{
  const uint8_t * __restrict__ in = src_;
  uint32_t * __restrict__ out = dst_;
  const int count = bytes / 3;
  int n;

  for (n = 0; n < count; n++)
  {
    const uint8_t *px = in + n * 3;
    const int hi = px[0] << 16;
    const int mid = px[1] << 8;
    const int lo = px[2];

    out[n] = hi | mid | lo;
  }
}
167 | ||
c9099d02 | 168 | /* YUV stuff */ |
/* per-channel luma contributions of a 5-bit component, scaled by 65536 */
static int yuv_ry[32], yuv_gy[32], yuv_by[32];
/* chroma values, indexed by (component - luma) + 32 */
static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
/* one precomputed entry for every 15-bit pixel value */
static struct uyvy { uint32_t y:8; uint32_t vyu:24; } yuv_uyvy[32768];

/* Limit v to the 0..255 range. */
static inline int uyvy_clamp8(int v)
{
	if (v < 0)
		return 0;
	return v > 255 ? 255 : v;
}

/*
 * Fill the lookup tables above. The conversion being tabulated is:
 *   y0 = (int)((0.299f * r0) + (0.587f * g0) + (0.114f * b0));
 *   y1 = (int)((0.299f * r1) + (0.587f * g1) + (0.114f * b1));
 *   u = (int)(8 * 0.565f * (b0 - y0)) + 128;
 *   v = (int)(8 * 0.713f * (r0 - y0)) + 128;
 */
void bgr_to_uyvy_init(void)
{
	unsigned char luma_out[256];
	int idx, diff;

	/* luma weights for each possible 5-bit component value */
	for (idx = 0; idx < 32; idx++) {
		yuv_ry[idx] = (int)(0.299f * idx * 65536.0f + 0.5f);
		yuv_gy[idx] = (int)(0.587f * idx * 65536.0f + 0.5f);
		yuv_by[idx] = (int)(0.114f * idx * 65536.0f + 0.5f);
	}

	/* chroma for each possible (component - luma) difference */
	for (diff = -32; diff < 32; diff++) {
		yuv_u[diff + 32] = uyvy_clamp8((int)(8 * 0.565f * diff) + 128);
		yuv_v[diff + 32] = uyvy_clamp8((int)(8 * 0.713f * diff) + 128);
	}

	/*
	 * valid Y range seems to be 16..235
	 * NOTE(review): this divides by 32 while rgb565_to_uyvy() scales
	 * the same 5-bit luma with / 31 - confirm which one is intended.
	 */
	for (idx = 0; idx < 256; idx++)
		luma_out[idx] = 16 + 219 * idx / 32;

	/* everything combined into one large array for speed */
	for (idx = 0; idx < 32768; idx++) {
		const int r = idx & 0x1f;
		const int g = (idx >> 5) & 0x1f;
		const int b = (idx >> 10) & 0x1f;
		const int y = (yuv_ry[r] + yuv_gy[g] + yuv_by[b]) >> 16;
		struct uyvy *entry = &yuv_uyvy[idx];

		entry->y = luma_out[y];
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		entry->vyu = (yuv_v[b-y + 32] << 16) | (luma_out[y] << 8) | yuv_u[r-y + 32];
#else
		entry->vyu = (yuv_v[r-y + 32] << 16) | (luma_out[y] << 8) | yuv_u[b-y + 32];
#endif
	}
}
219 | ||
5b9aa749 | 220 | void rgb565_to_uyvy(void *d, const void *s, int pixels) |
221 | { | |
222 | unsigned int *dst = d; | |
223 | const unsigned short *src = s; | |
224 | const unsigned char *yu = yuv_u + 32; | |
225 | const unsigned char *yv = yuv_v + 32; | |
226 | int r0, g0, b0, r1, g1, b1; | |
227 | int y0, y1, u, v; | |
228 | ||
229 | for (; pixels > 0; src += 2, dst++, pixels -= 2) | |
230 | { | |
231 | r0 = (src[0] >> 11) & 0x1f; | |
232 | g0 = (src[0] >> 6) & 0x1f; | |
233 | b0 = src[0] & 0x1f; | |
234 | r1 = (src[1] >> 11) & 0x1f; | |
235 | g1 = (src[1] >> 6) & 0x1f; | |
236 | b1 = src[1] & 0x1f; | |
237 | y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16; | |
238 | y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16; | |
239 | u = yu[b0 - y0]; | |
240 | v = yv[r0 - y0]; | |
241 | // valid Y range seems to be 16..235 | |
242 | y0 = 16 + 219 * y0 / 31; | |
243 | y1 = 16 + 219 * y1 / 31; | |
244 | ||
245 | *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; | |
246 | } | |
247 | } | |
248 | ||
81023939 | 249 | void bgr555_to_uyvy(void *d, const void *s, int pixels, int x2) |
c9099d02 | 250 | { |
81023939 | 251 | uint32_t *dst = d; |
252 | const uint16_t *src = s; | |
253 | int i; | |
254 | ||
255 | if (x2) { | |
256 | for (i = pixels; i >= 4; src += 4, dst += 4, i -= 4) | |
257 | { | |
258 | const struct uyvy *uyvy0 = yuv_uyvy + (src[0] & 0x7fff); | |
259 | const struct uyvy *uyvy1 = yuv_uyvy + (src[1] & 0x7fff); | |
260 | const struct uyvy *uyvy2 = yuv_uyvy + (src[2] & 0x7fff); | |
261 | const struct uyvy *uyvy3 = yuv_uyvy + (src[3] & 0x7fff); | |
262 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | |
263 | dst[0] = uyvy0->y | (uyvy0->vyu << 8); | |
264 | dst[1] = uyvy1->y | (uyvy1->vyu << 8); | |
265 | dst[2] = uyvy2->y | (uyvy2->vyu << 8); | |
266 | dst[3] = uyvy3->y | (uyvy3->vyu << 8); | |
267 | #else | |
268 | dst[0] = (uyvy0->y << 24) | uyvy0->vyu; | |
269 | dst[1] = (uyvy1->y << 24) | uyvy1->vyu; | |
270 | dst[2] = (uyvy2->y << 24) | uyvy2->vyu; | |
271 | dst[3] = (uyvy3->y << 24) | uyvy3->vyu; | |
272 | #endif | |
273 | } | |
274 | } else { | |
275 | for (i = pixels; i >= 4; src += 4, dst += 2, i -= 4) | |
276 | { | |
277 | const struct uyvy *uyvy0 = yuv_uyvy + (src[0] & 0x7fff); | |
278 | const struct uyvy *uyvy1 = yuv_uyvy + (src[1] & 0x7fff); | |
279 | const struct uyvy *uyvy2 = yuv_uyvy + (src[2] & 0x7fff); | |
280 | const struct uyvy *uyvy3 = yuv_uyvy + (src[3] & 0x7fff); | |
281 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | |
282 | dst[0] = uyvy1->y | (uyvy0->vyu << 8); | |
283 | dst[1] = uyvy3->y | (uyvy2->vyu << 8); | |
284 | #else | |
285 | dst[0] = (uyvy1->y << 24) | uyvy0->vyu; | |
286 | dst[1] = (uyvy3->y << 24) | uyvy2->vyu; | |
287 | #endif | |
288 | } | |
289 | } | |
c9099d02 | 290 | } |
291 | ||
81023939 | 292 | void bgr888_to_uyvy(void *d, const void *s, int pixels, int x2) |
c9099d02 | 293 | { |
81023939 | 294 | unsigned int *dst = d; |
295 | const unsigned char *src8 = s; | |
296 | const unsigned char *yu = yuv_u + 32; | |
297 | const unsigned char *yv = yuv_v + 32; | |
298 | int r0, g0, b0, r1, g1, b1; | |
299 | int y0, y1, u0, u1, v0, v1; | |
300 | ||
301 | if (x2) { | |
302 | for (; pixels >= 2; src8 += 3*2, pixels -= 2) | |
303 | { | |
304 | r0 = src8[0], g0 = src8[1], b0 = src8[2]; | |
305 | r1 = src8[3], g1 = src8[4], b1 = src8[5]; | |
306 | y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16; | |
307 | y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16; | |
308 | u0 = yu[(b0 - y0) / 8]; | |
309 | u1 = yu[(b1 - y1) / 8]; | |
310 | v0 = yv[(r0 - y0) / 8]; | |
311 | v1 = yv[(r1 - y1) / 8]; | |
312 | y0 = 16 + 219 * y0 / 255; | |
313 | y1 = 16 + 219 * y1 / 255; | |
314 | ||
315 | *dst++ = (y0 << 24) | (v0 << 16) | (y0 << 8) | u0; | |
316 | *dst++ = (y1 << 24) | (v1 << 16) | (y1 << 8) | u1; | |
317 | } | |
318 | } | |
319 | else { | |
320 | for (; pixels >= 2; src8 += 3*2, dst++, pixels -= 2) | |
321 | { | |
322 | r0 = src8[0], g0 = src8[1], b0 = src8[2]; | |
323 | r1 = src8[3], g1 = src8[4], b1 = src8[5]; | |
324 | y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16; | |
325 | y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16; | |
326 | u0 = yu[(b0 - y0) / 8]; | |
327 | v0 = yv[(r0 - y0) / 8]; | |
328 | y0 = 16 + 219 * y0 / 255; | |
329 | y1 = 16 + 219 * y1 / 255; | |
330 | ||
331 | *dst = (y1 << 24) | (v0 << 16) | (y0 << 8) | u0; | |
332 | } | |
333 | } | |
c9099d02 | 334 | } |