frontend: update libpicofe, fix missed callbacks
[pcsx_rearmed.git] / frontend / cspace.c
CommitLineData
c9099d02 1/*
d639fa7f 2 * (C) Gražvydas "notaz" Ignotas, 2011,2012,2022
c9099d02 3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
f189413e 11#include <stdint.h>
a80ae4a0 12#include "cspace.h"
b06f78f1 13#include "compiler_features.h"
a80ae4a0 14
4ea7de6a 15/*
16 * note: these are intended for testing and should be avoided
17 * in favor of NEON version or platform-specific conversion
18 */
19
ae8f89db
PC
/*
 * LE16TOHx2(x): x is a 32-bit word holding two little-endian 16-bit values;
 * the result holds the same two values, each in host byte order.
 * On little-endian hosts this is the identity.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define SWAP16(x) __builtin_bswap16((uint16_t)(x))
/* widen before shifting: a promoted uint16_t is a signed int, and moving
 * a set bit 15 up by 16 positions would overflow it */
#define LE16TOHx2(x) (((uint32_t)SWAP16((x) >> 16) << 16) | (uint32_t)SWAP16(x))
#else
#define LE16TOHx2(x) (x)
#endif
26
d639fa7f 27#if defined(HAVE_bgr555_to_rgb565)
28
29/* have bgr555_to_rgb565 somewhere else */
30
31#elif ((defined(__clang_major__) && __clang_major__ >= 4) \
32 || (defined(__GNUC__) && __GNUC__ >= 5)) \
33 && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
34
d639fa7f 35#include <assert.h>
36
/*
 * Per-lane "shift and insert" helpers for 8 x 16-bit vectors.
 * NEON has dedicated instructions; elsewhere a shift-and-or does the job,
 * which is sufficient here because do_one_simd() only inserts into bits
 * that were cleared by the 0x07c0 mask.
 */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define gsli(d_, s_, n_) d_ = vsliq_n_u16(d_, s_, n_)
#define gsri(d_, s_, n_) d_ = vsriq_n_u16(d_, s_, n_)
#else
#define gsli(d_, s_, n_) d_ |= s_ << n_
#define gsri(d_, s_, n_) d_ |= s_ >> n_
#endif

/* 8 pixels per vector; the "u" flavour may sit on any 2-byte boundary */
typedef uint16_t gvu16 __attribute__((vector_size(16),aligned(16)));
typedef uint16_t gvu16u __attribute__((vector_size(16),aligned(2)));
#define gdup(v_) {v_, v_, v_, v_, v_, v_, v_, v_}

/* one pixel: green stays in the middle (shifted up by one, low bit zero),
 * bits 4..0 move to 15..11 and bits 14..10 move to 4..0; bit 15 is dropped */
#define do_one(s) ({ \
    uint16_t d_ = (s) << 1; d_ = (d_ & 0x07c0) | (d_ << 10) | (d_ >> 11); d_; \
})
/* the same transform on 8 pixels at once */
#define do_one_simd(d_, s_, c0x07c0_) do { \
    gvu16 s1 = s_ << 1; \
    d_ = s1 & c0x07c0_; \
    gsli(d_, s_, 11); \
    gsri(d_, s1, 11); \
} while (0)

/*
 * Convert 16-bit BGR555 pixels to RGB565.
 *
 * dst_, src_: non-overlapping buffers, both 2-byte aligned (asserted)
 * bytes:      size of the source data in bytes, must be even (asserted)
 */
void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
{
    const uint16_t * __restrict__ src = src_;
    uint16_t * __restrict__ dst = dst_;
    const gvu16 c0x07c0 = gdup(0x07c0);

    assert(!(((uintptr_t)dst | (uintptr_t)src | bytes) & 1));

    // Align the destination with one unaligned 8-pixel store; the pixels
    // past the boundary are simply redone by the main loop.
    // Only legal when a whole vector fits: for shorter runs this would
    // read and write beyond the buffers, so leave those to the scalar tail.
    if (bytes >= 16 && ((uintptr_t)dst & 0x0e))
    {
        const int head = 0x10 - (int)((uintptr_t)dst & 0x0e);
        gvu16 d, s = *(const gvu16u *)src;
        do_one_simd(d, s, c0x07c0);
        *(gvu16u *)dst = d;
        dst += head / 2;
        src += head / 2;
        bytes -= head;
    }
    // main loop: aligned stores, possibly unaligned loads
    for (; bytes >= 16; dst += 8, src += 8, bytes -= 16)
    {
        gvu16 d, s = *(const gvu16u *)src;
        do_one_simd(d, s, c0x07c0);
        *(gvu16 *)dst = d;
        __builtin_prefetch(src + 128/2);
    }
    // leftovers, one pixel at a time
    for (; bytes > 0; dst++, src++, bytes -= 2)
        *dst = do_one(*src);
}
#undef do_one
#undef do_one_simd
92
93#else
c9099d02 94
/*
 * Portable fallback: convert 16-bit BGR555 pixels to RGB565, two pixels
 * per 32-bit word.
 *
 * dst_:  destination buffer, accessed as 32-bit words
 * src_:  source buffer; a misaligned address is rounded down to a 4-byte
 *        boundary (see below)
 * bytes: size of the source data in bytes; values <= 0 convert nothing
 */
void bgr555_to_rgb565(void *dst_, const void *src_, int bytes)
{
    // source can be misaligned, but it's very rare, so just force
    const unsigned int *src = (const void *)((uintptr_t)src_ & ~(uintptr_t)3);
    unsigned int *dst = dst_;
    // signed count: a negative size must not turn into a huge unsigned bound
    const int words = bytes / 4;
    unsigned int p;
    int x;

    for (x = 0; x < words; x++) {
        p = LE16TOHx2(src[x]);

        dst[x] = ((p & 0x001f001f) << 11)
               | ((p & 0x03e003e0) << 1)
               | ((p & 0x7c007c00) >> 10);
    }

    // odd pixel count: the last 16-bit pixel does not fill a word
    if (bytes > 0 && (bytes & 2)) {
        const uint16_t *src16 = (const uint16_t *)(src + words);
        uint16_t *dst16 = (uint16_t *)(dst + words);

        // with a zero upper half LE16TOHx2 converts just the one pixel
        p = LE16TOHx2((unsigned int)*src16);
        *dst16 = ((p & 0x001f) << 11) | ((p & 0x03e0) << 1) | ((p & 0x7c00) >> 10);
    }
}
112
d57557c0 113#endif
114
9b592b3f 115#ifndef HAVE_bgr888_to_x
116
b06f78f1 117void attr_weak bgr888_to_rgb565(void *dst_, const void *src_, int bytes)
4ea7de6a 118{
119 const unsigned char *src = src_;
120 unsigned int *dst = dst_;
121 unsigned int r1, g1, b1, r2, g2, b2;
122
123 for (; bytes >= 6; bytes -= 6, src += 6, dst++) {
124 r1 = src[0] & 0xf8;
125 g1 = src[1] & 0xfc;
126 b1 = src[2] & 0xf8;
127 r2 = src[3] & 0xf8;
128 g2 = src[4] & 0xfc;
129 b2 = src[5] & 0xf8;
ae8f89db
PC
130#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
131 *dst = (r1 << 24) | (g1 << 19) | (b1 << 13) |
132 (r2 << 8) | (g2 << 3) | (b2 >> 3);
133#else
4ea7de6a 134 *dst = (r2 << 24) | (g2 << 19) | (b2 << 13) |
135 (r1 << 8) | (g1 << 3) | (b1 >> 3);
ae8f89db 136#endif
4ea7de6a 137 }
138}
139
// TODO? not implemented: both converters are no-ops that leave dst untouched
void rgb888_to_rgb565(void *dst, const void *src, int bytes)
{
    (void)dst;
    (void)src;
    (void)bytes;
}

void bgr888_to_rgb888(void *dst, const void *src, int bytes)
{
    (void)dst;
    (void)src;
    (void)bytes;
}
a80ae4a0 143
9b592b3f 144#endif // HAVE_bgr888_to_x
145
/*
 * Expand 16-bit BGR555 pixels to 32-bit XRGB8888.
 *
 * Source bits 4..0 go to result bits 23..19, bits 9..5 to 15..11 and
 * bits 14..10 to 7..3; the top three bits of each channel are then copied
 * into its low three bits so that full intensity maps to 0xff.
 * Source bit 15 is ignored and the top result byte is zero.
 *
 * dst_:  receives one uint32_t per source pixel
 * src_:  16-bit source pixels
 * bytes: size of the source data in bytes; an odd trailing byte is ignored
 */
void bgr555_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
{
    const uint16_t * __restrict__ src = src_;
    uint32_t * __restrict__ dst = dst_;

    for (; bytes >= 2; bytes -= 2, src++, dst++)
    {
        // widen first: shifting the promoted (signed int) pixel left by 19
        // overflows as soon as bit 12 or above is set
        const uint32_t p = *src;
        uint32_t t = ((p << 19) | (p >> 7)) & 0xf800f8;
        t |= (p << 6) & 0xf800;
        *dst = t | ((t >> 5) & 0x070707);
    }
}
158
/*
 * Widen packed 24-bit pixels to 32 bits: the three source bytes of a pixel
 * land, in order, in bits 23..16, 15..8 and 7..0; the top byte is zero.
 * Trailing bytes that do not form a whole pixel are ignored.
 */
void bgr888_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
{
    const uint8_t * __restrict__ in = src_;
    uint32_t * __restrict__ out = dst_;
    const int count = bytes / 3;
    int i;

    for (i = 0; i < count; i++) {
        uint32_t px = in[3 * i];

        px = (px << 8) | in[3 * i + 1];
        px = (px << 8) | in[3 * i + 2];
        out[i] = px;
    }
}
167
c9099d02 168/* YUV stuff */
/* luma contribution of each 5-bit R/G/B level, scaled by 65536 */
static int yuv_ry[32], yuv_gy[32], yuv_by[32];
/* chroma by 5-bit colour difference; index is (difference + 32) */
static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
/* one entry per 15-bit pixel value: scaled luma alone, plus the three bytes
 * that precede the second luma of a UYVY pair */
static struct uyvy { uint32_t y:8; uint32_t vyu:24; } yuv_uyvy[32768];

/*
 * Fill the lookup tables used by the *_to_uyvy converters.
 * Takes no arguments and returns nothing; it only writes the file-level
 * tables above.
 */
void bgr_to_uyvy_init(void)
{
    unsigned char yuv_y[32];
    int i, v;

    /* init yuv converter:
      y0 = (int)((0.299f * r0) + (0.587f * g0) + (0.114f * b0));
      y1 = (int)((0.299f * r1) + (0.587f * g1) + (0.114f * b1));
      u = (int)(8 * 0.565f * (b0 - y0)) + 128;
      v = (int)(8 * 0.713f * (r0 - y0)) + 128;
    */
    for (i = 0; i < 32; i++) {
        yuv_ry[i] = (int)(0.299f * i * 65536.0f + 0.5f);
        yuv_gy[i] = (int)(0.587f * i * 65536.0f + 0.5f);
        yuv_by[i] = (int)(0.114f * i * 65536.0f + 0.5f);
    }
    for (i = -32; i < 32; i++) {
        v = (int)(8 * 0.565f * i) + 128;
        if (v < 0)
            v = 0;
        if (v > 255)
            v = 255;
        yuv_u[i + 32] = v;
        v = (int)(8 * 0.713f * i) + 128;
        if (v < 0)
            v = 0;
        if (v > 255)
            v = 255;
        yuv_v[i + 32] = v;
    }
    // valid Y range seems to be 16..235; the 5-bit luma tops out at 31,
    // so divide by 31 (as rgb565_to_uyvy does) to actually reach 235
    for (i = 0; i < 32; i++) {
        yuv_y[i] = 16 + 219 * i / 31;
    }
    // everything combined into one large array for speed
    for (i = 0; i < 32768; i++) {
        int r = (i >> 0) & 0x1f, g = (i >> 5) & 0x1f, b = (i >> 10) & 0x1f;
        int y = (yuv_ry[r] + yuv_gy[g] + yuv_by[b]) >> 16;
        yuv_uyvy[i].y = yuv_y[y];
        // U always comes from (b - y) and V from (r - y); only their
        // position inside the 24-bit field depends on the byte order
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        yuv_uyvy[i].vyu = (yuv_u[b-y + 32] << 16) | (yuv_y[y] << 8) | yuv_v[r-y + 32];
#else
        yuv_uyvy[i].vyu = (yuv_v[r-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[b-y + 32];
#endif
    }
}
219
5b9aa749 220void rgb565_to_uyvy(void *d, const void *s, int pixels)
221{
222 unsigned int *dst = d;
223 const unsigned short *src = s;
224 const unsigned char *yu = yuv_u + 32;
225 const unsigned char *yv = yuv_v + 32;
226 int r0, g0, b0, r1, g1, b1;
227 int y0, y1, u, v;
228
229 for (; pixels > 0; src += 2, dst++, pixels -= 2)
230 {
231 r0 = (src[0] >> 11) & 0x1f;
232 g0 = (src[0] >> 6) & 0x1f;
233 b0 = src[0] & 0x1f;
234 r1 = (src[1] >> 11) & 0x1f;
235 g1 = (src[1] >> 6) & 0x1f;
236 b1 = src[1] & 0x1f;
237 y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16;
238 y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16;
239 u = yu[b0 - y0];
240 v = yv[r0 - y0];
241 // valid Y range seems to be 16..235
242 y0 = 16 + 219 * y0 / 31;
243 y1 = 16 + 219 * y1 / 31;
244
245 *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
246 }
247}
248
81023939 249void bgr555_to_uyvy(void *d, const void *s, int pixels, int x2)
c9099d02 250{
81023939 251 uint32_t *dst = d;
252 const uint16_t *src = s;
253 int i;
254
255 if (x2) {
256 for (i = pixels; i >= 4; src += 4, dst += 4, i -= 4)
257 {
258 const struct uyvy *uyvy0 = yuv_uyvy + (src[0] & 0x7fff);
259 const struct uyvy *uyvy1 = yuv_uyvy + (src[1] & 0x7fff);
260 const struct uyvy *uyvy2 = yuv_uyvy + (src[2] & 0x7fff);
261 const struct uyvy *uyvy3 = yuv_uyvy + (src[3] & 0x7fff);
262#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
263 dst[0] = uyvy0->y | (uyvy0->vyu << 8);
264 dst[1] = uyvy1->y | (uyvy1->vyu << 8);
265 dst[2] = uyvy2->y | (uyvy2->vyu << 8);
266 dst[3] = uyvy3->y | (uyvy3->vyu << 8);
267#else
268 dst[0] = (uyvy0->y << 24) | uyvy0->vyu;
269 dst[1] = (uyvy1->y << 24) | uyvy1->vyu;
270 dst[2] = (uyvy2->y << 24) | uyvy2->vyu;
271 dst[3] = (uyvy3->y << 24) | uyvy3->vyu;
272#endif
273 }
274 } else {
275 for (i = pixels; i >= 4; src += 4, dst += 2, i -= 4)
276 {
277 const struct uyvy *uyvy0 = yuv_uyvy + (src[0] & 0x7fff);
278 const struct uyvy *uyvy1 = yuv_uyvy + (src[1] & 0x7fff);
279 const struct uyvy *uyvy2 = yuv_uyvy + (src[2] & 0x7fff);
280 const struct uyvy *uyvy3 = yuv_uyvy + (src[3] & 0x7fff);
281#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
282 dst[0] = uyvy1->y | (uyvy0->vyu << 8);
283 dst[1] = uyvy3->y | (uyvy2->vyu << 8);
284#else
285 dst[0] = (uyvy1->y << 24) | uyvy0->vyu;
286 dst[1] = (uyvy3->y << 24) | uyvy2->vyu;
287#endif
288 }
289 }
c9099d02 290}
291
81023939 292void bgr888_to_uyvy(void *d, const void *s, int pixels, int x2)
c9099d02 293{
81023939 294 unsigned int *dst = d;
295 const unsigned char *src8 = s;
296 const unsigned char *yu = yuv_u + 32;
297 const unsigned char *yv = yuv_v + 32;
298 int r0, g0, b0, r1, g1, b1;
299 int y0, y1, u0, u1, v0, v1;
300
301 if (x2) {
302 for (; pixels >= 2; src8 += 3*2, pixels -= 2)
303 {
304 r0 = src8[0], g0 = src8[1], b0 = src8[2];
305 r1 = src8[3], g1 = src8[4], b1 = src8[5];
306 y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16;
307 y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16;
308 u0 = yu[(b0 - y0) / 8];
309 u1 = yu[(b1 - y1) / 8];
310 v0 = yv[(r0 - y0) / 8];
311 v1 = yv[(r1 - y1) / 8];
312 y0 = 16 + 219 * y0 / 255;
313 y1 = 16 + 219 * y1 / 255;
314
315 *dst++ = (y0 << 24) | (v0 << 16) | (y0 << 8) | u0;
316 *dst++ = (y1 << 24) | (v1 << 16) | (y1 << 8) | u1;
317 }
318 }
319 else {
320 for (; pixels >= 2; src8 += 3*2, dst++, pixels -= 2)
321 {
322 r0 = src8[0], g0 = src8[1], b0 = src8[2];
323 r1 = src8[3], g1 = src8[4], b1 = src8[5];
324 y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16;
325 y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16;
326 u0 = yu[(b0 - y0) / 8];
327 v0 = yv[(r0 - y0) / 8];
328 y0 = 16 + 219 * y0 / 255;
329 y1 = 16 + 219 * y1 / 255;
330
331 *dst = (y1 << 24) | (v0 << 16) | (y0 << 8) | u0;
332 }
333 }
c9099d02 334}