/* Functions to perform alpha blended blitting */
+#ifdef __ARM_NEON__
+
+/* NEON optimized blitter callers */
+/* Generate a static blitter `name` that feeds the blit to the external
+ * routine `neon_name(dst, src, count)` one row at a time. The row advance
+ * is computed as width * 4 plus the per-row skip taken from the blit info. */
+#define make_neon_caller(name, neon_name) \
+extern void neon_name(void *dst, const void *src, int count); \
+static void name(SDL_BlitInfo *info) \
+{ \
+    const int row_pixels = info->d_width; \
+    const int src_gap = info->s_skip; \
+    const int dst_gap = info->d_skip; \
+    Uint8 *src_row = info->s_pixels; \
+    Uint8 *dst_row = info->d_pixels; \
+    int rows_left; \
+\
+    for ( rows_left = info->d_height; rows_left != 0; --rows_left ) { \
+        neon_name(dst_row, src_row, row_pixels); \
+        src_row += row_pixels * 4 + src_gap; \
+        dst_row += row_pixels * 4 + dst_gap; \
+    } \
+}
+
+/* Same row-by-row wrapper generator as above in spirit, but the external
+ * routine takes a fourth argument: the alpha value read from
+ * info->src->alpha, passed unchanged for every row. */
+#define make_neon_callerS(name, neon_name) \
+extern void neon_name(void *dst, const void *src, int count, unsigned int alpha); \
+static void name(SDL_BlitInfo *info) \
+{ \
+    const int row_pixels = info->d_width; \
+    const int src_gap = info->s_skip; \
+    const int dst_gap = info->d_skip; \
+    const unsigned surface_alpha = info->src->alpha; \
+    Uint8 *src_row = info->s_pixels; \
+    Uint8 *dst_row = info->d_pixels; \
+    int rows_left; \
+\
+    for ( rows_left = info->d_height; rows_left != 0; --rows_left ) { \
+        neon_name(dst_row, src_row, row_pixels, surface_alpha); \
+        src_row += row_pixels * 4 + src_gap; \
+        dst_row += row_pixels * 4 + dst_gap; \
+    } \
+}
+
+make_neon_caller(BlitABGRtoXRGBalpha_neon, neon_ABGRtoXRGBalpha) /* 3-argument wrapper: (dst, src, count) per row */
+make_neon_caller(BlitARGBtoXRGBalpha_neon, neon_ARGBtoXRGBalpha) /* 3-argument wrapper: (dst, src, count) per row */
+make_neon_callerS(BlitABGRtoXRGBalphaS_neon, neon_ABGRtoXRGBalphaS) /* 4-argument wrapper: also passes info->src->alpha */
+make_neon_callerS(BlitARGBtoXRGBalphaS_neon, neon_ARGBtoXRGBalphaS) /* 4-argument wrapper: also passes info->src->alpha */
+
+#endif /* __ARM_NEON__ */
+
/* N->1 blending with per-surface alpha */
static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info)
{
unsigned sA = srcfmt->alpha;
unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
+ if (srcbpp == 2 && srcfmt->Gmask == 0x7e0 && dstbpp == 2 && dstfmt->Gmask == 0x7e0) {
+ Uint16 *src16 = (Uint16 *)src;
+ Uint16 *dst16 = (Uint16 *)dst;
+ sA >>= 3; /* downscale alpha to 5 bits */
+ while ( height-- ) {
+ DUFFS_LOOP4(
+ {
+ Uint32 s;
+ Uint32 d;
+ s = *src16;
+ if(sA && s != ckey) {
+ d = *dst16;
+ s = (s | s << 16) & 0x07e0f81f;
+ d = (d | d << 16) & 0x07e0f81f;
+ d += (s - d) * sA >> 5;
+ d &= 0x07e0f81f;
+ *dst16 = (Uint16)(d | d >> 16);
+ }
+ src16++;
+ dst16++;
+ },
+ width);
+ src16 += srcskip / 2;
+ dst16 += dstskip / 2;
+ }
+ return;
+ }
+
while ( height-- ) {
DUFFS_LOOP4(
{
&& sf->Bshift % 8 == 0
&& SDL_HasMMX())
return BlitRGBtoRGBSurfaceAlphaMMX;
+#endif
+#ifdef __ARM_NEON__
+ if(sf->Rshift % 8 == 0
+ && sf->Gshift % 8 == 0
+ && sf->Bshift % 8 == 0)
+ {
+ return BlitARGBtoXRGBalphaS_neon;
+ }
#endif
if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff)
{
return BlitRGBtoRGBSurfaceAlpha;
}
}
+#ifdef __ARM_NEON__
+ if (sf->Gmask == df->Gmask && sf->Rmask == df->Bmask && sf->Bmask == df->Rmask
+ && sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0)
+ {
+ return BlitABGRtoXRGBalphaS_neon;
+ }
+#endif
#if SDL_ALTIVEC_BLITTERS
if((sf->BytesPerPixel == 4) &&
!(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec())
if(SDL_HasMMX())
return BlitRGBtoRGBPixelAlphaMMX;
}
+#endif
+#ifdef __ARM_NEON__
+ if(sf->Rshift % 8 == 0
+ && sf->Gshift % 8 == 0
+ && sf->Bshift % 8 == 0
+ && sf->Ashift % 8 == 0)
+ {
+ return BlitARGBtoXRGBalpha_neon;
+ }
#endif
if(sf->Amask == 0xff000000)
{
return BlitRGBtoRGBPixelAlpha;
}
}
+#ifdef __ARM_NEON__
+ if (sf->Gmask == df->Gmask && sf->Rmask == df->Bmask && sf->Bmask == df->Rmask
+ && sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0
+ && sf->Amask == 0xff000000)
+ {
+ return BlitABGRtoXRGBalpha_neon;
+ }
+#endif
#if SDL_ALTIVEC_BLITTERS
if (sf->Amask && sf->BytesPerPixel == 4 &&
!(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec())