tune the preloads a bit
[sdl_omap.git] / src / video / SDL_blit_A.c
index 4ecb521..504451b 100644 (file)
 
 /* Functions to perform alpha blended blitting */
 
+#ifdef __ARM_NEON__
+
+/* NEON optimized blitter callers */
+/*
+ * make_neon_caller(name, neon_name)
+ *
+ * Emits a prototype for the external row routine `neon_name` and a static
+ * blitter `name(SDL_BlitInfo *)` that hands the blit area to that routine
+ * one row at a time.  Source rows are stepped as 4 bytes per pixel plus
+ * info->s_skip; destination rows are stepped using the destination
+ * format's BytesPerPixel plus info->d_skip.
+ */
+#define make_neon_caller(name, neon_name) \
+extern void neon_name(void *dst, const void *src, int count); \
+static void name(SDL_BlitInfo *info) \
+{ \
+       const int w = info->d_width; \
+       const int src_pitch = w * 4 + info->s_skip; \
+       const int dst_pitch = w * info->dst->BytesPerPixel + info->d_skip; \
+       Uint8 *src_row = info->s_pixels; \
+       Uint8 *dst_row = info->d_pixels; \
+       int rows_left; \
+\
+       for ( rows_left = info->d_height; rows_left != 0; rows_left-- ) { \
+           /* prefetch hint for the next destination row, issued before this row is processed */ \
+           __builtin_prefetch(dst_row + dst_pitch); \
+           neon_name(dst_row, src_row, w); \
+           src_row += src_pitch; \
+           dst_row += dst_pitch; \
+       } \
+}
+
+/*
+ * make_neon_callerS(name, neon_name)
+ *
+ * Same idea as make_neon_caller, for row routines that take one extra
+ * argument: declares the external routine `neon_name` and defines a static
+ * blitter `name(SDL_BlitInfo *)` that calls it once per row with the row
+ * width in pixels and the per-surface alpha read from info->src->alpha.
+ *
+ * Source rows are stepped as 4 bytes per pixel.  The destination row step
+ * is derived from info->dst->BytesPerPixel (as make_neon_caller does)
+ * instead of assuming 4 bytes, so the wrapper keeps walking rows correctly
+ * if it is ever instantiated for a destination of another depth.
+ */
+#define make_neon_callerS(name, neon_name) \
+extern void neon_name(void *dst, const void *src, int count, unsigned int alpha); \
+static void name(SDL_BlitInfo *info) \
+{ \
+       int width = info->d_width; \
+       int height = info->d_height; \
+       Uint8 *src = info->s_pixels; \
+       Uint8 *dst = info->d_pixels; \
+       int dstBpp = info->dst->BytesPerPixel; \
+       /* row strides are loop-invariant: compute them once */ \
+       int srcstride = width * 4 + info->s_skip; \
+       int dststride = width * dstBpp + info->d_skip; \
+       unsigned alpha = info->src->alpha; \
+\
+       while ( height-- ) { \
+           neon_name(dst, src, width, alpha); \
+           src += srcstride; \
+           dst += dststride; \
+       } \
+}
+
+make_neon_caller(BlitABGRtoXRGBalpha_neon, neon_ABGRtoXRGBalpha) /* wrappers around row routines taking (dst, src, count) */
+make_neon_caller(BlitARGBtoXRGBalpha_neon, neon_ARGBtoXRGBalpha)
+make_neon_caller(BlitABGRtoRGB565alpha_neon, neon_ABGRtoRGB565alpha)
+make_neon_caller(BlitARGBtoRGB565alpha_neon, neon_ARGBtoRGB565alpha)
+make_neon_callerS(BlitABGRtoXRGBalphaS_neon, neon_ABGRtoXRGBalphaS) /* "S" wrappers additionally pass info->src->alpha to the row routine */
+make_neon_callerS(BlitARGBtoXRGBalphaS_neon, neon_ARGBtoXRGBalphaS)
+
+#endif /* __ARM_NEON__ */
+
 /* N->1 blending with per-surface alpha */
 static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info)
 {
@@ -2641,6 +2692,34 @@ static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info)
        unsigned sA = srcfmt->alpha;
        unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
 
+       if (srcbpp == 2 && srcfmt->Gmask == 0x7e0 && dstbpp == 2 && dstfmt->Gmask == 0x7e0) {
+           Uint16 *src16 = (Uint16 *)src;
+           Uint16 *dst16 = (Uint16 *)dst;
+           sA >>= 3;   /* downscale alpha to 5 bits */
+           while ( height-- ) {
+               DUFFS_LOOP4(
+               {
+                   Uint32 s;
+                   Uint32 d;
+                   s = *src16;
+                   if(sA && s != ckey) {
+                       d = *dst16;
+                       s = (s | s << 16) & 0x07e0f81f;
+                       d = (d | d << 16) & 0x07e0f81f;
+                       d += (s - d) * sA >> 5;
+                       d &= 0x07e0f81f;
+                       *dst16 = (Uint16)(d | d >> 16);
+                   }
+                   src16++;
+                   dst16++;
+               },
+               width);
+               src16 += srcskip / 2;
+               dst16 += dstskip / 2;
+           }
+           return;
+       }
+
        while ( height-- ) {
            DUFFS_LOOP4(
            {
@@ -2777,6 +2856,14 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
                           && sf->Bshift % 8 == 0
                           && SDL_HasMMX())
                            return BlitRGBtoRGBSurfaceAlphaMMX;
+#endif
+#ifdef __ARM_NEON__
+                       if(sf->Rshift % 8 == 0
+                          && sf->Gshift % 8 == 0
+                          && sf->Bshift % 8 == 0)
+                       {
+                               return BlitARGBtoXRGBalphaS_neon;
+                       }
 #endif
                        if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff)
                        {
@@ -2788,6 +2875,13 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
                                return BlitRGBtoRGBSurfaceAlpha;
                        }
                }
+#ifdef __ARM_NEON__
+               if (sf->Gmask == df->Gmask && sf->Rmask == df->Bmask && sf->Bmask == df->Rmask
+                   && sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0)
+               {
+                       return BlitABGRtoXRGBalphaS_neon;
+               }
+#endif
 #if SDL_ALTIVEC_BLITTERS
                if((sf->BytesPerPixel == 4) &&
                   !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec())
@@ -2814,6 +2908,16 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
           df->Bmask == 0x1f && SDL_HasAltiVec())
             return Blit32to565PixelAlphaAltivec;
         else
+#endif
+#ifdef __ARM_NEON__
+           if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
+              && sf->Gmask == 0xff00 && df->Gmask == 0x7e0) {
+               if((sf->Bmask >> 3) == df->Bmask || (sf->Rmask >> 3) == df->Rmask)
+                   return BlitARGBtoRGB565alpha_neon;
+               else
+                   return BlitABGRtoRGB565alpha_neon;
+           }
+           else
 #endif
            if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
               && sf->Gmask == 0xff00
@@ -2844,6 +2948,15 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
                        if(SDL_HasMMX())
                                return BlitRGBtoRGBPixelAlphaMMX;
                }
+#endif
+#ifdef __ARM_NEON__
+               if(sf->Rshift % 8 == 0
+                  && sf->Gshift % 8 == 0
+                  && sf->Bshift % 8 == 0
+                  && sf->Ashift % 8 == 0)
+               {
+                       return BlitARGBtoXRGBalpha_neon;
+               }
 #endif
                if(sf->Amask == 0xff000000)
                {
@@ -2855,6 +2968,14 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
                        return BlitRGBtoRGBPixelAlpha;
                }
            }
+#ifdef __ARM_NEON__
+           if (sf->Gmask == df->Gmask && sf->Rmask == df->Bmask && sf->Bmask == df->Rmask
+               && sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0
+               && sf->Amask == 0xff000000)
+           {
+               return BlitABGRtoXRGBalpha_neon;
+           }
+#endif
 #if SDL_ALTIVEC_BLITTERS
            if (sf->Amask && sf->BytesPerPixel == 4 &&
                !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec())