add some more NEON blitters from _wb_
author notaz <notasas@gmail.com>
Wed, 22 Aug 2012 22:09:06 +0000 (01:09 +0300)
committer notaz <notasas@gmail.com>
Wed, 22 Aug 2012 22:40:52 +0000 (01:40 +0300)
(notaz: adjusted slightly to not trash callee-saved regs.)

src/video/SDL_blit_A.c
src/video/SDL_blit_neon.S

index 5a4cff0..565879d 100644 (file)
@@ -82,8 +82,29 @@ static void name(SDL_BlitInfo *info) \
        } \
 }
 
+#define make_neon_callerS(name, neon_name) /* defines static blitter `name` that runs the external row routine `neon_name` once per row, passing a surface alpha */ \
+extern void neon_name(void *dst, const void *src, int count, unsigned int alpha); \
+static void name(SDL_BlitInfo *info) \
+{ \
+       int width = info->d_width; /* passed to neon_name as its per-row count */ \
+       int height = info->d_height; /* number of rows to process */ \
+       Uint8 *src = info->s_pixels; \
+       Uint8 *dst = info->d_pixels; \
+       int srcskip = info->s_skip; /* extra bytes added after each source row (presumably row padding; confirm against SDL_BlitInfo) */ \
+       int dstskip = info->d_skip; /* extra bytes added after each destination row */ \
+       unsigned alpha = info->src->alpha; /* single alpha value handed to every row call */\
+\
+       while ( height-- ) { \
+           neon_name(dst, src, width, alpha); \
+           src += width * 4 + srcskip; /* the row routine gets the pointers by value, so advance here: 4 bytes per pixel plus skip */ \
+           dst += width * 4 + dstskip; \
+       } \
+}
+
 make_neon_caller(BlitABGRtoXRGBalpha_neon, neon_ABGRtoXRGBalpha)
 make_neon_caller(BlitARGBtoXRGBalpha_neon, neon_ARGBtoXRGBalpha)
+make_neon_callerS(BlitABGRtoXRGBalphaS_neon, neon_ABGRtoXRGBalphaS) /* surface-alpha wrapper around the assembler routine neon_ABGRtoXRGBalphaS */
+make_neon_callerS(BlitARGBtoXRGBalphaS_neon, neon_ARGBtoXRGBalphaS) /* surface-alpha wrapper around the assembler routine neon_ARGBtoXRGBalphaS */
 
 #endif /* __ARM_NEON__ */
 
@@ -2831,6 +2852,12 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
                           && sf->Bshift % 8 == 0
                           && SDL_HasMMX())
                            return BlitRGBtoRGBSurfaceAlphaMMX;
+#endif
+#ifdef __ARM_NEON__
+                       if(sf->Rshift % 8 == 0
+                          && sf->Gshift % 8 == 0
+                          && sf->Bshift % 8 == 0)
+                               return BlitARGBtoXRGBalphaS_neon;
 #endif
                        if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff)
                        {
index 438d9fc..2823bce 100644 (file)
     b          0b
 .endm
 
+
+@ void *dst, const void *src, int count, uint alpha   (used below as r0, r1, r2, r3; blends src over dst with one alpha value, 8 pixels per pass)
+.macro do_argb_alphaS bgr2rgb
+    mov        r12, #0xff
+    vdup.16    q11, r3         @ alpha replicated into every 16-bit lane
+    vdup.16    q12, r12        @ the +255 term of the formula below, in every 16-bit lane
+0:                             @ loop head: one pass handles 8 pixels
+    vld4.8     {d4-d7}, [r1]!  @ 8 src pixels split into 4 byte planes, src advanced; note the load happens before count is checked
+    vld4.8     {d0-d3}, [r0]   @ 8 dst pixels split the same way; dst is advanced only by the store below
+.if \bgr2rgb
+    vswp       d4, d6          @ BGR->RGB
+.endif
+    @ d = (((s-d)*a+255)>>8)+d
+    vsubl.u8   q8, d4, d0      @ s-d widened to 16 bits, one instruction per plane (first three planes only)
+    vsubl.u8   q9, d5, d1
+    vsubl.u8   q10,d6, d2
+    vmul.s16   q8, q8, q11     @ (s-d)*a
+    vmul.s16   q9, q9, q11
+    vmul.s16   q10,q10,q11
+    vaddhn.i16 d4, q8, q12     @ +255, then keep the high byte of each lane (the >>8), narrowed back to 8 bits
+    vaddhn.i16 d5, q9, q12
+    vaddhn.i16 d6, q10,q12
+    vadd.i8    q2, q0          @ +d for the first two planes at once (q2 = d4:d5, q0 = d0:d1)
+    vadd.i8    d6, d2          @ +d for the third plane
+    vmov.i8    d7, d3          @ fourth plane is copied from dst unblended
+    subs       r2, r2, #8      @ count -= 8; the flags set here drive both blt and bxeq below
+    blt        do_argb_finish  @ fewer than 8 pixels were left: hand the partial group in d4-d7 to the shared tail code
+    vst4.8     {d4-d7}, [r0]!  @ write 8 blended pixels back interleaved and advance dst
+    bxeq       lr              @ count reached exactly zero: return
+    nop                        @ NOTE(review): purpose not evident from this file (padding/alignment?) - confirm
+    b          0b
+.endm
+
+
 do_argb_finish:
     add        r2, r2, #8
     vzip.8     d4, d5          @ RRR..|GGG.. -> RGRG..
@@ -104,4 +138,12 @@ neon_ARGBtoXRGBalpha:
 neon_ABGRtoXRGBalpha:
     do_argb_alpha 1
 
+.global neon_ARGBtoXRGBalphaS
+neon_ARGBtoXRGBalphaS:         @ exported entry point; the macro below expands to the whole routine body
+    do_argb_alphaS 0           @ bgr2rgb=0: source planes are blended in the order loaded
+
+.global neon_ABGRtoXRGBalphaS
+neon_ABGRtoXRGBalphaS:         @ exported entry point; the macro below expands to the whole routine body
+    do_argb_alphaS 1           @ bgr2rgb=1: source planes d4 and d6 are swapped before blending
+
 @ vim:filetype=armasm