} \
}
+/*
+ * make_neon_callerS(name, neon_name)
+ *
+ * Declares the external routine `neon_name` and defines a static blit
+ * function `name` that calls it once per row of the blit described by
+ * `info`, passing info->src->alpha as the blend factor.
+ *
+ * Rows are stepped as 4 bytes per pixel plus the s_skip / d_skip padding
+ * recorded in `info`.
+ */
+#define make_neon_callerS(name, neon_name) \
+extern void neon_name(void *dst, const void *src, int count, unsigned int alpha); \
+static void name(SDL_BlitInfo *info) \
+{ \
+    Uint8 *src_row = info->s_pixels; \
+    Uint8 *dst_row = info->d_pixels; \
+    unsigned surface_alpha = info->src->alpha; \
+    int row_pixels = info->d_width; \
+    int src_stride = row_pixels * 4 + info->s_skip; \
+    int dst_stride = row_pixels * 4 + info->d_skip; \
+    int rows_left; \
+\
+    for (rows_left = info->d_height; rows_left != 0; rows_left--) { \
+        neon_name(dst_row, src_row, row_pixels, surface_alpha); \
+        src_row += src_stride; \
+        dst_row += dst_stride; \
+    } \
+}
+
make_neon_caller(BlitABGRtoXRGBalpha_neon, neon_ABGRtoXRGBalpha)
make_neon_caller(BlitARGBtoXRGBalpha_neon, neon_ARGBtoXRGBalpha)
+make_neon_callerS(BlitABGRtoXRGBalphaS_neon, neon_ABGRtoXRGBalphaS) /* row wrapper; alpha comes from info->src->alpha */
+make_neon_callerS(BlitARGBtoXRGBalphaS_neon, neon_ARGBtoXRGBalphaS) /* row wrapper; alpha comes from info->src->alpha */
#endif /* __ARM_NEON__ */
&& sf->Bshift % 8 == 0
&& SDL_HasMMX())
return BlitRGBtoRGBSurfaceAlphaMMX;
+#endif
+#ifdef __ARM_NEON__
+ /* The NEON routine blends the first three bytes of every pixel and
+  * copies the fourth from the destination, so besides byte-aligned
+  * channels it needs R, G and B to occupy the low 24 bits (assuming
+  * little-endian pixel storage) -- the same mask test the C path
+  * below uses.  Other layouts fall through to the remaining checks. */
+ if(sf->Rshift % 8 == 0
+ && sf->Gshift % 8 == 0
+ && sf->Bshift % 8 == 0
+ && (sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff)
+ return BlitARGBtoXRGBalphaS_neon;
#endif
if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff)
{
b 0b
.endm
+
+@ void *dst, const void *src, int count, uint alpha
+@ Registers as used below: r0 = dst, r1 = src, r2 = count, r3 = alpha.
+@ Blends 8 pixels per iteration using one alpha value for all pixels.
+@ A non-zero \bgr2rgb swaps the first and third byte planes of the source
+@ before blending.
+@ NOTE(review): both loads fetch a full 8 pixels before the count is
+@ checked, so a count that is not a multiple of 8 reads past the last
+@ pixel -- confirm the buffers tolerate this.
+.macro do_argb_alphaS bgr2rgb
+ mov r12, #0xff
+ vdup.16 q11, r3 @ alpha in every 16-bit lane
+ vdup.16 q12, r12 @ 255 in every 16-bit lane (the "+255" term)
+0:
+ vld4.8 {d4-d7}, [r1]! @ 8 src pixels, split into 4 byte planes; src += 32
+ vld4.8 {d0-d3}, [r0] @ 8 dst pixels, same split; dst not advanced yet
+.if \bgr2rgb
+ vswp d4, d6 @ BGR->RGB
+.endif
+ @ d = (((s-d)*a+255)>>8)+d
+ vsubl.u8 q8, d4, d0 @ s-d, widened to 16 bits, for planes 0..2
+ vsubl.u8 q9, d5, d1
+ vsubl.u8 q10,d6, d2
+ vmul.s16 q8, q8, q11 @ (s-d)*a
+ vmul.s16 q9, q9, q11
+ vmul.s16 q10,q10,q11
+ vaddhn.i16 d4, q8, q12 @ add 255, keep the high byte of each lane
+ vaddhn.i16 d5, q9, q12
+ vaddhn.i16 d6, q10,q12
+ vadd.i8 q2, q0 @ + d for planes 0 and 1 (d4 += d0, d5 += d1)
+ vadd.i8 d6, d2 @ + d for plane 2
+ vmov.i8 d7, d3 @ plane 3 is copied from the destination unchanged
+ subs r2, r2, #8 @ count -= 8, sets flags for the branches below
+ blt do_argb_finish @ fewer than 8 pixels were left: tail code outside this macro
+ vst4.8 {d4-d7}, [r0]! @ store 8 blended pixels; dst += 32
+ bxeq lr @ count reached exactly zero: return
+ nop @ NOTE(review): purpose not evident from here (padding?) -- confirm
+ b 0b
+.endm
+
+
do_argb_finish:
add r2, r2, #8
vzip.8 d4, d5 @ RRR..|GGG.. -> RGRG..
neon_ABGRtoXRGBalpha:
do_argb_alpha 1
+@ Exported entry point: do_argb_alphaS with the source byte planes used as-is.
+.global neon_ARGBtoXRGBalphaS
+neon_ARGBtoXRGBalphaS:
+ do_argb_alphaS 0
+
+@ Exported entry point: do_argb_alphaS with source planes 0 and 2 swapped
+@ before blending.
+.global neon_ABGRtoXRGBalphaS
+neon_ABGRtoXRGBalphaS:
+ do_argb_alphaS 1
+
@ vim:filetype=armasm