From bdfa698900e2b6c8601b77004a8ba91f5b30dbb6 Mon Sep 17 00:00:00 2001
From: notaz
Date: Thu, 23 Aug 2012 01:09:06 +0300
Subject: [PATCH] add some more NEON blitters from _wb_ (notaz: adjusted slightly to not trash callee-saved regs.)

---
Review note: line breaks and indentation in this copy were re-expanded from
a whitespace-collapsed version. Check that the context lines match the target
tree, or apply with --ignore-whitespace.

 src/video/SDL_blit_A.c    | 27 +++++++++++++++++++++++++
 src/video/SDL_blit_neon.S | 42 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 5a4cff0..565879d 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -82,8 +82,29 @@ static void name(SDL_BlitInfo *info) \
 	} \
 }
 
+#define make_neon_callerS(name, neon_name) /* defines static blitter name(); it calls neon_name(dst, src, width, alpha) once per row */ \
+extern void neon_name(void *dst, const void *src, int count, unsigned int alpha); \
+static void name(SDL_BlitInfo *info) \
+{ \
+	int width = info->d_width; \
+	int height = info->d_height; \
+	Uint8 *src = info->s_pixels; \
+	Uint8 *dst = info->d_pixels; \
+	int srcskip = info->s_skip; \
+	int dstskip = info->d_skip; \
+	unsigned alpha = info->src->alpha; /* read once; the same value is passed for every row */ \
+\
+	while ( height-- ) { /* one neon_name call per row */ \
+		neon_name(dst, src, width, alpha); \
+		src += width * 4 + srcskip; /* step to next row: width * 4 bytes plus the skip */ \
+		dst += width * 4 + dstskip; \
+	} \
+}
+
 make_neon_caller(BlitABGRtoXRGBalpha_neon, neon_ABGRtoXRGBalpha)
 make_neon_caller(BlitARGBtoXRGBalpha_neon, neon_ARGBtoXRGBalpha)
+make_neon_callerS(BlitABGRtoXRGBalphaS_neon, neon_ABGRtoXRGBalphaS) /* NOTE(review): no hunk in this patch references this wrapper - confirm it is used elsewhere */
+make_neon_callerS(BlitARGBtoXRGBalphaS_neon, neon_ARGBtoXRGBalphaS)
 
 #endif /* __ARM_NEON__ */
 
@@ -2831,6 +2852,12 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
 			   && sf->Bshift % 8 == 0
 			   && SDL_HasMMX())
 			    return BlitRGBtoRGBSurfaceAlphaMMX;
+#endif
+#ifdef __ARM_NEON__
+			if(sf->Rshift % 8 == 0
+			   && sf->Gshift % 8 == 0
+			   && sf->Bshift % 8 == 0)
+			    return BlitARGBtoXRGBalphaS_neon; /* chosen when the R, G and B shifts are all multiples of 8 */
 #endif
 			if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff)
 			{
diff --git a/src/video/SDL_blit_neon.S b/src/video/SDL_blit_neon.S
index 438d9fc..2823bce 100644
--- a/src/video/SDL_blit_neon.S
+++ b/src/video/SDL_blit_neon.S
@@ -60,6 +60,40 @@
 	b 0b
 .endm
 
+
+@ r0 = void *dst, r1 = const void *src, r2 = int count (4-byte pixels), r3 = uint alpha. Bytes 0-2 of each dst pixel move toward src by alpha, byte 3 of dst is kept.
+.macro do_argb_alphaS bgr2rgb
+	mov r12, #0xff
+	vdup.16 q11, r3 @ q11 = alpha in every 16-bit lane
+	vdup.16 q12, r12 @ q12 = 255 in every 16-bit lane (the +255 term below)
+0:
+	vld4.8 {d4-d7}, [r1]! @ 8 src pixels split by byte position, r1 advances. Loaded before count is checked
+	vld4.8 {d0-d3}, [r0] @ 8 dst pixels split the same way, r0 is not advanced here
+.if \bgr2rgb
+	vswp d4, d6 @ BGR->RGB
+.endif
+	@ d = (((s-d)*a+255)>>8)+d
+	vsubl.u8 q8, d4, d0 @ q8-q10 = s-d widened to 16 bits, one q register per byte position
+	vsubl.u8 q9, d5, d1
+	vsubl.u8 q10,d6, d2
+	vmul.s16 q8, q8, q11 @ (s-d)*a, low 16 bits kept
+	vmul.s16 q9, q9, q11
+	vmul.s16 q10,q10,q11
+	vaddhn.i16 d4, q8, q12 @ add 255 and keep the high byte of each lane (the >>8)
+	vaddhn.i16 d5, q9, q12
+	vaddhn.i16 d6, q10,q12
+	vadd.i8 q2, q0 @ d4, d5 += d0, d1 (the final +d)
+	vadd.i8 d6, d2 @ same for byte position 2
+	vmov.i8 d7, d3 @ byte position 3 is copied from dst unchanged
+	subs r2, r2, #8 @ count -= 8, sets the flags used by blt and bxeq
+	blt do_argb_finish @ fewer than 8 pixels were left: go to the shared do_argb_finish code
+	vst4.8 {d4-d7}, [r0]! @ store 8 result pixels, r0 advances
+	bxeq lr @ return when count reached exactly zero (flags still from subs)
+	nop
+	b 0b
+.endm
+
+
 do_argb_finish:
 	add r2, r2, #8
 	vzip.8 d4, d5 @ RRR..|GGG.. -> RGRG..
@@ -104,4 +138,12 @@ neon_ARGBtoXRGBalpha:
 neon_ABGRtoXRGBalpha:
 	do_argb_alpha 1
 
+.global neon_ARGBtoXRGBalphaS
+neon_ARGBtoXRGBalphaS: @ src byte positions used as loaded
+	do_argb_alphaS 0
+
+.global neon_ABGRtoXRGBalphaS
+neon_ABGRtoXRGBalphaS: @ src byte positions 0 and 2 swapped before blending
+	do_argb_alphaS 1
+
 @ vim:filetype=armasm
-- 
2.39.5