From: notaz
Date: Sat, 9 Feb 2013 21:01:18 +0000 (+0200)
Subject: NEONize a few more blit types
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2c4e54dd3606bd45b87b82589b6b8ee775f9f617;p=sdl_omap.git

NEONize a few more blit types
---

diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index d5000b2..b013ed3 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -72,13 +72,14 @@ static void name(SDL_BlitInfo *info) \
 	int height = info->d_height; \
 	Uint8 *src = info->s_pixels; \
 	Uint8 *dst = info->d_pixels; \
-	int srcskip = info->s_skip; \
-	int dstskip = info->d_skip; \
+	int dstBpp = info->dst->BytesPerPixel; \
+	int srcstride = width * 4 + info->s_skip; \
+	int dststride = width * dstBpp + info->d_skip; \
 \
 	while ( height-- ) { \
-	    neon_name(dst, src, width); \
-	    src += width * 4 + srcskip; \
-	    dst += width * 4 + dstskip; \
+		neon_name(dst, src, width); \
+		src += srcstride; \
+		dst += dststride; \
 	} \
 }
 
@@ -103,6 +104,8 @@ static void name(SDL_BlitInfo *info) \
 
 make_neon_caller(BlitABGRtoXRGBalpha_neon, neon_ABGRtoXRGBalpha)
 make_neon_caller(BlitARGBtoXRGBalpha_neon, neon_ARGBtoXRGBalpha)
+make_neon_caller(BlitABGRtoRGB565alpha_neon, neon_ABGRtoRGB565alpha)
+make_neon_caller(BlitARGBtoRGB565alpha_neon, neon_ARGBtoRGB565alpha)
 make_neon_callerS(BlitABGRtoXRGBalphaS_neon, neon_ABGRtoXRGBalphaS)
 make_neon_callerS(BlitARGBtoXRGBalphaS_neon, neon_ARGBtoXRGBalphaS)
 
@@ -2904,6 +2907,16 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
 		   df->Bmask == 0x1f && SDL_HasAltiVec())
 		    return Blit32to565PixelAlphaAltivec;
 		else
+#endif
+#ifdef __ARM_NEON__
+		if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
+		   && sf->Gmask == 0xff00 && df->Gmask == 0x7e0) {
+		    if((sf->Bmask >> 3) == df->Bmask || (sf->Rmask >> 3) == df->Rmask)
+			return BlitARGBtoRGB565alpha_neon;
+		    else
+			return BlitABGRtoRGB565alpha_neon;
+		}
+		else
 #endif
 		if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
 		   && sf->Gmask == 0xff00
diff --git a/src/video/SDL_blit_neon.S b/src/video/SDL_blit_neon.S
index af9af36..344ae05 100644
--- a/src/video/SDL_blit_neon.S
+++ b/src/video/SDL_blit_neon.S
@@ -11,6 +11,10 @@
 .text
 .align 2
 
+#define func(name) \
+  .global name; \
+  name
+
 @ void *dst, const void *src, int count, uint abits
 .macro do_argb bgr2rgb
     vdup.i8    d0, r3
@@ -96,28 +100,103 @@ do_argb_finish:
     bx          lr
 
 
-.global neon_ARGBtoXRGB
-neon_ARGBtoXRGB:
+@ void *dst, const void *src, int count, uint global_alpha
+.macro do_argb_to_rgb565_alpha bgr2rgb global_alpha
+    mov         r12, #0xff
+.if \global_alpha
+    vdup.16     q11, r3
+.endif
+    vdup.i16    q12, r12
+0:
+    pld         [r1, #64*2]
+    pld         [r0, #64*2]
+    vld4.8      {d4-d7}, [r1]!
+    vld2.8      {d1-d2}, [r0]
+.if \bgr2rgb
+    vswp        d4, d6            @ BGR->RGB
+.endif
+.if !\global_alpha
+    vmovl.u8    q11, d7
+.endif
+    vshl.i8     d0, d1, #3
+    vshr.u8     d1, d1, #3
+    vsri.i8     d0, d0, #5        @ B
+    vsli.i8     d1, d2, #5
+    vsri.i8     d2, d2, #5        @ R
+    vsri.i8     d1, d1, #6        @ G
+    @ d = (((s-d)*a+255)>>8)+d
+    vsubl.u8    q8, d4, d0
+    vsubl.u8    q9, d5, d1
+    vsubl.u8    q10,d6, d2
+    vmul.s16    q8, q8, q11
+    vmul.s16    q9, q9, q11
+    vmul.s16    q10,q10,q11
+    vaddhn.i16  d4, q8, q12
+    vaddhn.i16  d5, q9, q12
+    vaddhn.i16  d6, q10,q12
+    vadd.i8     q2, q0
+    vadd.i8     d2, d6            @ rrrr rrrr
+    vshr.u8     d0, d5, #2
+    vshr.u8     d1, d4, #3        @ 000b bbbb
+    vsri.i8     d2, d5, #5        @ rrrr rggg
+    vsli.i8     d1, d0, #5        @ gggb bbbb
+    subs        r2, r2, #8
+    blt         do_rgb565_finish
+    vst2.8      {d1-d2}, [r0]!
+    bxeq        lr
+    nop
+    b           0b
+.endm
+
+
+do_rgb565_finish:
+    vzip.8      d1, d2
+    add         r2, r2, #8
+
+    vst1.16     d1[0], [r0]!
+    cmp         r2, #1
+    bxle        lr
+    vst1.16     d1[1], [r0]!
+    cmp         r2, #2
+    bxle        lr
+    vst1.16     d1[2], [r0]!
+    cmp         r2, #3
+    bxle        lr
+    vst1.16     d1[3], [r0]!
+    cmp         r2, #4
+    bxle        lr
+    vst1.16     d2[0], [r0]!
+    cmp         r2, #5
+    bxle        lr
+    vst1.16     d2[1], [r0]!
+    cmp         r2, #6
+    bxle        lr
+    vst1.16     d2[2], [r0]!
+    bx          lr
+
+
+func(neon_ARGBtoXRGB):
     do_argb 0
 
-.global neon_ABGRtoXRGB
-neon_ABGRtoXRGB:
+func(neon_ABGRtoXRGB):
     do_argb 1
 
-.global neon_ARGBtoXRGBalpha
-neon_ARGBtoXRGBalpha:
+func(neon_ARGBtoXRGBalpha):
     do_argb_alpha 0, 0
 
-.global neon_ABGRtoXRGBalpha
-neon_ABGRtoXRGBalpha:
+func(neon_ABGRtoXRGBalpha):
     do_argb_alpha 1, 0
 
-.global neon_ARGBtoXRGBalphaS
-neon_ARGBtoXRGBalphaS:
+func(neon_ARGBtoXRGBalphaS):
     do_argb_alpha 0, 1
 
-.global neon_ABGRtoXRGBalphaS
-neon_ABGRtoXRGBalphaS:
+func(neon_ABGRtoXRGBalphaS):
     do_argb_alpha 1, 1
 
+func(neon_ARGBtoRGB565alpha):
+    do_argb_to_rgb565_alpha 0, 0
+
+func(neon_ABGRtoRGB565alpha):
+    do_argb_to_rgb565_alpha 1, 0
+
 @ vim:filetype=armasm