From a1eff5db323aedd15ed6fbfd13d06273b54d2d6f Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 23 Aug 2012 01:28:50 +0300 Subject: [PATCH] unify NEON code, use preload --- src/video/SDL_blit_neon.S | 56 +++++++++++---------------------------- 1 file changed, 15 insertions(+), 41 deletions(-) diff --git a/src/video/SDL_blit_neon.S b/src/video/SDL_blit_neon.S index 2823bce..af9af36 100644 --- a/src/video/SDL_blit_neon.S +++ b/src/video/SDL_blit_neon.S @@ -1,5 +1,5 @@ /* - * (C) Gražvydas "notaz" Ignotas, 2011 + * (C) Gražvydas "notaz" Ignotas, 2011,2012 * * This work is licensed under the terms of any of these licenses * (at your option): @@ -28,49 +28,23 @@ b 0b .endm -@ void *dst, const void *src, int count -.macro do_argb_alpha bgr2rgb - mov r3, #0xff - vdup.i16 q12, r3 -0: - vld4.8 {d4-d7}, [r1]! - vld4.8 {d0-d3}, [r0] -.if \bgr2rgb - vswp d4, d6 @ BGR->RGB -.endif - vmovl.u8 q11, d7 - @ d = (((s-d)*a+255)>>8)+d - vsubl.u8 q8, d4, d0 - vsubl.u8 q9, d5, d1 - vsubl.u8 q10,d6, d2 - vmul.s16 q8, q8, q11 - vmul.s16 q9, q9, q11 - vmul.s16 q10,q10,q11 - vaddhn.i16 d4, q8, q12 - vaddhn.i16 d5, q9, q12 - vaddhn.i16 d6, q10,q12 - vadd.i8 q2, q0 - vadd.i8 d6, d2 - vmov.i8 d7, d3 - subs r2, r2, #8 - blt do_argb_finish - vst4.8 {d4-d7}, [r0]! - bxeq lr - nop - b 0b -.endm - - -@ void *dst, const void *src, int count, uint alpha -.macro do_argb_alphaS bgr2rgb +@ void *dst, const void *src, int count, uint global_alpha +.macro do_argb_alpha bgr2rgb global_alpha mov r12, #0xff +.if \global_alpha vdup.16 q11, r3 - vdup.16 q12, r12 +.endif + vdup.i16 q12, r12 0: + pld [r1, #64*2] + pld [r0, #64*2] vld4.8 {d4-d7}, [r1]! vld4.8 {d0-d3}, [r0] .if \bgr2rgb vswp d4, d6 @ BGR->RGB +.endif +.if !\global_alpha + vmovl.u8 q11, d7 .endif @ d = (((s-d)*a+255)>>8)+d vsubl.u8 q8, d4, d0 @@ -132,18 +106,18 @@ neon_ABGRtoXRGB: .global neon_ARGBtoXRGBalpha neon_ARGBtoXRGBalpha: - do_argb_alpha 0 + do_argb_alpha 0, 0 .global neon_ABGRtoXRGBalpha neon_ABGRtoXRGBalpha: - do_argb_alpha 1 + do_argb_alpha 1, 0 .global neon_ARGBtoXRGBalphaS neon_ARGBtoXRGBalphaS: - do_argb_alphaS 0 + do_argb_alpha 0, 1 .global neon_ABGRtoXRGBalphaS neon_ABGRtoXRGBalphaS: - do_argb_alphaS 1 + do_argb_alpha 1, 1 @ vim:filetype=armasm -- 2.39.5