/*
- * (C) Gražvydas "notaz" Ignotas, 2011
+ * (C) Gražvydas "notaz" Ignotas, 2011,2012
*
* This work is licensed under the terms of any of these licenses
* (at your option):
.text
.align 2
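+@ func(name) expands to ".global name; name", declaring and starting an
+@ entry point in one line; since #define is a C-preprocessor construct,
+@ the file is assumed to be built as preprocessed (.S) assembly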
+#define func(name) \
+ .global name; \
+ name
+
@ void *dst, const void *src, int count, uint abits
.macro do_argb bgr2rgb
vdup.i8 d0, r3
b 0b
.endm
-@ void *dst, const void *src, int count
-.macro do_argb_alpha bgr2rgb
- mov r3, #0xff
- vdup.i16 q12, r3
+@ void *dst, const void *src, int count, uint global_alpha
+.macro do_argb_alpha bgr2rgb global_alpha
+ mov r12, #0xff
+.if \global_alpha
+ vdup.16 q11, r3
+.endif
+ vdup.i16 q12, r12
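+@ q11 = blend alpha: the caller's global value when \global_alpha is
+@ set, otherwise refilled from each pixel's A channel (d7) in the loop;
+@ q12 = 255 in every 16-bit lane, the rounding bias for the blend below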
0:
+ pld [r1, #64*2] @ prefetch source 2 cache lines ahead (64B lines assumed)
+ pld [r0, #64*2] @ prefetch destination
vld4.8 {d4-d7}, [r1]!
vld4.8 {d0-d3}, [r0]
.if \bgr2rgb
vswp d4, d6 @ BGR->RGB
.endif
+.if !\global_alpha
vmovl.u8 q11, d7
+.endif
@ d = (((s-d)*a+255)>>8)+d
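+@ e.g. s=0xff d=0x00 a=0xff: (0xff*0xff+0xff)>>8 = 0xff; a=0 leaves d
+@ unchanged, and the +255 bias makes a=0xff reproduce s exactly even
+@ though this divides by 256 rather than 255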
vsubl.u8 q8, d4, d0
vsubl.u8 q9, d5, d1
.endm
-@ void *dst, const void *src, int count, uint alpha
-.macro do_argb_alphaS bgr2rgb
+do_argb_finish:
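+ @ store the trailing 1..7 pixels one at a time; the loop left r2
+ @ negative, so +8 recovers the remaining count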
+ add r2, r2, #8
+ vzip.8 d4, d5 @ RRR..|GGG.. -> RGRG..
+ vzip.8 d6, d7 @ BBB..|000.. -> B0B0..
+ vzip.16 q2, q3
+
+ vst1.32 {d4[0]}, [r0]!
+ cmp r2, #1
+ bxle lr
+ vst1.32 {d4[1]}, [r0]!
+ cmp r2, #2
+ bxle lr
+ vst1.32 {d5[0]}, [r0]!
+ cmp r2, #3
+ bxle lr
+ vst1.32 {d5[1]}, [r0]!
+ cmp r2, #4
+ bxle lr
+ vst1.32 {d6[0]}, [r0]!
+ cmp r2, #5
+ bxle lr
+ vst1.32 {d6[1]}, [r0]!
+ cmp r2, #6
+ bxle lr
+ vst1.32 {d7[0]}, [r0]!
+ bx lr
+
+
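+@ like do_argb_alpha, but the destination is 16bpp RGB565: each batch
+@ of 8 destination pixels is unpacked to 8-bit channels, blended with
+@ the 32bpp source, then packed back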
+@ void *dst, const void *src, int count, uint global_alpha
+.macro do_argb_to_rgb565_alpha bgr2rgb global_alpha
mov r12, #0xff
+.if \global_alpha
vdup.16 q11, r3
- vdup.16 q12, r12
+.endif
+ vdup.i16 q12, r12
0:
+ pld [r1, #64*2] @ prefetch source 2 cache lines ahead (64B lines assumed)
+ pld [r0, #64*2] @ prefetch destination
vld4.8 {d4-d7}, [r1]!
- vld4.8 {d0-d3}, [r0]
+ vld2.8 {d1-d2}, [r0]
.if \bgr2rgb
vswp d4, d6 @ BGR->RGB
.endif
+.if !\global_alpha
+ vmovl.u8 q11, d7
+.endif
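+@ unpack RGB565 to 8 bits per channel: vld2 has split the pixels into
+@ d1 = gggbbbbb (low bytes) and d2 = rrrrrggg (high bytes); each field
+@ is shifted to the top of its own byte and has its high bits shift-
+@ inserted below it, e.g. bbbbb000 -> bbbbbbbb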
+ vshl.i8 d0, d1, #3
+ vshr.u8 d1, d1, #3
+ vsri.i8 d0, d0, #5 @ B
+ vsli.i8 d1, d2, #5
+ vsri.i8 d2, d2, #5 @ R
+ vsri.i8 d1, d1, #6 @ G
@ d = (((s-d)*a+255)>>8)+d
vsubl.u8 q8, d4, d0
vsubl.u8 q9, d5, d1
vaddhn.i16 d5, q9, q12
vaddhn.i16 d6, q10, q12
vadd.i8 q2, q0
- vadd.i8 d6, d2
- vmov.i8 d7, d3
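+@ pack the blended 8-bit B/G/R (d4/d5/d2) back to RGB565 in d1 (low
+@ byte, gggbbbbb) and d2 (high byte, rrrrrggg) for the vst2 below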
+ vadd.i8 d2, d6 @ rrrr rrrr
+ vshr.u8 d0, d5, #2
+ vshr.u8 d1, d4, #3 @ 000b bbbb
+ vsri.i8 d2, d5, #5 @ rrrr rggg
+ vsli.i8 d1, d0, #5 @ gggb bbbb
subs r2, r2, #8
- blt do_argb_finish
- vst4.8 {d4-d7}, [r0]!
+ blt do_rgb565_finish
+ vst2.8 {d1-d2}, [r0]! @ store 8 RGB565 pixels
bxeq lr
nop
b 0b
.endm
-do_argb_finish:
+do_rgb565_finish:
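+ @ same trailing-pixel store for 565: zip the low/high bytes back into
+ @ whole 16-bit pixels, then store halfword by halfword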
+ vzip.8 d1, d2
add r2, r2, #8
- vzip.8 d4, d5 @ RRR..|GGG.. -> RGRG..
- vzip.8 d6, d7 @ BBB..|000.. -> B0B0..
- vzip.16 q2, q3
-
- vst1.32 d4[0], [r0]!
+
+ vst1.16 {d1[0]}, [r0]!
cmp r2, #1
bxle lr
- vst1.32 d4[1], [r0]!
+ vst1.16 {d1[1]}, [r0]!
cmp r2, #2
bxle lr
- vst1.32 d5[0], [r0]!
+ vst1.16 {d1[2]}, [r0]!
cmp r2, #3
bxle lr
- vst1.32 d5[1], [r0]!
+ vst1.16 {d1[3]}, [r0]!
cmp r2, #4
bxle lr
- vst1.32 d6[0], [r0]!
+ vst1.16 {d2[0]}, [r0]!
cmp r2, #5
bxle lr
- vst1.32 d6[1], [r0]!
+ vst1.16 {d2[1]}, [r0]!
cmp r2, #6
bxle lr
- vst1.32 d7[0], [r0]!
+ vst1.16 {d2[2]}, [r0]!
bx lr
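+
+@ entry points; the *alphaS variants take a global alpha in r3, the
+@ plain *alpha ones use each source pixel's own alpha byte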
-.global neon_ARGBtoXRGB
-neon_ARGBtoXRGB:
+func(neon_ARGBtoXRGB):
do_argb 0
-.global neon_ABGRtoXRGB
-neon_ABGRtoXRGB:
+func(neon_ABGRtoXRGB):
do_argb 1
-.global neon_ARGBtoXRGBalpha
-neon_ARGBtoXRGBalpha:
- do_argb_alpha 0
+func(neon_ARGBtoXRGBalpha):
+ do_argb_alpha 0, 0
+
+func(neon_ABGRtoXRGBalpha):
+ do_argb_alpha 1, 0
+
+func(neon_ARGBtoXRGBalphaS):
+ do_argb_alpha 0, 1
+
-.global neon_ABGRtoXRGBalpha
-neon_ABGRtoXRGBalpha:
- do_argb_alpha 1
+func(neon_ABGRtoXRGBalphaS):
+ do_argb_alpha 1, 1
+
-.global neon_ARGBtoXRGBalphaS
-neon_ARGBtoXRGBalphaS:
- do_argb_alphaS 0
+func(neon_ARGBtoRGB565alpha):
+ do_argb_to_rgb565_alpha 0, 0
+
-.global neon_ABGRtoXRGBalphaS
-neon_ABGRtoXRGBalphaS:
- do_argb_alphaS 1
+func(neon_ABGRtoRGB565alpha):
+ do_argb_to_rgb565_alpha 1, 0
@ vim:filetype=armasm