/*
 * Origin: [sdl_omap.git] / src / video / SDL_blit_neon.S
 * Commit: add some more NEON blitters from _wb_
 */
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

@ Everything below is ARM (A32) code: it relies on conditionally executed
@ instructions (bxeq/bxle) and on NEON opcodes, so state both explicitly
@ instead of depending on the assembler's command-line defaults.
	.arm
	.fpu	neon

	.text
	.align	2

@ void f(void *dst, const void *src, int count, uint abits)
@
@ Body of the plain copy blitters: copies `count` 4-byte pixels from src to
@ dst, replacing byte 3 of every pixel with the low 8 bits of `abits`.
@
@ Macro argument:
@   bgr2rgb - non-zero swaps byte planes 0 and 2 of each pixel (BGR <-> RGB)
@ In:       r0 = dst, r1 = src, r2 = count, r3 = abits
@ Clobbers: r0-r2, d0, d4-d7, flags
@
@ NOTE(review): the loop always loads 8 whole pixels (32 bytes) from src
@ before checking how many are left, so up to 7 pixels past the end of the
@ source may be read for the tail - confirm callers' buffers tolerate this.
.macro do_argb bgr2rgb
	cmp		r2, #0
	bxle		lr			@ empty/negative count: touch nothing
	vdup.i8		d0, r3			@ d0 = abits byte in every lane
0:
	vld4.8		{d4-d7}, [r1]!		@ de-interleave 8 pixels into 4 byte planes
.if \bgr2rgb
	vswp		d4, d6			@ BGR->RGB
.endif
	vmov.i8		d7, d0			@ overwrite byte-3 plane with abits
	subs		r2, r2, #8
	blt		do_argb_finish		@ fewer than 8 left: store them one by one
	vst4.8		{d4-d7}, [r0]!		@ (NEON store leaves the flags from subs intact)
	bxeq		lr			@ count hit exactly zero: done
	nop
	b		0b
.endm

@ void f(void *dst, const void *src, int count)
@
@ Body of the per-pixel-alpha blitters: blends `count` 4-byte source pixels
@ onto dst using byte 3 of each source pixel as the blend factor.  Byte 3 of
@ the destination pixel is written back unchanged.
@
@ Macro argument:
@   bgr2rgb - non-zero swaps byte planes 0 and 2 of the source (BGR <-> RGB)
@ In:       r0 = dst, r1 = src, r2 = count
@ Clobbers: r0-r3, d0-d7, q8-q12, flags
@
@ NOTE(review): 8 whole pixels are loaded from both src and dst before the
@ remaining count is checked, so the tail may read up to 7 pixels past the
@ end of either buffer - confirm callers' buffers tolerate this.
.macro do_argb_alpha bgr2rgb
	cmp		r2, #0
	bxle		lr			@ empty/negative count: touch nothing
	mov		r3, #0xff
	vdup.i16	q12, r3			@ q12 = 255 in every 16-bit lane (rounding bias)
0:
	vld4.8		{d4-d7}, [r1]!		@ source planes; d7 = per-pixel alpha
	vld4.8		{d0-d3}, [r0]		@ destination planes (r0 advanced by the store)
.if \bgr2rgb
	vswp		d4, d6			@ BGR->RGB
.endif
	vmovl.u8	q11, d7			@ widen source alpha to 16 bits
	@ d = (((s-d)*a+255)>>8)+d
	@ The 16-bit intermediates may wrap; only the low 8 bits of the final
	@ sum are kept, and those are unaffected by the wrap.
	vsubl.u8	q8, d4, d0		@ s - d, widened
	vsubl.u8	q9, d5, d1
	vsubl.u8	q10, d6, d2
	vmul.s16	q8, q8, q11		@ (s - d) * a
	vmul.s16	q9, q9, q11
	vmul.s16	q10, q10, q11
	vaddhn.i16	d4, q8, q12		@ (x + 255) >> 8, narrowed back to bytes
	vaddhn.i16	d5, q9, q12
	vaddhn.i16	d6, q10, q12
	vadd.i8		q2, q0			@ d4 += d0, d5 += d1
	vadd.i8		d6, d2
	vmov.i8		d7, d3			@ keep destination byte 3 as is
	subs		r2, r2, #8
	blt		do_argb_finish		@ fewer than 8 left: store them one by one
	vst4.8		{d4-d7}, [r0]!		@ (NEON store leaves the flags from subs intact)
	bxeq		lr			@ count hit exactly zero: done
	nop
	b		0b
.endm


@ void f(void *dst, const void *src, int count, uint alpha)
@
@ Body of the constant-alpha blitters: blends `count` 4-byte source pixels
@ onto dst using the single factor `alpha` for every pixel.  Byte 3 of the
@ destination pixel is written back unchanged; byte 3 of the source is
@ loaded but not used.
@
@ Macro argument:
@   bgr2rgb - non-zero swaps byte planes 0 and 2 of the source (BGR <-> RGB)
@ In:       r0 = dst, r1 = src, r2 = count, r3 = alpha
@           (presumably 0..255 - the low 16 bits are used as the multiplier;
@           verify against callers)
@ Clobbers: r0-r2, r12, d0-d7, q8-q12, flags
@
@ NOTE(review): 8 whole pixels are loaded from both src and dst before the
@ remaining count is checked, so the tail may read up to 7 pixels past the
@ end of either buffer - confirm callers' buffers tolerate this.
.macro do_argb_alphaS bgr2rgb
	cmp		r2, #0
	bxle		lr			@ empty/negative count: touch nothing
	mov		r12, #0xff
	vdup.16		q11, r3			@ q11 = alpha in every 16-bit lane
	vdup.16		q12, r12		@ q12 = 255 in every 16-bit lane (rounding bias)
0:
	vld4.8		{d4-d7}, [r1]!		@ source planes
	vld4.8		{d0-d3}, [r0]		@ destination planes (r0 advanced by the store)
.if \bgr2rgb
	vswp		d4, d6			@ BGR->RGB
.endif
	@ d = (((s-d)*a+255)>>8)+d
	@ The 16-bit intermediates may wrap; only the low 8 bits of the final
	@ sum are kept, and those are unaffected by the wrap.
	vsubl.u8	q8, d4, d0		@ s - d, widened
	vsubl.u8	q9, d5, d1
	vsubl.u8	q10, d6, d2
	vmul.s16	q8, q8, q11		@ (s - d) * a
	vmul.s16	q9, q9, q11
	vmul.s16	q10, q10, q11
	vaddhn.i16	d4, q8, q12		@ (x + 255) >> 8, narrowed back to bytes
	vaddhn.i16	d5, q9, q12
	vaddhn.i16	d6, q10, q12
	vadd.i8		q2, q0			@ d4 += d0, d5 += d1
	vadd.i8		d6, d2
	vmov.i8		d7, d3			@ keep destination byte 3 as is
	subs		r2, r2, #8
	blt		do_argb_finish		@ fewer than 8 left: store them one by one
	vst4.8		{d4-d7}, [r0]!		@ (NEON store leaves the flags from subs intact)
	bxeq		lr			@ count hit exactly zero: done
	nop
	b		0b
.endm


@ Shared tail of the do_argb* loops; returns to the blitter's caller.
@
@ In:  r0 = dst (next pixel to write)
@      r2 = pixels left minus 8 (i.e. the loop counter after it went negative)
@      d4-d7 = the four byte planes of the 8 pixels just processed
@ Writes the first r2+8 of those pixels (at most 7) and nothing else.
@ Clobbers: r0, r2, d4-d7, flags
do_argb_finish:
	add		r2, r2, #8		@ r2 = pixels still to store
	cmp		r2, #0
	bxle		lr			@ none left (count was <= 0): store nothing

	@ Re-interleave the planes into whole pixels:
	@ d4 = px0,px1  d5 = px2,px3  d6 = px4,px5  d7 = px6,px7
	vzip.8		d4, d5			@ RRR..|GGG.. -> RGRG..
	vzip.8		d6, d7			@ BBB..|000.. -> B0B0..
	vzip.16		q2, q3

	@ Store one 32-bit pixel at a time, stopping once r2 pixels are out.
	vst1.32		d4[0], [r0]!
	cmp		r2, #1
	bxle		lr
	vst1.32		d4[1], [r0]!
	cmp		r2, #2
	bxle		lr
	vst1.32		d5[0], [r0]!
	cmp		r2, #3
	bxle		lr
	vst1.32		d5[1], [r0]!
	cmp		r2, #4
	bxle		lr
	vst1.32		d6[0], [r0]!
	cmp		r2, #5
	bxle		lr
	vst1.32		d6[1], [r0]!
	cmp		r2, #6
	bxle		lr
	vst1.32		d7[0], [r0]!		@ 7th and last possible tail pixel
	bx		lr


@ void neon_ARGBtoXRGB(void *dst, const void *src, int count, uint abits)
@ Copy without swapping byte planes; byte 3 of each pixel is set from abits.
	.global	neon_ARGBtoXRGB
	.type	neon_ARGBtoXRGB, %function	@ mark as code for the linker/interworking
neon_ARGBtoXRGB:
	do_argb 0
	.size	neon_ARGBtoXRGB, . - neon_ARGBtoXRGB

@ void neon_ABGRtoXRGB(void *dst, const void *src, int count, uint abits)
@ Copy with byte planes 0 and 2 swapped; byte 3 of each pixel is set from abits.
	.global	neon_ABGRtoXRGB
	.type	neon_ABGRtoXRGB, %function	@ mark as code for the linker/interworking
neon_ABGRtoXRGB:
	do_argb 1
	.size	neon_ABGRtoXRGB, . - neon_ABGRtoXRGB

@ void neon_ARGBtoXRGBalpha(void *dst, const void *src, int count)
@ Blend src onto dst using each source pixel's own byte 3 as the factor.
	.global	neon_ARGBtoXRGBalpha
	.type	neon_ARGBtoXRGBalpha, %function	@ mark as code for the linker/interworking
neon_ARGBtoXRGBalpha:
	do_argb_alpha 0
	.size	neon_ARGBtoXRGBalpha, . - neon_ARGBtoXRGBalpha

@ void neon_ABGRtoXRGBalpha(void *dst, const void *src, int count)
@ As the ARGB variant, but with source byte planes 0 and 2 swapped first.
	.global	neon_ABGRtoXRGBalpha
	.type	neon_ABGRtoXRGBalpha, %function	@ mark as code for the linker/interworking
neon_ABGRtoXRGBalpha:
	do_argb_alpha 1
	.size	neon_ABGRtoXRGBalpha, . - neon_ABGRtoXRGBalpha

@ void neon_ARGBtoXRGBalphaS(void *dst, const void *src, int count, uint alpha)
@ Blend src onto dst using the single factor `alpha` for every pixel.
	.global	neon_ARGBtoXRGBalphaS
	.type	neon_ARGBtoXRGBalphaS, %function	@ mark as code for the linker/interworking
neon_ARGBtoXRGBalphaS:
	do_argb_alphaS 0
	.size	neon_ARGBtoXRGBalphaS, . - neon_ARGBtoXRGBalphaS

@ void neon_ABGRtoXRGBalphaS(void *dst, const void *src, int count, uint alpha)
@ As the ARGB variant, but with source byte planes 0 and 2 swapped first.
	.global	neon_ABGRtoXRGBalphaS
	.type	neon_ABGRtoXRGBalphaS, %function	@ mark as code for the linker/interworking
neon_ABGRtoXRGBalphaS:
	do_argb_alphaS 1
	.size	neon_ABGRtoXRGBalphaS, . - neon_ABGRtoXRGBalphaS

@ vim:filetype=armasm