NEONize a few more blit types
[sdl_omap.git] / src / video / SDL_blit_neon.S
CommitLineData
a1f34081 1/*
a1eff5db 2 * (C) Gražvydas "notaz" Ignotas, 2011,2012
a1f34081 3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11.text
12.align 2
13
/*
 * func(name): declare an exported function entry point.
 *
 * Used as "func(foo):" - the expansion ends with the bare symbol name so
 * that the ':' written at the use site turns it into the label definition.
 * Besides exporting the symbol, it is typed as a function in the ELF
 * symbol table, so tools that care about symbol types (linker, debugger,
 * profiler) see these routines as code rather than untyped labels.
 */
#define func(name) \
	.global name; \
	.type name, %function; \
	name
17
@ do_argb bgr2rgb
@
@ Body of the 32bpp -> 32bpp copy routines.
@ C-level arguments: void *dst, const void *src, int count, uint abits
@   r0 = dst, r1 = src, r2 = count (pixels), r3 = abits
@ bgr2rgb: when non-zero, byte lanes 0 and 2 of every pixel are swapped.
@
@ Each pass loads 8 pixels (32 bytes), replaces byte lane 3 of every
@ pixel with the low byte of abits and stores 8 pixels. When fewer than
@ 8 pixels remain, the already converted data in d4-d7 is handed to
@ do_argb_finish, which stores the remainder and returns to the caller.
@ Note that a full 32 bytes are always read from src, even for the tail.
@ Clobbers: r0-r2, d0, d4-d7, flags.
.macro do_argb bgr2rgb
	vdup.i8		d0, r3			@ d0 = abits byte in all 8 lanes
1:
	vld4.8		{d4-d7}, [r1]!		@ deinterleave 8 source pixels
	vmov		d7, d0			@ byte lane 3 := abits
.if \bgr2rgb
	vswp		d4, d6			@ BGR->RGB
.endif
	subs		r2, r2, #8		@ 8 more pixels consumed
	blt		do_argb_finish		@ partial group: store 1..7 and return
	vst4.8		{d4-d7}, [r0]!
	bxeq		lr			@ count was a multiple of 8: done
	nop
	b		1b
.endm
34
@ do_argb_alpha bgr2rgb global_alpha
@
@ Body of the 32bpp -> 32bpp alpha blending routines.
@ C-level arguments: void *dst, const void *src, int count, uint global_alpha
@   r0 = dst, r1 = src, r2 = count (pixels), r3 = global_alpha
@ bgr2rgb:      when non-zero, source byte lanes 0 and 2 are swapped.
@ global_alpha: when non-zero, r3 is used as the alpha of every pixel;
@               otherwise byte lane 3 of each source pixel is the alpha
@               and r3 is not read.
@
@ For byte lanes 0..2 the result is d = (((s-d)*a+255)>>8)+d, computed in
@ 16-bit lanes (only the low 16 bits of the product are kept, which is
@ all the final 8-bit result depends on). Byte lane 3 of the destination
@ is written back unchanged.
@ Works on 8 pixels per pass; a tail of fewer than 8 pixels is passed to
@ do_argb_finish in d4-d7. 32 bytes are always read from both src and dst.
@ Clobbers: r0-r2, r12, q0-q3, q8-q12, flags.
.macro do_argb_alpha bgr2rgb global_alpha
	mov		r12, #0xff
	vdup.i16	q12, r12		@ q12 = rounding term (255) per lane
.if \global_alpha
	vdup.16		q11, r3			@ q11 = constant alpha per lane
.endif
1:
	pld		[r1, #128]
	pld		[r0, #128]
	vld4.8		{d4-d7}, [r1]!		@ source pixels, planar
	vld4.8		{d0-d3}, [r0]		@ destination pixels, planar
.if \bgr2rgb
	vswp		d4, d6			@ BGR->RGB
.endif
.if !\global_alpha
	vmovl.u8	q11, d7			@ q11 = per-pixel source alpha
.endif
	@ d = (((s-d)*a+255)>>8)+d
	vsubl.u8	q8,  d4, d0		@ s - d, widened to 16 bits
	vsubl.u8	q9,  d5, d1
	vsubl.u8	q10, d6, d2
	vmul.s16	q8,  q8,  q11		@ * a
	vmul.s16	q9,  q9,  q11
	vmul.s16	q10, q10, q11
	vaddhn.i16	d4, q8,  q12		@ (+255) >> 8, narrowed to 8 bits
	vaddhn.i16	d5, q9,  q12
	vaddhn.i16	d6, q10, q12
	vadd.i8		q2, q2, q0		@ + d for byte lanes 0 and 1
	vadd.i8		d6, d6, d2		@ + d for byte lane 2
	vmov		d7, d3			@ keep destination byte lane 3
	subs		r2, r2, #8
	blt		do_argb_finish		@ partial group: store 1..7 and return
	vst4.8		{d4-d7}, [r0]!
	bxeq		lr			@ count was a multiple of 8: done
	nop
	b		1b
.endm
73
74
@ do_argb_finish
@
@ Shared tail of the 32bpp routines: stores the last, partial group of
@ pixels and returns to the blit routine's caller. This is a branch
@ target of the do_argb* macros, not a C-callable function.
@ In:  r0 = dst
@      r2 = remaining pixel count minus 8 (as left by "subs r2, r2, #8")
@      d4-d7 = 8 processed pixels in planar form (one byte lane per reg)
@      lr = return address of the blit routine
@ Clobbers: r0, r2, q2, q3, flags.
do_argb_finish:
	add		r2, r2, #8		@ r2 = pixels still to store
	cmp		r2, #0
	bxle		lr			@ count <= 0: nothing may be written

	@ re-interleave the planar data into whole pixels
	vzip.8		d4, d5			@ lane0|lane1 -> byte pairs 0,1
	vzip.8		d6, d7			@ lane2|lane3 -> byte pairs 2,3
	vzip.16		q2, q3			@ d4..d7 = pixels 0-1, 2-3, 4-5, 6-7

	vst1.32		{d4[0]}, [r0]!
	cmp		r2, #1
	bxle		lr
	vst1.32		{d4[1]}, [r0]!
	cmp		r2, #2
	bxle		lr
	vst1.32		{d5[0]}, [r0]!
	cmp		r2, #3
	bxle		lr
	vst1.32		{d5[1]}, [r0]!
	cmp		r2, #4
	bxle		lr
	vst1.32		{d6[0]}, [r0]!
	cmp		r2, #5
	bxle		lr
	vst1.32		{d6[1]}, [r0]!
	cmp		r2, #6
	bxle		lr
	vst1.32		{d7[0]}, [r0]!		@ 7th pixel; 8 never reaches here
	bx		lr
101
102
@ do_argb_to_rgb565_alpha bgr2rgb global_alpha
@
@ Body of the 32bpp -> RGB565 alpha blending routines.
@ C-level arguments: void *dst, const void *src, int count, uint global_alpha
@   r0 = dst (16bpp), r1 = src (32bpp), r2 = count (pixels), r3 = global_alpha
@ bgr2rgb:      when non-zero, source byte lanes 0 and 2 are swapped.
@ global_alpha: when non-zero, r3 is used as the alpha of every pixel;
@               otherwise byte lane 3 of each source pixel is the alpha
@               and r3 is not read.
@
@ Per pass: 8 destination pixels are expanded from 565 to 8 bits per
@ channel (top bits replicated into the low bits), blended with
@ d = (((s-d)*a+255)>>8)+d, and packed back to 565. A tail of fewer than
@ 8 pixels is passed to do_rgb565_finish in d1 (low bytes) / d2 (high
@ bytes). 32 bytes of src and 16 bytes of dst are always read.
@ Clobbers: r0-r2, r12, q0-q3, q8-q12, flags.
.macro do_argb_to_rgb565_alpha bgr2rgb global_alpha
	mov		r12, #0xff
	vdup.i16	q12, r12		@ q12 = rounding term (255) per lane
.if \global_alpha
	vdup.16		q11, r3			@ q11 = constant alpha per lane
.endif
1:
	pld		[r1, #128]
	pld		[r0, #128]
	vld4.8		{d4-d7}, [r1]!		@ source pixels, planar
	vld2.8		{d1-d2}, [r0]		@ d1 = gggbbbbb, d2 = rrrrrggg
.if \bgr2rgb
	vswp		d4, d6			@ BGR->RGB
.endif
.if !\global_alpha
	vmovl.u8	q11, d7			@ q11 = per-pixel source alpha
.endif
	@ unpack destination 565 -> 8 bits per channel
	vshl.i8		d0, d1, #3		@ bbbbb000
	vshr.u8		d1, d1, #3		@ 000gggbb (low half of G on top)
	vsri.i8		d0, d0, #5		@ B
	vsli.i8		d1, d2, #5		@ all 6 G bits now in bits 7..2
	vsri.i8		d2, d2, #5		@ R
	vsri.i8		d1, d1, #6		@ G
	@ d = (((s-d)*a+255)>>8)+d
	vsubl.u8	q8,  d4, d0
	vsubl.u8	q9,  d5, d1
	vsubl.u8	q10, d6, d2
	vmul.s16	q8,  q8,  q11
	vmul.s16	q9,  q9,  q11
	vmul.s16	q10, q10, q11
	vaddhn.i16	d4, q8,  q12
	vaddhn.i16	d5, q9,  q12
	vaddhn.i16	d6, q10, q12
	vadd.i8		q2, q2, q0		@ blended B in d4, G in d5
	vadd.i8		d2, d2, d6		@ rrrr rrrr
	@ pack back to 565
	vshr.u8		d0, d5, #2		@ 00gg gggg
	vshr.u8		d1, d4, #3		@ 000b bbbb
	vsri.i8		d2, d5, #5		@ rrrr rggg
	vsli.i8		d1, d0, #5		@ gggb bbbb
	subs		r2, r2, #8
	blt		do_rgb565_finish	@ partial group: store 1..7 and return
	vst2.8		{d1-d2}, [r0]!
	bxeq		lr			@ count was a multiple of 8: done
	nop
	b		1b
.endm
150
151
@ do_rgb565_finish
@
@ Shared tail of the RGB565 routines: stores the last, partial group of
@ pixels and returns to the blit routine's caller. This is a branch
@ target of do_argb_to_rgb565_alpha, not a C-callable function.
@ In:  r0 = dst
@      r2 = remaining pixel count minus 8 (as left by "subs r2, r2, #8")
@      d1 = low bytes, d2 = high bytes of 8 packed 565 pixels
@      lr = return address of the blit routine
@ Clobbers: r0, r2, d1, d2, flags.
do_rgb565_finish:
	add		r2, r2, #8		@ r2 = pixels still to store
	cmp		r2, #0
	bxle		lr			@ count <= 0: nothing may be written

	vzip.8		d1, d2			@ d1 = pixels 0-3, d2 = pixels 4-7

	vst1.16		{d1[0]}, [r0]!
	cmp		r2, #1
	bxle		lr
	vst1.16		{d1[1]}, [r0]!
	cmp		r2, #2
	bxle		lr
	vst1.16		{d1[2]}, [r0]!
	cmp		r2, #3
	bxle		lr
	vst1.16		{d1[3]}, [r0]!
	cmp		r2, #4
	bxle		lr
	vst1.16		{d2[0]}, [r0]!
	cmp		r2, #5
	bxle		lr
	vst1.16		{d2[1]}, [r0]!
	cmp		r2, #6
	bxle		lr
	vst1.16		{d2[2]}, [r0]!		@ 7th pixel; 8 never reaches here
	bx		lr
176
177
@ Exported blit routines. Each one is a single macro instantiation; the
@ macro arguments select the byte-lane swap (first argument) and, for
@ the blending variants, whether r3 supplies one alpha for all pixels
@ (second argument).
@ Register interface for all of them:
@   r0 = dst, r1 = src, r2 = count (pixels), r3 = abits / global_alpha

@ copy, byte lane 3 of every pixel replaced by abits
func(neon_ARGBtoXRGB):
	do_argb			0

@ as above, with byte lanes 0 and 2 swapped
func(neon_ABGRtoXRGB):
	do_argb			1

@ blend using per-pixel source alpha
func(neon_ARGBtoXRGBalpha):
	do_argb_alpha		0, 0

@ blend using per-pixel source alpha, byte lanes 0 and 2 swapped
func(neon_ABGRtoXRGBalpha):
	do_argb_alpha		1, 0

@ blend using the alpha passed in r3
func(neon_ARGBtoXRGBalphaS):
	do_argb_alpha		0, 1

@ blend using the alpha passed in r3, byte lanes 0 and 2 swapped
func(neon_ABGRtoXRGBalphaS):
	do_argb_alpha		1, 1

@ blend onto RGB565 using per-pixel source alpha
func(neon_ARGBtoRGB565alpha):
	do_argb_to_rgb565_alpha	0, 0

@ blend onto RGB565 using per-pixel source alpha, byte lanes 0 and 2 swapped
func(neon_ABGRtoRGB565alpha):
	do_argb_to_rgb565_alpha	1, 0
201
a1f34081 202@ vim:filetype=armasm