Merged SDL 1.3 revision 5424, fixing a crash in the joystick code on recent kernels.
[sdl_omap.git] / src / video / SDL_blit_neon.S
... / ...
CommitLineData
1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011,2012
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
11.text
12.align 2
13
/*
 * func(name): mark `name` as a global symbol and emit the symbol name
 * itself, so that the use site only has to append the colon:
 *
 *     func(my_routine):
 */
#define func(name) .global name; name
17
@ void *dst, const void *src, int count, uint abits
@
@ do_argb bgr2rgb
@   r0 = dst, r1 = src, r2 = count in pixels, r3 = abits
@
@ Copies 32-bit pixels from src to dst, eight per iteration, replacing
@ byte 3 of every pixel with the low byte of abits.  When bgr2rgb is
@ non-zero, bytes 0 and 2 of each pixel are exchanged on the way.
@ A trailing group of fewer than eight pixels is handed to
@ do_argb_finish with the converted data still in d4-d7.
@ Note: the load always fetches eight source pixels, even when fewer
@ than eight remain.
.macro do_argb bgr2rgb
        vdup.i8     d0, r3                  @ d0 = abits byte in all 8 lanes
0:
        vld4.8      {d4-d7}, [r1]!          @ de-interleave 8 pixels into 4 byte planes
.if \bgr2rgb
        vswp        d4, d6                  @ BGR->RGB
.endif
        subs        r2, r2, #8              @ eight more pixels consumed
        vmov.i8     d7, d0                  @ plane 3 := abits (flags untouched)
        blt         do_argb_finish          @ partial group: tail code stores it
        vst4.8      {d4-d7}, [r0]!          @ re-interleave and store 8 pixels
        bxeq        lr                      @ count hit exactly zero: done
        nop                                 @ NOTE(review): purpose not evident here; kept as-is
        b           0b
.endm
34
@ void *dst, const void *src, int count, uint global_alpha
@
@ do_argb_alpha bgr2rgb global_alpha
@   r0 = dst, r1 = src, r2 = count in pixels, r3 = global alpha
@
@ Blends 32-bit source pixels over 32-bit destination pixels, eight per
@ iteration.  For byte planes 0..2:
@     d = (((s - d) * a + 255) >> 8) + d
@ Byte 3 of the destination pixel is written back unchanged.
@ The weight a is the low 16 bits of r3 when global_alpha is non-zero,
@ otherwise it is byte 3 of each source pixel.  When bgr2rgb is non-zero,
@ source bytes 0 and 2 are exchanged before blending.
@ A trailing group of fewer than eight pixels is handed to
@ do_argb_finish with the blended data in d4-d7.
@ Clobbers r12, q0-q3, q8-q12.
.macro do_argb_alpha bgr2rgb global_alpha
        mov         r12, #0xff
        vdup.i16    q12, r12                @ q12 = 255 per 16-bit lane (bias before >>8)
.if \global_alpha
        vdup.16     q11, r3                 @ q11 = constant weight from r3
.endif
0:
        pld         [r1, #64*2]
        pld         [r0, #64*2]
        vld4.8      {d4-d7}, [r1]!          @ source planes, src pointer advances
        vld4.8      {d0-d3}, [r0]           @ destination planes, no writeback yet
.if \bgr2rgb
        vswp        d4, d6                  @ BGR->RGB
.endif
.if !\global_alpha
        vmovl.u8    q11, d7                 @ per-pixel weight: widen source byte 3
.endif
        @ d = (((s-d)*a+255)>>8)+d
        vsubl.u8    q8,  d4, d0             @ s - d, widened to 16 bit
        vsubl.u8    q9,  d5, d1
        vsubl.u8    q10, d6, d2
        vmul.s16    q8,  q8,  q11           @ * a
        vmul.s16    q9,  q9,  q11
        vmul.s16    q10, q10, q11
        vaddhn.i16  d4,  q8,  q12           @ (+ 255) >> 8, narrowed to 8 bit
        vaddhn.i16  d5,  q9,  q12
        vaddhn.i16  d6,  q10, q12
        vadd.i8     q2, q0                  @ + d for planes 0 and 1 (d4,d5 += d0,d1)
        vadd.i8     d6, d2                  @ + d for plane 2
        vmov.i8     d7, d3                  @ plane 3 comes from the destination
        subs        r2, r2, #8
        blt         do_argb_finish          @ partial group: tail code stores it
        vst4.8      {d4-d7}, [r0]!
        bxeq        lr                      @ count hit exactly zero: done
        nop                                 @ NOTE(review): purpose not evident here; kept as-is
        b           0b
.endm
73
74
@ do_argb_finish: store the last, partial group of 32-bit pixels.
@
@ Reached by branch (not call) from the do_argb and do_argb_alpha loops
@ once fewer than eight pixels remain.  On entry:
@   r0    = destination pointer for the first remaining pixel
@   r2    = remaining pixel count minus 8 (negative)
@   d4-d7 = the four byte planes of the last eight converted pixels
@ Stores min(remaining, 7) pixels and returns to the original caller
@ through lr.  If nothing remains (the routine was entered with a count
@ of zero or less) it returns without touching the destination.
do_argb_finish:
        add         r2, r2, #8              @ undo the loop subtraction: r2 = pixels left
        cmp         r2, #0
        bxle        lr                      @ empty request: nothing may be written

        vzip.8      d4, d5                  @ planes 0|1 -> interleaved byte pairs
        vzip.8      d6, d7                  @ planes 2|3 -> interleaved byte pairs
        vzip.16     q2, q3                  @ pairs -> whole pixels: d4=p0,p1 d5=p2,p3 d6=p4,p5 d7=p6,p7

        vst1.32     d4[0], [r0]!            @ pixel 0
        cmp         r2, #1
        bxle        lr
        vst1.32     d4[1], [r0]!            @ pixel 1
        cmp         r2, #2
        bxle        lr
        vst1.32     d5[0], [r0]!            @ pixel 2
        cmp         r2, #3
        bxle        lr
        vst1.32     d5[1], [r0]!            @ pixel 3
        cmp         r2, #4
        bxle        lr
        vst1.32     d6[0], [r0]!            @ pixel 4
        cmp         r2, #5
        bxle        lr
        vst1.32     d6[1], [r0]!            @ pixel 5
        cmp         r2, #6
        bxle        lr
        vst1.32     d7[0], [r0]!            @ pixel 6 (a full group of 8 never gets here)
        bx          lr
101
102
@ void *dst, const void *src, int count, uint global_alpha
@
@ do_argb_to_rgb565_alpha bgr2rgb global_alpha
@   r0 = dst (16-bit pixels), r1 = src (32-bit pixels),
@   r2 = count in pixels, r3 = global alpha
@
@ Blends 32-bit source pixels over RGB565 destination pixels, eight per
@ iteration.  Each destination pixel is expanded to three 8-bit
@ channels, blended with
@     d = (((s - d) * a + 255) >> 8) + d
@ and packed back to 5/6/5 bits.  The weight a is the low 16 bits of r3
@ when global_alpha is non-zero, otherwise byte 3 of each source pixel.
@ When bgr2rgb is non-zero, source bytes 0 and 2 are exchanged first.
@ A trailing group of fewer than eight pixels is handed to
@ do_rgb565_finish with the packed low/high bytes in d1/d2.
@ Clobbers r12, q0-q3, q8-q12.
.macro do_argb_to_rgb565_alpha bgr2rgb global_alpha
        mov         r12, #0xff
        vdup.i16    q12, r12                @ q12 = 255 per 16-bit lane (bias before >>8)
.if \global_alpha
        vdup.16     q11, r3                 @ q11 = constant weight from r3
.endif
0:
        pld         [r1, #64*2]
        pld         [r0, #64*2]
        vld4.8      {d4-d7}, [r1]!          @ source byte planes, src pointer advances
        vld2.8      {d1-d2}, [r0]           @ d1 = low bytes (gggb bbbb), d2 = high bytes (rrrr rggg)
.if \bgr2rgb
        vswp        d4, d6                  @ BGR->RGB
.endif
.if !\global_alpha
        vmovl.u8    q11, d7                 @ per-pixel weight: widen source byte 3
.endif
        @ expand the 565 destination to 8 bits per channel
        vshl.i8     d0, d1, #3              @ bbbb b000
        vshr.u8     d1, d1, #3              @ 000g ggbb (low green bits)
        vsri.i8     d0, d0, #5              @ B
        vsli.i8     d1, d2, #5              @ high green bits inserted above: gggg ggbb
        vsri.i8     d2, d2, #5              @ R
        vsri.i8     d1, d1, #6              @ G
        @ d = (((s-d)*a+255)>>8)+d
        vsubl.u8    q8,  d4, d0             @ s - d, widened to 16 bit
        vsubl.u8    q9,  d5, d1
        vsubl.u8    q10, d6, d2
        vmul.s16    q8,  q8,  q11           @ * a
        vmul.s16    q9,  q9,  q11
        vmul.s16    q10, q10, q11
        vaddhn.i16  d4,  q8,  q12           @ (+ 255) >> 8, narrowed to 8 bit
        vaddhn.i16  d5,  q9,  q12
        vaddhn.i16  d6,  q10, q12
        vadd.i8     q2, q0                  @ + d for B and G (d4,d5 += d0,d1)
        vadd.i8     d2, d6                  @ rrrr rrrr
        @ pack back to 565
        vshr.u8     d0, d5, #2              @ 00gg gggg
        vshr.u8     d1, d4, #3              @ 000b bbbb
        vsri.i8     d2, d5, #5              @ rrrr rggg
        vsli.i8     d1, d0, #5              @ gggb bbbb
        subs        r2, r2, #8
        blt         do_rgb565_finish        @ partial group: tail code stores it
        vst2.8      {d1-d2}, [r0]!          @ interleave low/high bytes, store 8 pixels
        bxeq        lr                      @ count hit exactly zero: done
        nop                                 @ NOTE(review): purpose not evident here; kept as-is
        b           0b
.endm
150
151
@ do_rgb565_finish: store the last, partial group of 16-bit pixels.
@
@ Reached by branch (not call) from the do_argb_to_rgb565_alpha loop
@ once fewer than eight pixels remain.  On entry:
@   r0 = destination pointer for the first remaining pixel
@   r2 = remaining pixel count minus 8 (negative)
@   d1 = low bytes, d2 = high bytes of the last eight packed pixels
@ Stores min(remaining, 7) pixels and returns to the original caller
@ through lr.  If nothing remains (the routine was entered with a count
@ of zero or less) it returns without touching the destination.
do_rgb565_finish:
        add         r2, r2, #8              @ undo the loop subtraction: r2 = pixels left
        cmp         r2, #0
        bxle        lr                      @ empty request: nothing may be written

        vzip.8      d1, d2                  @ low|high bytes -> pixels: d1 = p0..p3, d2 = p4..p7

        vst1.16     d1[0], [r0]!            @ pixel 0
        cmp         r2, #1
        bxle        lr
        vst1.16     d1[1], [r0]!            @ pixel 1
        cmp         r2, #2
        bxle        lr
        vst1.16     d1[2], [r0]!            @ pixel 2
        cmp         r2, #3
        bxle        lr
        vst1.16     d1[3], [r0]!            @ pixel 3
        cmp         r2, #4
        bxle        lr
        vst1.16     d2[0], [r0]!            @ pixel 4
        cmp         r2, #5
        bxle        lr
        vst1.16     d2[1], [r0]!            @ pixel 5
        cmp         r2, #6
        bxle        lr
        vst1.16     d2[2], [r0]!            @ pixel 6 (a full group of 8 never gets here)
        bx          lr
176
177
@ Exported entry points.  Each one is a single macro instantiation; every
@ macro body ends in an unconditional branch or a return, so no routine
@ falls through into the next.

@ 32-bit copy with byte 3 replaced, no channel swap
func(neon_ARGBtoXRGB):
        do_argb                 0

@ 32-bit copy with byte 3 replaced, bytes 0 and 2 swapped
func(neon_ABGRtoXRGB):
        do_argb                 1

@ 32-bit blend, weight taken from each source pixel
func(neon_ARGBtoXRGBalpha):
        do_argb_alpha           0, 0

@ 32-bit blend, bytes 0 and 2 swapped, weight taken from each source pixel
func(neon_ABGRtoXRGBalpha):
        do_argb_alpha           1, 0

@ 32-bit blend, weight taken from r3
func(neon_ARGBtoXRGBalphaS):
        do_argb_alpha           0, 1

@ 32-bit blend, bytes 0 and 2 swapped, weight taken from r3
func(neon_ABGRtoXRGBalphaS):
        do_argb_alpha           1, 1

@ 32-bit onto RGB565 blend, weight taken from each source pixel
func(neon_ARGBtoRGB565alpha):
        do_argb_to_rgb565_alpha 0, 0

@ 32-bit onto RGB565 blend, bytes 0 and 2 swapped, weight from each source pixel
func(neon_ABGRtoRGB565alpha):
        do_argb_to_rgb565_alpha 1, 0
201
202@ vim:filetype=armasm