Commit | Line | Data |
---|---|---|
7fc3ac8a H |
1 | @@\r |
2 | @@ Copyright (C) 2012 Roman Pauer\r | |
3 | @@\r | |
4 | @@ Permission is hereby granted, free of charge, to any person obtaining a copy of\r | |
5 | @@ this software and associated documentation files (the "Software"), to deal in\r | |
6 | @@ the Software without restriction, including without limitation the rights to\r | |
7 | @@ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies\r | |
8 | @@ of the Software, and to permit persons to whom the Software is furnished to do\r | |
9 | @@ so, subject to the following conditions:\r | |
10 | @@\r | |
11 | @@ The above copyright notice and this permission notice shall be included in all\r | |
12 | @@ copies or substantial portions of the Software.\r | |
13 | @@\r | |
14 | @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r | |
15 | @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r | |
16 | @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\r | |
17 | @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\r | |
18 | @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\r | |
19 | @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r | |
20 | @@ SOFTWARE.\r | |
21 | @@\r | |
22 | \r | |
23 | .arm\r | |
24 | \r | |
33fa890d | 25 | #include "neon_normalxx.Sinc"\r |
67381db0 | 26 | #include "neon_eagle2x.Sinc"\r |
7fc3ac8a H |
27 | \r |
28 | .global neon_eagle2x_8_8\r | |
29 | .global neon_eagle2x_16_16\r | |
30 | .global neon_eagle2x_8_16\r | |
31 | \r | |
32 | .align 4\r | |
neon_eagle2x_8_8:

@ Runs the neon_eagle2x_8_8_line macro (from neon_eagle2x.Sinc) over an image:
@ the "first" variant on row 0, "middle" on rows 1 .. height-2 and "last" on
@ the final row.  Every source row feeds two destination rows (dst and
@ dst + dststride); the macro is expected to advance the source pointers by
@ width bytes and the destination pointers by 2 * width bytes per row.
@
@ height must be at least 2: the first and last rows are always processed as
@ two distinct rows.
@
@ Arguments:
@ r0 = const uint8_t *src
@ r1 = uint8_t *dst
@ r2 = unsigned int width (pixels)
@ r3 = unsigned int srcstride (bytes)
@ [sp] = unsigned int dststride (bytes)
@ [sp+4] = unsigned int height
@ lr = return address
@
@ r4-r10 and q4-q7 are saved and restored here; ip is clobbered.

    ldr     ip, [sp]                    @ ip = dststride
    push    {r4-r10}
    ldr     r9, [sp, #(8*4)]            @ r9 = height (7 pushed words + dststride slot)
    sub     r4, r0, r3                  @ r4 = src - srcstride
    mov     r10, sp                     @ oldsp = sp
    add     r5, r0, r3                  @ r5 = src + srcstride
    bic     sp, sp, #31                 @ align sp to 32 bytes
    add     r6, r1, ip                  @ r6 = dst + dststride
    sub     sp, sp, #64                 @ sp -= 64 (save area for q4-q7)
    sub     r3, r3, r2                  @ r3 = srcstride - width
    vst1.64 {d8-d11}, [sp,:256]         @ save q4,q5
    add     r7, sp, #32                 @ r7 = sp + 32
    sub     ip, ip, r2                  @ ip = dststride - width
    vst1.64 {d12-d15}, [r7,:256]        @ save q6,q7
    lsl     ip, #1                      @ ip = 2 * dststride - 2 * width
    mov     r7, r2                      @ r7 = width
    sub     r9, r9, #2                  @ r9 = height - 2 = number of middle rows


@ r0 = src
@ r1 = dst
@ r2 = width
@ r3 = srcdiff (srcstride - width)
@ r4 = src - srcstride
@ r5 = src + srcstride
@ r6 = dst + dststride
@ r7 = counter
@ r8 = tmpreg
@ r9 = middle rows still to process
@ r10 = oldsp
@ ip = dstdiff (2 * dststride - 2 * width)

    @ first line
    neon_eagle2x_8_8_line first, r4, r0, r5, r7, r1, r6, r8, 0, 0

    add     r0, r0, r3
    add     r4, r4, r3
    add     r5, r5, r3
    add     r1, r1, ip
    add     r6, r6, ip

    @ With height == 2 there are no middle rows.  Entering the loop anyway
    @ would wrap r9 to 0xffffffff on the first decrement and run far past
    @ the end of both buffers.
    cmp     r9, #0
    beq     .Lneon_eagle2x_8_8_last

    @ middle lines
    101:
    mov     r7, r2

    neon_eagle2x_8_8_line middle, r4, r0, r5, r7, r1, r6, r8, 0, 0

    subs    r9, r9, #1                  @ flags are consumed by the bne below
    add     r0, r0, r3
    add     r4, r4, r3
    add     r5, r5, r3
    add     r1, r1, ip
    add     r6, r6, ip
    bne     101b

    @ last line
    .Lneon_eagle2x_8_8_last:
    mov     r7, r2

    neon_eagle2x_8_8_line last, r4, r0, r5, r7, r1, r6, r8, 0, 0

    @ Reload both halves of the save area before sp moves back up, so that
    @ no memory below sp is ever read (an asynchronous signal could have
    @ overwritten it).
    add     ip, sp, #32                 @ ip = sp + 32
    vld1.64 {d8-d11}, [sp,:256]         @ restore q4,q5
    vld1.64 {d12-d15}, [ip,:256]        @ restore q6,q7
    mov     sp, r10                     @ sp = oldsp
    pop     {r4-r10}
    bx      lr

@ end procedure neon_eagle2x_8_8
111 | \r | |
112 | \r | |
neon_eagle2x_16_16:

@ Runs the neon_eagle2x_16_16_line macro (from neon_eagle2x.Sinc) over an
@ image of 16-bit pixels: the "first" variant on row 0, "middle" on rows
@ 1 .. height-2 and "last" on the final row.  Every source row feeds two
@ destination rows (dst and dst + dststride); the macro is expected to advance
@ the source pointers by 2 * width bytes and the destination pointers by
@ 4 * width bytes per row.
@
@ height must be at least 2: the first and last rows are always processed as
@ two distinct rows.
@
@ Arguments:
@ r0 = const uint16_t *src
@ r1 = uint16_t *dst
@ r2 = unsigned int width (pixels)
@ r3 = unsigned int srcstride (bytes)
@ [sp] = unsigned int dststride (bytes)
@ [sp+4] = unsigned int height
@ lr = return address
@
@ r4-r10 and q4-q7 are saved and restored here; ip is clobbered.

    ldr     ip, [sp]                    @ ip = dststride
    push    {r4-r10}
    ldr     r9, [sp, #(8*4)]            @ r9 = height (7 pushed words + dststride slot)
    sub     r4, r0, r3                  @ r4 = src - srcstride
    mov     r10, sp                     @ oldsp = sp
    add     r5, r0, r3                  @ r5 = src + srcstride
    bic     sp, sp, #31                 @ align sp to 32 bytes
    add     r6, r1, ip                  @ r6 = dst + dststride
    sub     sp, sp, #64                 @ sp -= 64 (save area for q4-q7)
    sub     r3, r3, r2, lsl #1          @ r3 = srcstride - 2 * width
    vst1.64 {d8-d11}, [sp,:256]         @ save q4,q5
    add     r7, sp, #32                 @ r7 = sp + 32
    sub     ip, ip, r2, lsl #1          @ ip = dststride - 2 * width
    vst1.64 {d12-d15}, [r7,:256]        @ save q6,q7
    lsl     ip, #1                      @ ip = 2 * dststride - 4 * width
    mov     r7, r2                      @ r7 = width
    sub     r9, r9, #2                  @ r9 = height - 2 = number of middle rows

@ r0 = src
@ r1 = dst
@ r2 = width
@ r3 = srcdiff (srcstride - 2 * width)
@ r4 = src - srcstride
@ r5 = src + srcstride
@ r6 = dst + dststride
@ r7 = counter
@ r8 = tmpreg
@ r9 = middle rows still to process
@ r10 = oldsp
@ ip = dstdiff (2 * dststride - 4 * width)

    @ first line
    neon_eagle2x_16_16_line first, r4, r0, r5, r7, r1, r6, r8, 0, 0

    add     r0, r0, r3
    add     r4, r4, r3
    add     r5, r5, r3
    add     r1, r1, ip
    add     r6, r6, ip

    @ With height == 2 there are no middle rows.  Entering the loop anyway
    @ would wrap r9 to 0xffffffff on the first decrement and run far past
    @ the end of both buffers.
    cmp     r9, #0
    beq     .Lneon_eagle2x_16_16_last

    @ middle lines
    101:
    mov     r7, r2

    neon_eagle2x_16_16_line middle, r4, r0, r5, r7, r1, r6, r8, 0, 0

    subs    r9, r9, #1                  @ flags are consumed by the bne below
    add     r0, r0, r3
    add     r4, r4, r3
    add     r5, r5, r3
    add     r1, r1, ip
    add     r6, r6, ip
    bne     101b

    @ last line
    .Lneon_eagle2x_16_16_last:
    mov     r7, r2

    neon_eagle2x_16_16_line last, r4, r0, r5, r7, r1, r6, r8, 0, 0

    @ Reload both halves of the save area before sp moves back up, so that
    @ no memory below sp is ever read (an asynchronous signal could have
    @ overwritten it).
    add     ip, sp, #32                 @ ip = sp + 32
    vld1.64 {d8-d11}, [sp,:256]         @ restore q4,q5
    vld1.64 {d12-d15}, [ip,:256]        @ restore q6,q7
    mov     sp, r10                     @ sp = oldsp
    pop     {r4-r10}
    bx      lr

@ end procedure neon_eagle2x_16_16
190 | \r | |
191 | \r | |
neon_eagle2x_8_16:

@ Scales a paletted 8-bit image into a 16-bit destination in two stages:
@ each source row is first expanded through the palette into a temporary
@ row of 16-bit pixels (neon_normal1x_8_16_line), then the temporary rows are
@ fed to neon_eagle2x_16_16_line.  Three temporary rows (previous, current,
@ next) are kept on the stack and rotated after every output row.
@
@ height must be at least 2: the first and last rows are always processed as
@ two distinct rows.
@
@ Arguments:
@ r0 = const uint8_t *src
@ r1 = uint16_t *dst (written by the 16-bit line macro; dstdiff below is
@      2 * dststride - 4 * width, i.e. two bytes per output pixel)
@ r2 = const uint32_t *palette
@ r3 = unsigned int width (pixels)
@ [sp] = unsigned int srcstride (bytes)
@ [sp+4] = unsigned int dststride (bytes)
@ [sp+8] = unsigned int height
@ lr = return address
@
@ r4-r11, lr and q4-q7 are saved and restored here; ip is clobbered.

    @ three temporary lines

    ldr     ip, [sp]                    @ ip = srcstride
    push    {r4-r11,lr}
    ldr     r4, [sp, #(4*10)]           @ r4 = dststride (9 pushed words + srcstride slot)
    ldr     r5, [sp, #(4*11)]           @ r5 = height
    mov     r6, sp                      @ r6 = sp
    sub     ip, ip, r3                  @ ip = srcstride - width
    bic     sp, sp, #31                 @ align sp to 32 bytes
    sub     r7, r4, r3, lsl #1          @ r7 = dststride - 2 * width
    sub     sp, sp, r3, lsl #1          @ sp -= 2 * width
    sub     r5, r5, #2                  @ height -= 2 (number of middle rows)
    mov     r10, sp                     @ tmpline3 = sp
    lsl     r7, #1                      @ r7 = 2 * dststride - 4 * width
    bic     sp, sp, #31                 @ align sp to 32 bytes
    sub     sp, sp, r3, lsl #1          @ sp -= 2 * width
    mov     r11, sp                     @ tmpline2 = sp
    bic     sp, sp, #31                 @ align sp to 32 bytes
    sub     sp, sp, r3, lsl #1          @ sp -= 2 * width
    mov     lr, sp                      @ tmpline1 = sp
    bic     sp, sp, #31                 @ align sp to 32 bytes

    @ Allocate the frame before storing into it, so that nothing is ever
    @ written below sp (an asynchronous signal could overwrite it).
    @ Frame: 9 words of locals at [sp, #0..#35], q4-q7 at [sp, #36..#99];
    @ sp + 36 is 32-byte aligned because sp was aligned just above.
    sub     sp, sp, #(36 + 64)          @ sp -= (36 + 64)
    add     r8, sp, #36                 @ r8 = save slot for q4,q5
    add     r9, sp, #(36 + 32)          @ r9 = save slot for q6,q7
    vst1.64 {d8-d11}, [r8,:256]         @ save q4,q5
    vst1.64 {d12-d15}, [r9,:256]        @ save q6,q7
    str     r6, [sp]                    @ oldsp = r6
    str     r5, [sp, #4]                @ height = r5
    str     ip, [sp, #8]                @ srcdiff = ip
    str     r7, [sp, #12]               @ dstdiff = r7
    str     r4, [sp, #16]               @ dststride = r4
    str     lr, [sp, #20]               @ tmpline1 = lr
    str     r11, [sp, #24]              @ tmpline2 = r11
    str     r10, [sp, #28]              @ tmpline3 = r10
    str     r3, [sp, #32]               @ width = r3

@ r0 = src
@ r1 = dst
@ r2 = palette
@ r3 = counter
@ r4 = dst2

@ r11 = bufptr1
@ ip = bufptr2
@ lr = bufptr3

@ [sp] = oldsp
@ [sp, #4] = middle rows still to process (height - 2 on entry)
@ [sp, #8] = srcdiff (srcstride - width)
@ [sp, #12] = dstdiff (2 * dststride - 4 * width)
@ [sp, #16] = dststride
@ [sp, #20] = tmpline1
@ [sp, #24] = tmpline2
@ [sp, #28] = tmpline3
@ [sp, #32] = width

    @ lr = tmpline1
    @ r3 = counter

    @ first line
    neon_normal1x_8_16_line r0, lr, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, ip

    ldr     r7, [sp, #8]                @ r7 = srcdiff
    ldr     r3, [sp, #32]               @ counter = width
    ldr     lr, [sp, #24]               @ bufptr3 = tmpline2
    add     r0, r0, r7                  @ src += srcdiff

    @ second line
    neon_normal1x_8_16_line r0, lr, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, ip

    ldr     r9, [sp, #16]               @ r9 = dststride
    ldr     r3, [sp, #32]               @ counter = width
    ldr     ip, [sp, #20]               @ bufptr2 = tmpline1
    ldr     lr, [sp, #24]               @ bufptr3 = tmpline2
    add     r4, r1, r9                  @ dst2 = dst + dststride

    @ first temporary line
    neon_eagle2x_16_16_line first, r11, ip, lr, r3, r1, r4, r5, 1, 0

    ldr     r7, [sp, #8]                @ r7 = srcdiff
    ldr     r8, [sp, #12]               @ r8 = dstdiff
    ldr     r3, [sp, #32]               @ counter = width
    ldr     lr, [sp, #28]               @ bufptr3 = tmpline3
    add     r0, r0, r7                  @ src += srcdiff
    add     r1, r1, r8                  @ dst += dstdiff

    @ With height == 2 there are no middle rows: tmpline1/tmpline2 already
    @ hold the previous/current rows for the last pass.  Entering the loop
    @ anyway would wrap the counter to 0xffffffff and read a third source
    @ row that does not exist.
    ldr     r6, [sp, #4]                @ r6 = middle rows to process
    cmp     r6, #0
    beq     .Lneon_eagle2x_8_16_last

    100:

    @ line n+1
    neon_normal1x_8_16_line r0, lr, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, ip

    ldr     r9, [sp, #16]               @ r9 = dststride
    ldr     r11, [sp, #20]              @ bufptr1 = tmpline1
    ldr     ip, [sp, #24]               @ bufptr2 = tmpline2
    ldr     lr, [sp, #28]               @ bufptr3 = tmpline3
    add     r4, r1, r9                  @ dst2 = dst + dststride
    ldr     r3, [sp, #32]               @ counter = width
    @ rotate the three buffers for the next iteration
    str     r11, [sp, #28]              @ tmpline3 = bufptr1
    str     ip, [sp, #20]               @ tmpline1 = bufptr2
    str     lr, [sp, #24]               @ tmpline2 = bufptr3

    @ temporary line n
    neon_eagle2x_16_16_line middle, r11, ip, lr, r3, r1, r4, r5, 1, 0

    ldr     r6, [sp, #4]                @ r6 = height
    ldr     r7, [sp, #8]                @ r7 = srcdiff
    ldr     r8, [sp, #12]               @ r8 = dstdiff
    ldr     r3, [sp, #32]               @ counter = width
    subs    r6, r6, #1                  @ height-- (flags are consumed by the bne below)
    ldr     lr, [sp, #28]               @ bufptr3 = tmpline3
    add     r0, r0, r7                  @ src += srcdiff
    add     r1, r1, r8                  @ dst += dstdiff
    str     r6, [sp, #4]                @ height = r6
    bne     100b


    .Lneon_eagle2x_8_16_last:
    ldr     r9, [sp, #16]               @ r9 = dststride
    ldr     r11, [sp, #20]              @ bufptr1 = tmpline1
    ldr     ip, [sp, #24]               @ bufptr2 = tmpline2
    add     r4, r1, r9                  @ dst2 = dst + dststride

    @ last temporary line
    neon_eagle2x_16_16_line last, r11, ip, lr, r3, r1, r4, r5, 1, 0


    @ Reload q4-q7 while the frame is still allocated; only then switch sp
    @ back, so that no memory below sp is ever read.
    add     r6, sp, #36                 @ r6 = sp + 36
    add     ip, sp, #(36 + 32)          @ ip = sp + 36 + 32
    vld1.64 {d8-d11}, [r6,:256]         @ restore q4,q5
    vld1.64 {d12-d15}, [ip,:256]        @ restore q6,q7
    ldr     sp, [sp]                    @ sp = oldsp
    pop     {r4-r11,lr}
    bx      lr

@ end procedure neon_eagle2x_8_16
337 | \r |