anti-tear modes for Wiz
[libpicofe.git] / common / arm_utils.s
CommitLineData
6ab2f79c 1@ vim:filetype=armasm\r
720ee7f6 2@ some color conversion and blitting routines\r
3\r
0480e6c9 4@ (c) Copyright 2006, 2007 notaz\r
720ee7f6 5@ All Rights Reserved\r
6\r
49fe50f0 7@ vim:filetype=armasm\r
720ee7f6 8\r
0480e6c9 9.text\r
10.align 4\r
11\r
@ convRGB32_2 rin, sh=0
@
@ Expand the two packed 16-bit pixels held in \rin
@     0000bbb0 ggg0rrr0  0000bbb0 ggg0rrr0
@ into two 32-bit words of the form
@     00000000 rrr00000 ggg00000 bbb00000
@ OR each word with itself shifted right by 3, and store both through r0
@ with post-increment (low-halfword pixel first, then high-halfword pixel).
@
@ in:    \rin = two source pixels
@        lr   = 0x00e000e0 (mask selecting one 3-bit channel per halfword)
@        r0   = destination pointer
@ out:   r3 = word stored for the low pixel, r2 = word for the high pixel,
@        r0 advanced by 8 bytes
@ trash: \rin
@ sh:    0 = plain conversion
@        1 = "shadow mode": every channel is shifted right by one more bit
@        2 = 0x40 is added to every channel, clamping at 0xe0 on overflow.
@            Only this variant uses a flag-setting instruction (adds), so it
@            destroys the caller's condition flags; sh=0 and sh=1 do not.

@ Helper for sh=2: add 0x40 to the channel currently in the top byte of \reg,
@ force it to 0xe0 if the addition carried out, then rotate right by \rot so
@ that the next channel (or the final layout) is in place.
.macro convRGB32_hl_step reg rot
    adds    \reg, \reg, #0x40000000
    orrcs   \reg, \reg, #0xe0000000
    mov     \reg, \reg, ror #\rot
.endm

.macro convRGB32_2 rin sh=0
    and     r2, lr, \rin, lsr #4        @ blue of both pixels:  00b000b0
    and     r3, \rin, lr                @ green of both pixels: 00g000g0
    orr     r2, r2, r3, lsl #8          @ g0b0g0b0

    @ ---- low pixel -> r3 ----
    mov     r3, r2, lsl #16             @ g0b00000
    and     \rin, lr, \rin, ror #12     @ 00r000r0 (pixel order reversed)
    orr     r3, r3, \rin, lsr #16       @ g0b000r0
.if \sh == 1
    mov     r3, r3, ror #17             @ 00r0g0b0, one extra bit right
.elseif \sh == 2
    @ top byte is green, then red, then blue; ends as 00r0g0b0
    convRGB32_hl_step r3, 8
    convRGB32_hl_step r3, 16
    convRGB32_hl_step r3, 24
.else
    mov     r3, r3, ror #16             @ 00r0g0b0
.endif

    orr     r3, r3, r3, lsr #3          @ copy each channel into the 3 bits below it
    str     r3, [r0], #4

    @ ---- high pixel -> r2 ----
    mov     r2, r2, lsr #16             @ 0000g0b0
    orr     r2, r2, \rin, lsl #16       @ 00r0g0b0
.if \sh == 1
    mov     r2, r2, lsr #1              @ one extra bit right
.elseif \sh == 2
    mov     r2, r2, ror #8              @ b000r0g0: blue to the top byte
    @ top byte is blue, then green, then red; ends as 00r0g0b0
    convRGB32_hl_step r2, 8
    convRGB32_hl_step r2, 8
    convRGB32_hl_step r2, 8
.endif

    orr     r2, r2, r2, lsr #3
    str     r2, [r0], #4
.endm
64\r
65\r
@ void vidConvCpyRGB32(void *to, void *from, int pixels)
@
@ Convert packed 16-bit pixels at `from` into 32-bit words at `to` with
@ convRGB32_2 in plain mode (sh=0).
@
@ Pixels are processed in groups of 8 (16 bytes read, 32 bytes written).
@ A remainder of pixels % 8 is not converted.  A count below 8 converts
@ nothing; the unguarded do-while loop would otherwise still process one
@ whole group and write 32 bytes even for pixels == 0.
@
@ in:    r0 = to, r1 = from, r2 = pixels
@ trash: r0-r3, r12, flags
.global vidConvCpyRGB32

vidConvCpyRGB32:
    movs    r12, r2, lsr #3             @ r12 = number of complete 8-pixel groups
    bxeq    lr                          @ none: leave both buffers untouched

    stmfd   sp!, {r4-r7,lr}

    mov     lr, #0x00e00000
    orr     lr, lr, #0x00e0             @ lr = 0x00e000e0, mask expected by convRGB32_2

.Lrgb32_loop:
    @ The countdown is done up front: convRGB32_2 with sh=0 contains no
    @ flag-setting instruction, so these flags are still valid at the bgt.
    subs    r12, r12, #1

    ldmia   r1!, {r4-r7}                @ 4 words = 8 source pixels
    convRGB32_2 r4
    convRGB32_2 r5
    convRGB32_2 r6
    convRGB32_2 r7

    bgt     .Lrgb32_loop

    ldmfd   sp!, {r4-r7,lr}
    bx      lr
88\r
89\r
@ void vidConvCpyRGB32sh(void *to, void *from, int pixels)
@
@ Same conversion as the plain variant, but using convRGB32_2 with sh=1
@ ("shadow mode": every colour channel shifted right by one extra bit).
@
@ Pixels are processed in groups of 8 (16 bytes read, 32 bytes written).
@ A remainder of pixels % 8 is not converted.  A count below 8 converts
@ nothing; the unguarded do-while loop would otherwise still process one
@ whole group and write 32 bytes even for pixels == 0.
@
@ in:    r0 = to, r1 = from, r2 = pixels
@ trash: r0-r3, r12, flags
.global vidConvCpyRGB32sh

vidConvCpyRGB32sh:
    movs    r12, r2, lsr #3             @ r12 = number of complete 8-pixel groups
    bxeq    lr                          @ none: leave both buffers untouched

    stmfd   sp!, {r4-r7,lr}

    mov     lr, #0x00e00000
    orr     lr, lr, #0x00e0             @ lr = 0x00e000e0, mask expected by convRGB32_2

.Lrgb32sh_loop:
    @ The countdown is done up front: convRGB32_2 with sh=1 only uses
    @ non-flag-setting moves, so these flags are still valid at the bgt.
    subs    r12, r12, #1

    ldmia   r1!, {r4-r7}                @ 4 words = 8 source pixels
    convRGB32_2 r4, 1
    convRGB32_2 r5, 1
    convRGB32_2 r6, 1
    convRGB32_2 r7, 1

    bgt     .Lrgb32sh_loop

    ldmfd   sp!, {r4-r7,lr}
    bx      lr
112\r
113\r
@ void vidConvCpyRGB32hi(void *to, void *from, int pixels)
@
@ Same conversion as the plain variant, but using convRGB32_2 with sh=2
@ (0x40 added to every colour channel, clamped at 0xe0).
@
@ Pixels are processed in groups of 8 (16 bytes read, 32 bytes written).
@ A remainder of pixels % 8 is not converted.  A count below 8 converts
@ nothing; the unguarded do-while loop would otherwise still process one
@ whole group and write 32 bytes even for pixels == 0.
@
@ in:    r0 = to, r1 = from, r2 = pixels
@ trash: r0-r3, r12, flags
.global vidConvCpyRGB32hi

vidConvCpyRGB32hi:
    movs    r12, r2, lsr #3             @ r12 = number of complete 8-pixel groups
    bxeq    lr                          @ none: leave both buffers untouched

    stmfd   sp!, {r4-r7,lr}

    mov     lr, #0x00e00000
    orr     lr, lr, #0x00e0             @ lr = 0x00e000e0, mask expected by convRGB32_2

.Lrgb32hi_loop:
    ldmia   r1!, {r4-r7}                @ 4 words = 8 source pixels
    convRGB32_2 r4, 2
    convRGB32_2 r5, 2
    convRGB32_2 r6, 2
    convRGB32_2 r7, 2

    @ The sh=2 expansion uses adds and clobbers the flags, so the countdown
    @ has to come after the conversions here.
    subs    r12, r12, #1
    bgt     .Lrgb32hi_loop

    ldmfd   sp!, {r4-r7,lr}
    bx      lr
135\r
136\r
137@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
138\r
139\r
@ void vidcpy_m2(void *dest, void *src, int m32col, int with_32c_border)
@
@ mode2 blitter: copies 224 lines from src to dest.
@
@   - dest starts 320*8 bytes in; src starts 8 bytes in.
@   - After every line src skips 8 further bytes, so a source line occupies
@     328 bytes while a destination line occupies 320.
@   - m32col == 0: 10 x 32 = 320 bytes are copied per line.
@   - m32col != 0:  8 x 32 = 256 bytes are copied per line, after which both
@     pointers skip 64 bytes.  If with_32c_border is also non-zero, dest is
@     additionally offset by 32 bytes at the start.  with_32c_border has no
@     effect when m32col == 0.
@
@ in:    r0 = dest, r1 = src, r2 = m32col, r3 = with_32c_border
@ trash: r0-r3, r12, flags
.global vidcpy_m2
vidcpy_m2:
    stmfd   sp!, {r4-r6,lr}

    mov     lr, #0                      @ lr = bytes skipped per line (0 or 64)
    mov     r12, #224                   @ r12 = lines left
    add     r1, r1, #8
    add     r0, r0, #320*8

    tst     r2, r2
    movne   lr, #64                     @ 32col: 64 bytes per line are not copied
    tstne   r3, r3                      @ border flag only examined in 32col mode
    addne   r0, r0, #32

.Lm2_line:
    mov     r6, #10
    sub     r6, r6, lr, lsr #5          @ 10 chunks per line, 8 in 32col mode

.Lm2_chunk:
    subs    r6, r6, #1                  @ flags survive the ldm/stm pairs below
    ldmia   r1!, {r2-r5}
    stmia   r0!, {r2-r5}
    ldmia   r1!, {r2-r5}
    stmia   r0!, {r2-r5}
    bne     .Lm2_chunk

    add     r1, r1, #8                  @ per-line source padding
    add     r1, r1, lr
    add     r0, r0, lr
    subs    r12, r12, #1
    bne     .Lm2_line

    ldmfd   sp!, {r4-r6,pc}
720ee7f6 173\r
720ee7f6 174\r
@ void vidcpy_m2_rot(void *dest, void *src, int m32col, int with_32c_border)
@
@ Rotating variant of the mode2 blit, implemented by calling the 8-bit
@ rotated blitter once per band of 4 source lines.  The blitter is entered
@ at rotated_blit8_2 with r8 = 328 (source line pitch in bytes) and
@ y = 12, 16, ..., 232; src advances by 328*4 between calls.
@
@ The with_32c_border argument (r3) is overwritten before use and therefore
@ has no effect; m32col is what gets passed on as is_32col.
@
@ in:    r0 = dest, r1 = src, r2 = m32col, r3 = (ignored)
.global vidcpy_m2_rot
vidcpy_m2_rot:
    stmfd   sp!, {r4-r8,lr}

    add     r1, r1, #8
    tst     r2, r2
    subne   r1, r1, #32                 @ cancels the +32 rotated_blit8_2 applies in 32col mode

    @ Loop state lives in r4-r7; the blitter restores them on return.
    mov     r7, #8+4                    @ r7 = y of the current band
    mov     r6, r2                      @ r6 = m32col
    mov     r5, r1                      @ r5 = source pointer for the current band
    mov     r4, r0                      @ r4 = dest

.Lm2rot_band:
    @ a bit lame but oh well..
    mov     r3, r6
    mov     r2, r7
    mov     r1, r5
    mov     r0, r4
    mov     r8, #328
    @ Build the frame the blitter's epilogue pops ({r4-r8,pc}) by hand, with
    @ the saved return address pointing just past the branch.
    adr     lr, .Lm2rot_resume
    stmfd   sp!, {r4-r8,lr}
    b       rotated_blit8_2

.Lm2rot_resume:
    add     r5, r5, #328*4              @ next 4 source lines
    add     r7, r7, #4
    cmp     r7, #224+8+4
    blt     .Lm2rot_band

    ldmfd   sp!, {r4-r8,pc}
204\r
205\r
@ void rotated_blit8(void *dst, void *linesx4, u32 y, int is_32col)
@
@ Transpose a band of four 8-bit source lines into dst.  Each loop iteration
@ reads one word (4 pixels) from each of the four lines and writes four
@ words, where output word k holds byte k of line 0..3 (line 0 in the least
@ significant byte).  After every store dst steps back by 240 bytes.
@
@ Starting address:  dst + 240*320 - (240+4) + y
@ is_32col != 0:     dst -= 240*32, source += 32, 256/4 iterations
@ is_32col == 0:     320/4 iterations
@
@ in:    r0 = dst, r1 = linesx4, r2 = y, r3 = is_32col
.global rotated_blit8
rotated_blit8:
    stmfd   sp!, {r4-r8,lr}
    mov     r8, #320                    @ source line pitch in bytes

@ Alternate entry used by vidcpy_m2_rot: the caller has already pushed
@ {r4-r8,lr} and loaded r8 with its own source line pitch.
rotated_blit8_2:
    add     r0, r0, r2
    add     r0, r0, #(240*320)
    sub     r0, r0, #(240+4)            @ y starts from 4

    tst     r3, r3
    subne   r0, r0, #(240*32)
    addne   r1, r1, #32
    movne   lr, #256/4                  @ lr = iterations (one source word each)
    moveq   lr, #320/4

.Lrot8_column:
    @ r2..r5 = the same word of source lines 0..3
    mov     r6, r1
    ldr     r2, [r6], r8
    ldr     r3, [r6], r8
    ldr     r4, [r6], r8
    ldr     r5, [r6], r8

    @ byte 0 of every line: shift each byte in at the top and slide it down
    mov     r6, r2, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r3, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r4, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r5, lsl #24
    str     r6, [r0], #-240

    @ byte 1 of every line (line 1's byte is already in position)
    and     r7, r2, #0xff00
    and     r6, r3, #0xff00
    orr     r6, r6, r7, lsr #8
    and     r7, r4, #0xff00
    orr     r6, r6, r7, lsl #8
    and     r7, r5, #0xff00
    orr     r6, r6, r7, lsl #16
    str     r6, [r0], #-240

    @ byte 2 of every line (line 2's byte is already in position)
    and     r7, r2, #0xff0000
    and     r6, r4, #0xff0000
    orr     r6, r6, r7, lsr #16
    and     r7, r3, #0xff0000
    orr     r6, r6, r7, lsr #8
    and     r7, r5, #0xff0000
    orr     r6, r6, r7, lsl #8
    str     r6, [r0], #-240

    @ byte 3 of every line: shift each byte in at the bottom and slide it up
    mov     r6, r5, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r4, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r3, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r2, lsr #24
    str     r6, [r0], #-240

    add     r1, r1, #4                  @ next source word
    subs    lr, lr, #1
    bne     .Lrot8_column

    ldmfd   sp!, {r4-r8,pc}
270\r
271\r
@ rb_line_low: pack the low halfwords of r2..r5 into two words.
@
@ input:  r2-r5
@ output: r7 = (r3 low halfword << 16) | r2 low halfword
@         r8 = (r5 low halfword << 16) | r4 low halfword
@ trash:  r6
.macro rb_line_low
    mov     r7, r3, lsl #16
    mov     r6, r2, lsl #16
    orr     r7, r7, r6, lsr #16
    mov     r8, r5, lsl #16
    mov     r6, r4, lsl #16
    orr     r8, r8, r6, lsr #16
.endm
720ee7f6 283\r
@ rb_line_hi: pack the high halfwords of r2..r5 into two words.
@
@ input:  r2-r5
@ output: r7 = (r3 high halfword << 16) | r2 high halfword
@         r8 = (r5 high halfword << 16) | r4 high halfword
@ trash:  r6
.macro rb_line_hi
    mov     r7, r3, lsr #16
    mov     r6, r2, lsr #16
    orr     r7, r6, r7, lsl #16
    mov     r8, r5, lsr #16
    mov     r6, r4, lsr #16
    orr     r8, r6, r8, lsl #16
.endm
720ee7f6 292\r
@ void rotated_blit16(void *dst, void *linesx4, u32 y, int is_32col)
@
@ 16-bit counterpart of the 8-bit rotated blit.  Source lines are 320*2 bytes
@ apart.  Each loop iteration reads two words (4 pixels) from each of four
@ source lines and writes four output rows of 4 pixels (two words) each.
@ Every output row holds one pixel column of source lines 0..3, and dst
@ steps back by 240*2 bytes after each row.
@
@ Starting address:  dst + (240*320)*2 - (240+4)*2 + y*2
@ is_32col != 0:     dst -= (240*32)*2, source += 32*2, 256/4 iterations
@ is_32col == 0:     320/4 iterations
@
@ in:    r0 = dst, r1 = linesx4, r2 = y, r3 = is_32col
.global rotated_blit16
rotated_blit16:
    stmfd   sp!, {r4-r8,lr}

    add     r0, r0, r2, lsl #1
    add     r0, r0, #(240*320)*2
    sub     r0, r0, #(240+4)*2          @ y starts from 4

    tst     r3, r3
    subne   r0, r0, #(240*32)*2
    addne   r1, r1, #32*2
    movne   lr, #256/4                  @ lr = iterations (4 source pixels each)
    moveq   lr, #320/4

.Lrot16_column:
    @ pixels 0 and 1 of source lines 0..3
    ldr     r2, [r1, #320*0*2]
    ldr     r3, [r1, #320*1*2]
    ldr     r4, [r1, #320*2*2]
    ldr     r5, [r1, #320*3*2]
    rb_line_low
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2
    rb_line_hi
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2

    @ pixels 2 and 3 of source lines 0..3
    ldr     r2, [r1, #320*0*2+4]
    ldr     r3, [r1, #320*1*2+4]
    ldr     r4, [r1, #320*2*2+4]
    ldr     r5, [r1, #320*3*2+4]
    rb_line_low
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2
    rb_line_hi
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2

    add     r1, r1, #8                  @ next 4 source pixels
    subs    lr, lr, #1
    bne     .Lrot16_column

    ldmfd   sp!, {r4-r8,pc}
720ee7f6 335\r
336\r
@ spend_cycles(c)
@
@ Busy-wait loop.  The argument c (r0) is divided by 4, reduced by 2, and the
@ result is counted down until it goes negative.  The divide-by-4 and the
@ minus-2 reflect the original author's estimate of 4 cycles per loop
@ iteration plus fixed entry/exit/init overhead.
@
@ in:    r0 = c
@ trash: r0, flags
.global spend_cycles

spend_cycles:
    mov     r0, r0, lsr #2              @ 4 cycles/iteration
    sub     r0, r0, #2                  @ entry/exit/init
.Lspend_loop:
    subs    r0, r0, #1
    bpl     .Lspend_loop                @ keep going while the counter is >= 0

    bx      lr
49fe50f0 347\r