Add performance level
[picodrive.git] / platform / common / arm_utils.s
... / ...
CommitLineData
1/*\r
2 * some color conversion and blitting routines\r
3 * (C) notaz, 2006-2009\r
4 *\r
5 * This work is licensed under the terms of MAME license.\r
6 * See COPYING file in the top-level directory.\r
7 */\r
8\r
9.text\r
10.align 4\r
11\r
12@ Convert 0000bbb0 ggg0rrr0 0000bbb0 ggg0rrr0\r
13@ to 00000000 rrr00000 ggg00000 bbb00000 ...\r
14\r
@ convRGB32_2 rin, sh
@
@ Converts the two packed 0000bbb0ggg0rrr0 pixels held in \rin to two
@ 32-bit 00RRGGBB words and stores them at [r0], advancing r0 by 8.
@ The top three bits of every channel are replicated into the three
@ bits below them (orr rX, rX, rX, lsr #3).
@
@ in:    \rin = two source pixels (low halfword is converted/stored first)
@        lr   = 0x00e000e0
@        r0   = destination pointer
@ sh:    0 = plain conversion (default)
@        1 = shadow mode: channels are shifted right by one; each converted
@            pixel is ALSO stored 0x40*2 entries (0x40*2*4 bytes) further on
@        2 = hilight mode: 0x40 is added to each channel, saturating to 0xe0
@ out:   r3 = lower_pix, r2 = higher_pix
@ trash: \rin, and the flags when sh == 2 (adds)
.macro convRGB32_2 rin sh=0
    and     r2,  lr, \rin, lsr #4         @ blue
    and     r3,  \rin, lr                 @ green
    orr     r2,  r2,   r3, lsl #8         @ g0b0g0b0

    mov     r3,  r2,  lsl #16             @ g0b00000
    and     \rin,lr,  \rin, ror #12       @ 00r000r0 (reversed)
    orr     r3,  r3,  \rin, lsr #16       @ g0b000r0
.if \sh == 1
    mov     r3,  r3,  ror #17             @ shadow mode
.elseif \sh == 2
    adds    r3,  r3,  #0x40000000         @ green
    orrcs   r3,  r3,  #0xe0000000         @ saturate on carry
    mov     r3,  r3,  ror #8
    adds    r3,  r3,  #0x40000000         @ red
    orrcs   r3,  r3,  #0xe0000000
    mov     r3,  r3,  ror #16
    adds    r3,  r3,  #0x40000000         @ blue
    orrcs   r3,  r3,  #0xe0000000
    mov     r3,  r3,  ror #24
.else
    mov     r3,  r3,  ror #16             @ r3=low
.endif

    orr     r3,  r3,   r3, lsr #3
.if \sh == 1
    @ mirror the lower pixel too, so that every entry of the copy
    @ (not only every second one) gets written
    str     r3, [r0, #0x40*2*4]
.endif
    str     r3, [r0], #4

    mov     r2,  r2,  lsr #16
    orr     r2,  r2,  \rin, lsl #16       @ 00r0g0b0
.if \sh == 1
    mov     r2,  r2,  lsr #1              @ shadow mode
.elseif \sh == 2
    mov     r2,  r2,  ror #8
    adds    r2,  r2,  #0x40000000         @ blue
    orrcs   r2,  r2,  #0xe0000000
    mov     r2,  r2,  ror #8
    adds    r2,  r2,  #0x40000000         @ green
    orrcs   r2,  r2,  #0xe0000000
    mov     r2,  r2,  ror #8
    adds    r2,  r2,  #0x40000000         @ red
    orrcs   r2,  r2,  #0xe0000000
    mov     r2,  r2,  ror #8
.endif

    orr     r2,  r2,   r2, lsr #3
.if \sh == 1
    str     r2, [r0, #0x40*2*4]
.endif
    str     r2, [r0], #4
.endm
66\r
67\r
.global bgr444_to_rgb32 @ void *to, void *from

@ Converts 0x40 16-bit entries at 'from' (r1) into 0x40 32-bit words
@ at 'to' (r0) using convRGB32_2 in its default mode.
@ Each pass loads 4 words (8 source entries); 0x40>>3 = 8 passes.
@ r0-r3 and r12 are not preserved; r4-r7 are saved and restored.
bgr444_to_rgb32:
    stmfd   sp!, {r4-r7,lr}

    mov     lr,  #0x00e00000
    orr     lr,  lr, #0x00e0              @ lr = 0x00e000e0, mask for the macro
    mov     r12, #0x40>>3                 @ repeats

1:
    ldmia   r1!, {r4-r7}
    subs    r12, r12, #1                  @ flags survive: mode 0 of the macro sets none
.irp px, r4, r5, r6, r7
    convRGB32_2 \px
.endr
    bgt     1b

    ldmfd   sp!, {r4-r7,pc}
88\r
89\r
.global bgr444_to_rgb32_sh @ void *to, void *from

@ Runs two conversion passes over the same 0x40 source entries at 'from' (r1):
@   1) convRGB32_2 mode 1 (shadow), written starting at to + 0x40*4 bytes
@   2) convRGB32_2 mode 2 (hilight), written directly after the first pass
@ r0-r3 and r12 are not preserved; r4-r7 are saved and restored.
bgr444_to_rgb32_sh:
    stmfd   sp!, {r4-r7,lr}

    add     r0,  r0, #0x40*4              @ skip the first 0x40 output words
    mov     lr,  #0x00e00000
    orr     lr,  lr, #0x00e0              @ lr = 0x00e000e0, mask for the macro
    mov     r12, #0x40>>3                 @ repeats

1:  @ shadow pass
    ldmia   r1!, {r4-r7}
    subs    r12, r12, #1                  @ flags survive: mode 1 of the macro sets none
.irp px, r4, r5, r6, r7
    convRGB32_2 \px, 1
.endr
    bgt     1b

    sub     r1,  r1, #0x40*2              @ rewind the source pointer
    mov     r12, #0x40>>3                 @ repeats

2:  @ hilight pass
    ldmia   r1!, {r4-r7}
.irp px, r4, r5, r6, r7
    convRGB32_2 \px, 2
.endr
    subs    r12, r12, #1                  @ must follow the macro: mode 2 uses adds
    bgt     2b

    ldmfd   sp!, {r4-r7,lr}
    bx      lr
125\r
126\r
127@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
128\r
129\r
@ mode2 blitter
@
@ Copies 224 lines from src to dest.
@   src : reading starts 8 bytes in; consecutive lines are 328 bytes apart
@   dest: writing starts 320*8 bytes in; consecutive lines are 320 bytes apart
@   m32col == 0: 320 bytes are copied per line
@   m32col != 0: 256 bytes are copied per line; if with_32c_border is also
@                non-zero the destination is shifted right by 32 bytes
@ Both pointers are accessed with ldmia/stmia (word transfers).
.global vidcpy_m2 @ void *dest, void *src, int m32col, int with_32c_border
vidcpy_m2:
    stmfd   sp!, {r4-r6,lr}

    add     r1, r1, #8
    add     r0, r0, #320*8
    mov     r12, #224                     @ lines

    cmp     r2, #0
    moveq   lr, #0                        @ lr = bytes skipped per line
    movne   lr, #64
    cmpne   r3, #0
    addne   r0, r0, #32                   @ 32col mode with border

1:  @ per line
    mov     r6, lr, lsr #5
    rsb     r6, r6, #10                   @ 10 chunks of 32 bytes, 8 in 32col mode
2:  @ per 32-byte chunk
    ldmia   r1!, {r2-r5}
    stmia   r0!, {r2-r5}
    ldmia   r1!, {r2-r5}
    stmia   r0!, {r2-r5}
    subs    r6, r6, #1
    bne     2b

    add     r0, r0, lr
    add     r1, r1, lr
    add     r1, r1, #8
    subs    r12, r12, #1
    bne     1b

    ldmfd   sp!, {r4-r6,pc}
163\r
164\r
.global vidcpy_m2_rot @ void *dest, void *src, int m32col, int with_32c_border

@ Rotated variant of the mode2 blit: feeds the source to rotated_blit8_2
@ four lines at a time, with a line stride of 328 bytes passed in r8.
@ The y argument handed over runs from 8+4 up to (but excluding) 224+8+4
@ in steps of 4.
@ r4 = dest, r5 = current source position, r6 = m32col, r7 = current y.
vidcpy_m2_rot:
    stmfd   sp!,{r4-r8,lr}
    add     r1, r1, #8
    tst     r2, r2
    subne   r1, r1, #32                   @ adjust

    mov     r7, #8+4
    mov     r6, r2
    mov     r5, r1
    mov     r4, r0

vidcpy_m2_rot_next4:
    @ hand-made call: rotated_blit8_2 expects {r4-r8,lr} already on the
    @ stack and returns by popping them into {r4-r8,pc}
    mov     r8, #328
    mov     r3, r6
    mov     r2, r7
    mov     r1, r5
    mov     r0, r4
    adr     lr, vidcpy_m2_rot_resume
    stmfd   sp!,{r4-r8,lr}
    b       rotated_blit8_2

vidcpy_m2_rot_resume:
    add     r7, r7, #4
    add     r5, r5, #328*4
    cmp     r7, #224+8+4
    blt     vidcpy_m2_rot_next4

    ldmfd   sp!,{r4-r8,pc}
194\r
195\r
.global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col

@ Transposes four source lines of 8-bit pixels into the destination.
@ For every source column one word is written that holds the pixel of
@ line 0 in its lowest byte up to the pixel of line 3 in its highest
@ byte; the destination pointer then steps back by 240 bytes.
@ First write goes to dst + 240*320 - (240+4) + y.
@   is_32col == 0: 320 columns are processed
@   is_32col != 0: 256 columns, source advanced by 32 bytes and the
@                  destination moved back by 240*32 bytes first
@
@ rotated_blit8_2 is a second entry point: r0-r3 as above, r8 = source
@ line stride in bytes, and {r4-r8,lr} must already be pushed.
rotated_blit8:
    stmfd   sp!,{r4-r8,lr}
    mov     r8, #320

rotated_blit8_2:
    add     r0, r0, r2
    add     r0, r0, #(240*320)
    sub     r0, r0, #(240+4)              @ y starts from 4

    cmp     r3, #0
    moveq   lr, #320/4                    @ lr = groups of 4 columns
    movne   lr, #256/4
    addne   r1, r1, #32
    subne   r0, r0, #(240*32)

1:
    @ fetch 4 pixels from each of the 4 lines
    ldr     r2, [r1]
    ldr     r3, [r1, r8]
    add     r6, r1, r8, lsl #1
    ldr     r4, [r6]
    ldr     r5, [r6, r8]

    @ column 0: byte 0 of every line
    and     r6, r2, #0xff
    and     r7, r3, #0xff
    orr     r6, r6, r7, lsl #8
    and     r7, r4, #0xff
    orr     r6, r6, r7, lsl #16
    orr     r6, r6, r5, lsl #24
    str     r6, [r0], #-240

    @ column 1: byte 1 of every line
    and     r6, r2, #0xff00
    and     r7, r3, #0xff00
    orr     r6, r7, r6, lsr #8
    and     r7, r4, #0xff00
    orr     r6, r6, r7, lsl #8
    and     r7, r5, #0xff00
    orr     r6, r6, r7, lsl #16
    str     r6, [r0], #-240

    @ column 2: byte 2 of every line
    and     r6, r2, #0xff0000
    and     r7, r3, #0xff0000
    mov     r6, r6, lsr #16
    orr     r6, r6, r7, lsr #8
    and     r7, r4, #0xff0000
    orr     r6, r6, r7
    and     r7, r5, #0xff0000
    orr     r6, r6, r7, lsl #8
    str     r6, [r0], #-240

    @ column 3: byte 3 of every line
    mov     r6, r2, lsr #24
    mov     r7, r3, lsr #24
    orr     r6, r6, r7, lsl #8
    mov     r7, r4, lsr #24
    orr     r6, r6, r7, lsl #16
    and     r7, r5, #0xff000000
    orr     r6, r6, r7
    str     r6, [r0], #-240

    add     r1, r1, #4
    subs    lr, lr, #1
    bne     1b

    ldmfd   sp!,{r4-r8,pc}
260\r
261\r
@ rb_line_low / rb_line_hi
@ Pair up halfwords taken from four registers.
@ input:  r2-r5
@ output: r7 = r2 halfword in bits 0-15, r3 halfword in bits 16-31
@         r8 = r4 halfword in bits 0-15, r5 halfword in bits 16-31
@ trash:  r6
@ rb_line_low uses the low halfword of each input, rb_line_hi the high one.
.macro rb_line_low
    mov     r7, r3, lsl #16
    mov     r6, r2, lsl #16
    mov     r8, r5, lsl #16
    orr     r7, r7, r6, lsr #16
    mov     r6, r4, lsl #16
    orr     r8, r8, r6, lsr #16
.endm

.macro rb_line_hi
    mov     r7, r3, lsr #16
    mov     r6, r2, lsr #16
    mov     r8, r5, lsr #16
    orr     r7, r6, r7, lsl #16
    mov     r6, r4, lsr #16
    orr     r8, r6, r8, lsl #16
.endm
282\r
.global rotated_blit16 @ void *dst, void *linesx4, u32 y, int is_32col

@ 16-bit counterpart of rotated_blit8: transposes four source lines of
@ 16-bit pixels (lines are 320*2 bytes apart). For every source column
@ the four pixels of that column are written as 8 consecutive bytes,
@ then the destination pointer steps back by 240*2 bytes.
@ First write goes to dst + (240*320)*2 - (240+4)*2 + y*2.
@   is_32col == 0: 320 columns are processed
@   is_32col != 0: 256 columns, source advanced by 32*2 bytes and the
@                  destination moved back by (240*32)*2 bytes first
rotated_blit16:
    stmfd   sp!,{r4-r8,lr}

    add     r0, r0, r2, lsl #1
    add     r0, r0, #(240*320)*2
    sub     r0, r0, #(240+4)*2            @ y starts from 4

    cmp     r3, #0
    moveq   lr, #320/4                    @ lr = groups of 4 columns
    movne   lr, #256/4
    addne   r1, r1, #32*2
    subne   r0, r0, #(240*32)*2

1:
    @ each pass of the .irp handles one word (2 pixels) per source line
.irp woff, 0, 4
    ldr     r2, [r1, #320*0*2+\woff]
    ldr     r3, [r1, #320*1*2+\woff]
    ldr     r4, [r1, #320*2*2+\woff]
    ldr     r5, [r1, #320*3*2+\woff]
    rb_line_low
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2
    rb_line_hi
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2
.endr

    add     r1, r1, #8
    subs    lr, lr, #1
    bne     1b

    ldmfd   sp!,{r4-r8,pc}
325\r
326\r
.global spend_cycles @ c

@ Busy-wait loop. The argument c (r0) is divided by 4 and reduced by 2
@ before it is used as the loop count; the loop runs until the counter
@ goes negative. The instruction sequence is intentionally left as is,
@ since the timing is the whole point of this routine.
spend_cycles:
    mov     r0, r0, lsr #2                @ 4 cycles/iteration
    sub     r0, r0, #2                    @ entry/exit/init
1:
    subs    r0, r0, #1
    bpl     1b

    bx      lr
337\r
338@ vim:filetype=armasm\r