@ picodrive.git / platform / common / arm_utils.s
/*
 * some color conversion and blitting routines
 * (C) notaz, 2006-2009
 * (C) irixxxx, 2020-2023
 *
 * This work is licensed under the terms of MAME license.
 * See COPYING file in the top-level directory.
 */

.text
.align 4

@ Convert 0000bbbb ggggrrrr 0000bbbb ggggrrrr
@ to 00000000 rrrr0000 gggg0000 bbbb0000 ...

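@ A rough C sketch of what one pixel goes through below (hypothetical helper,
@ not part of this file; the asm works on two pixels per 32-bit word and does
@ the 4-to-8 bit widening on the packed word instead):
@
@   static inline unsigned int bgr444_px_to_rgb32(unsigned short p)
@   {
@       unsigned int r = ((p >> 0) & 0xf) << 4;
@       unsigned int g = ((p >> 4) & 0xf) << 4;
@       unsigned int b = ((p >> 8) & 0xf) << 4;
@       r |= r >> 3; g |= g >> 3; b |= b >> 3;   /* widen 4 bits to ~8 bits */
@       return (r << 16) | (g << 8) | b;
@   }
@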
@ lr = 0x00f000f0, out: r3=lower_pix, r2=higher_pix; trashes rin
.macro convRGB32_2 rin sh=0
    and     r2, lr, \rin, lsr #4        @ blue
    and     r3, \rin, lr
    orr     r2, r2, r3, lsl #8          @ g0b0g0b0

    mov     r3, r2, lsl #16             @ g0b00000
    and     \rin, lr, \rin, ror #12     @ 00r000r0 (reversed)
    orr     r3, r3, \rin, lsr #16       @ g0b000r0
.if \sh == 1
    mov     r3, r3, ror #17             @ shadow mode
.elseif \sh == 2
    adds    r3, r3, #0x40000000         @ green
    orrcs   r3, r3, lr, lsl #24
    mov     r3, r3, ror #8
    adds    r3, r3, #0x40000000
    orrcs   r3, r3, lr, lsl #24
    mov     r3, r3, ror #16
    adds    r3, r3, #0x40000000
    orrcs   r3, r3, lr, lsl #24
    mov     r3, r3, ror #24
.else
    mov     r3, r3, ror #16             @ r3=low
.endif

    orr     r3, r3, r3, lsr #3
    str     r3, [r0], #4

    mov     r2, r2, lsr #16
    orr     r2, r2, \rin, lsl #16
.if \sh == 1
    mov     r2, r2, lsr #1
.elseif \sh == 2
    mov     r2, r2, ror #8
    adds    r2, r2, #0x40000000         @ blue
    orrcs   r2, r2, lr, lsl #24
    mov     r2, r2, ror #8
    adds    r2, r2, #0x40000000
    orrcs   r2, r2, lr, lsl #24
    mov     r2, r2, ror #8
    adds    r2, r2, #0x40000000
    orrcs   r2, r2, lr, lsl #24
    mov     r2, r2, ror #8
.endif

    orr     r2, r2, r2, lsr #3
    str     r2, [r0], #4
.endm


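@ Converts 'entries' BGR444 palette entries to xRGB8888, eight entries per
@ loop iteration, so 'entries' is expected to be a multiple of 8. A hedged
@ usage sketch from C (buffer names are illustrative, not from this codebase):
@
@   extern void bgr444_to_rgb32(void *to, void *from, unsigned entries);
@
@   unsigned short cram[0x40];      /* BGR444 source, e.g. a CRAM copy */
@   unsigned int   pal[0x40];       /* xRGB8888 destination */
@   bgr444_to_rgb32(pal, cram, 0x40);
@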
.global bgr444_to_rgb32 @ void *to, void *from, unsigned entries

bgr444_to_rgb32:
    stmfd   sp!, {r4-r7,lr}

    mov     r12, r2, lsr #3             @ repeats
    mov     lr, #0x00f00000
    orr     lr, lr, #0x00f0

.loopRGB32:
    ldmia   r1!, {r4-r7}
    convRGB32_2 r4
    convRGB32_2 r5
    convRGB32_2 r6
    convRGB32_2 r7

    subs    r12, r12, #1
    bgt     .loopRGB32

    ldmfd   sp!, {r4-r7,pc}


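@ Builds the shadow/hilight variants of the first 0x40 palette entries: the
@ first pass stores brightened (hilight, sh=2) colors at to[0x40..0x7f], the
@ second pass re-reads the same source and stores halved (shadow, sh=1)
@ colors at to[0x80..0xbf]. A hedged per-component sketch in C, with 'c' a
@ 4-bit component value (illustrative helpers, not part of this file):
@
@   static inline unsigned int hi_component(unsigned int c)   /* sh=2 path */
@   {
@       c += 4;                    /* the adds #0x40000000 step: +4 on the nibble */
@       return c > 15 ? 15 : c;    /* orrcs saturates on overflow */
@   }
@   static inline unsigned int sh_component(unsigned int c)   /* sh=1 path */
@   {
@       return c >> 1;             /* shadow: halve the component */
@   }
@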
.global bgr444_to_rgb32_sh @ void *to, void *from

bgr444_to_rgb32_sh:
    stmfd   sp!, {r4-r7,lr}

    mov     r12, #0x40>>3               @ repeats
    add     r0, r0, #0x40*4
    mov     lr, #0x00f00000
    orr     lr, lr, #0x00f0

.loopRGB32sh:
    ldmia   r1!, {r4-r7}
    convRGB32_2 r4, 2
    convRGB32_2 r5, 2
    convRGB32_2 r6, 2
    convRGB32_2 r7, 2

    subs    r12, r12, #1
    bgt     .loopRGB32sh

    mov     r12, #0x40>>3               @ repeats
    sub     r1, r1, #0x40*2
    and     lr, lr, lr, lsl #1          @ kill LSB for correct shadow colors

.loopRGB32hi:
    ldmia   r1!, {r4-r7}
    convRGB32_2 r4, 1
    convRGB32_2 r5, 1
    convRGB32_2 r6, 1
    convRGB32_2 r7, 1

    subs    r12, r12, #1
    bgt     .loopRGB32hi

    ldmfd   sp!, {r4-r7,lr}
    bx      lr


@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

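@ Copies a w x h 8bpp block from a 328-bytes-per-line source (which carries an
@ 8-pixel left border) to a 320-bytes-per-line destination. x_y packs x in the
@ upper and y in the lower 16 bits, w_h packs w and h the same way; w is
@ expected to be a multiple of 8 (and at least 32, as the 32-byte inner loop
@ always runs once). A hedged C equivalent (illustrative only):
@
@   #include <string.h>
@   void vidcpy_8bit_c(unsigned char *pd, unsigned char *ps, int x_y, int w_h)
@   {
@       int x = x_y >> 16, y = x_y & 0xffff;
@       int w = w_h >> 16, h = w_h & 0xffff;
@       pd += 320 * y + x;
@       ps += 328 * y + x + 8;
@       for (; h > 0; h--, pd += 320, ps += 328)
@           memcpy(pd, ps, w);
@   }
@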
.global vidcpy_8bit @ void *dest, void *src, int x_y, int w_h
vidcpy_8bit:
    stmfd   sp!, {r4-r6,lr}

    mov     r12, r2, lsl #16            @ y

    mov     r4, r12, lsr #16-8          @ 320*y = 256*y+64*y
    add     r4, r4, r12, lsr #16-6
    add     r0, r0, r4                  @ pd += 320*y + x
    add     r0, r0, r2, lsr #16

    add     r4, r4, r12, lsr #16-3      @ 328*y = 320*y + 8*y
    add     r1, r1, r4                  @ ps += 328*y + x + 8
    add     r1, r1, r2, lsr #16
    add     r1, r1, #8

    mov     lr, r3, lsr #16             @ w
    mov     r12, r3, lsl #16            @ h

vidCpy8bit_loop_out:
    movs    r6, lr, lsr #5
@   beq     vidCpy8bit_loop_end
vidCpy8bit_loop:
    subs    r6, r6, #1
    ldmia   r1!, {r2-r5}
    stmia   r0!, {r2-r5}
    ldmia   r1!, {r2-r5}
    stmia   r0!, {r2-r5}
    bne     vidCpy8bit_loop

    ands    r6, lr, #0x0018
    beq     vidCpy8bit_loop_end
vidCpy8bit_loop2:
    ldmia   r1!, {r2-r3}
    subs    r6, r6, #8
    stmia   r0!, {r2-r3}
    bne     vidCpy8bit_loop2

vidCpy8bit_loop_end:
    subs    r12, r12, #1<<16
    add     r0, r0, #320
    sub     r0, r0, lr
    add     r1, r1, #328
    sub     r1, r1, lr
    bne     vidCpy8bit_loop_out

    ldmfd   sp!, {r4-r6,pc}


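@ Same copy as vidcpy_8bit, but rotated by 90 degrees into a 240-pixel-pitch
@ (240x320 portrait) destination; four source lines are transposed per pass,
@ so both w and h are expected to be multiples of 4. A hedged C equivalent of
@ the addressing (illustrative only, not the word-at-a-time implementation):
@
@   void vidcpy_8bit_rot_c(unsigned char *pd, unsigned char *ps,
@                          int x_y, int w_h)
@   {
@       int x = x_y >> 16, y = x_y & 0xffff;
@       int w = w_h >> 16, h = w_h & 0xffff;
@       int i, j;
@       ps += 328 * y + x + 8;
@       for (j = 0; j < h; j++)
@           for (i = 0; i < w; i++)
@               pd[(319 - (x + i)) * 240 + (y + j)] = ps[j * 328 + i];
@   }
@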
.global vidcpy_8bit_rot @ void *dest, void *src, int x_y, int w_h
vidcpy_8bit_rot:
    stmfd   sp!, {r4-r10,lr}

    mov     r12, r2, lsl #16            @ y

    add     r0, r0, r12, lsr #16        @ pd += y + (319-x)*240
    mov     r4, #320
    sub     r4, r4, #1
    sub     r4, r4, r2, lsr #16         @ (319-x)
    add     r0, r0, r4, lsl #8
    sub     r0, r0, r4, lsl #4

    mov     r4, r12, lsr #16-8          @ 328*y = 256*y + 64*y + 8*y
    add     r4, r4, r12, lsr #16-6
    add     r4, r4, r12, lsr #16-3
    add     r1, r1, r4                  @ ps += 328*y + x + 8
    add     r1, r1, r2, lsr #16
    add     r1, r1, #8

    mov     lr, r3, lsr #16             @ w
    mov     r12, r3, lsl #16            @ h

    mov     r8, #328
vidCpy8bitrot_loop_out:
    mov     r10, r0
    movs    r9, lr, lsr #2
@   beq     vidCpy8bitrot_loop_end
vidCpy8bitrot_loop:
    mov     r6, r1
    ldr     r2, [r6], r8
    ldr     r3, [r6], r8
    ldr     r4, [r6], r8
    ldr     r5, [r6], r8

    mov     r6, r2, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r3, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r4, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r5, lsl #24
    str     r6, [r0], #-240

    and     r6, r3, #0xff00
    and     r7, r2, #0xff00
    orr     r6, r6, r7, lsr #8
    and     r7, r4, #0xff00
    orr     r6, r6, r7, lsl #8
    and     r7, r5, #0xff00
    orr     r6, r6, r7, lsl #16
    str     r6, [r0], #-240

    and     r6, r4, #0xff0000
    and     r7, r2, #0xff0000
    orr     r6, r6, r7, lsr #16
    and     r7, r3, #0xff0000
    orr     r6, r6, r7, lsr #8
    and     r7, r5, #0xff0000
    orr     r6, r6, r7, lsl #8
    str     r6, [r0], #-240

    mov     r6, r5, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r4, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r3, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r2, lsr #24
    str     r6, [r0], #-240

    subs    r9, r9, #1
    add     r1, r1, #4
    bne     vidCpy8bitrot_loop

vidCpy8bitrot_loop_end:
    subs    r12, r12, #4<<16
    add     r0, r10, #4
    sub     r1, r1, lr
    add     r1, r1, #4*328
    bne     vidCpy8bitrot_loop_out

    ldmfd   sp!, {r4-r10,pc}


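@ Blits 4 source lines (320 bytes apart, passed as linesx4) into a rotated
@ 240x320 8bpp frame at vertical position y; is_32col selects the centered
@ 256-pixel-wide mode. A hedged C equivalent (illustrative only; the "-4"
@ mirrors the "y starts from 4" adjustment below):
@
@   void rotated_blit8_c(unsigned char *dst, unsigned char *lines,
@                        unsigned int y, int is_32col)
@   {
@       int w = is_32col ? 256 : 320, x0 = is_32col ? 32 : 0, i, j;
@       for (i = 0; i < w; i++)          /* source x */
@           for (j = 0; j < 4; j++)      /* the 4 source lines */
@               dst[(319 - (x0 + i)) * 240 + (y - 4) + j] =
@                   lines[j * 320 + x0 + i];
@   }
@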
.global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col
rotated_blit8:
    stmfd   sp!, {r4-r8,lr}
    mov     r8, #320

rotated_blit8_2:
    add     r0, r0, #(240*320)
    sub     r0, r0, #(240+4)            @ y starts from 4
    add     r0, r0, r2

    tst     r3, r3
    subne   r0, r0, #(240*32)
    addne   r1, r1, #32
    movne   lr, #256/4
    moveq   lr, #320/4

rotated_blit_loop8:
    mov     r6, r1
    ldr     r2, [r6], r8
    ldr     r3, [r6], r8
    ldr     r4, [r6], r8
    ldr     r5, [r6], r8

    mov     r6, r2, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r3, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r4, lsl #24
    mov     r6, r6, lsr #8
    orr     r6, r6, r5, lsl #24
    str     r6, [r0], #-240

    and     r6, r3, #0xff00
    and     r7, r2, #0xff00
    orr     r6, r6, r7, lsr #8
    and     r7, r4, #0xff00
    orr     r6, r6, r7, lsl #8
    and     r7, r5, #0xff00
    orr     r6, r6, r7, lsl #16
    str     r6, [r0], #-240

    and     r6, r4, #0xff0000
    and     r7, r2, #0xff0000
    orr     r6, r6, r7, lsr #16
    and     r7, r3, #0xff0000
    orr     r6, r6, r7, lsr #8
    and     r7, r5, #0xff0000
    orr     r6, r6, r7, lsl #8
    str     r6, [r0], #-240

    mov     r6, r5, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r4, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r3, lsr #24
    mov     r6, r6, lsl #8
    orr     r6, r6, r2, lsr #24
    str     r6, [r0], #-240

    subs    lr, lr, #1
    add     r1, r1, #4
    bne     rotated_blit_loop8

    ldmfd   sp!, {r4-r8,pc}


@ input: r2-r5
@ output: r7,r8
@ trash: r6
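@ r2-r5 hold a pair of 16bpp pixels from each of 4 consecutive source lines;
@ rb_line_low packs their low halfwords and rb_line_hi their high halfwords
@ into r7,r8, i.e. 4 consecutive pixels of one line of the rotated output.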
.macro rb_line_low
    mov     r6, r2, lsl #16
    mov     r7, r3, lsl #16
    orr     r7, r7, r6, lsr #16
    mov     r6, r4, lsl #16
    mov     r8, r5, lsl #16
    orr     r8, r8, r6, lsr #16
.endm

.macro rb_line_hi
    mov     r6, r2, lsr #16
    mov     r7, r3, lsr #16
    orr     r7, r6, r7, lsl #16
    mov     r6, r4, lsr #16
    mov     r8, r5, lsr #16
    orr     r8, r6, r8, lsl #16
.endm

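@ 16bpp variant of rotated_blit8: blits 4 source lines (320 pixels apart) into
@ a rotated 240x320 16bpp frame at vertical position y. A hedged C equivalent
@ (illustrative only):
@
@   void rotated_blit16_c(unsigned short *dst, unsigned short *lines,
@                         unsigned int y, int is_32col)
@   {
@       int w = is_32col ? 256 : 320, x0 = is_32col ? 32 : 0, i, j;
@       for (i = 0; i < w; i++)
@           for (j = 0; j < 4; j++)
@               dst[(319 - (x0 + i)) * 240 + (y - 4) + j] =
@                   lines[j * 320 + x0 + i];
@   }
@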
.global rotated_blit16 @ void *dst, void *linesx4, u32 y, int is_32col
rotated_blit16:
    stmfd   sp!, {r4-r8,lr}

    add     r0, r0, #(240*320)*2
    sub     r0, r0, #(240+4)*2          @ y starts from 4
    add     r0, r0, r2, lsl #1

    tst     r3, r3
    subne   r0, r0, #(240*32)*2
    addne   r1, r1, #32*2
    movne   lr, #256/4
    moveq   lr, #320/4

rotated_blit_loop16:
    ldr     r2, [r1, #320*0*2]
    ldr     r3, [r1, #320*1*2]
    ldr     r4, [r1, #320*2*2]
    ldr     r5, [r1, #320*3*2]
    rb_line_low
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2
    rb_line_hi
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2

    ldr     r2, [r1, #320*0*2+4]
    ldr     r3, [r1, #320*1*2+4]
    ldr     r4, [r1, #320*2*2+4]
    ldr     r5, [r1, #320*3*2+4]
    rb_line_low
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2
    rb_line_hi
    stmia   r0, {r7,r8}
    sub     r0, r0, #240*2

    subs    lr, lr, #1
    add     r1, r1, #8
    bne     rotated_blit_loop16

    ldmfd   sp!, {r4-r8,pc}


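@ Coarse busy-wait: burns roughly 'c' CPU cycles, assuming the subs+bpl pair
@ below costs about 4 cycles per iteration on the target core.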
.global spend_cycles @ c

spend_cycles:
    mov     r0, r0, lsr #2              @ 4 cycles/iteration
    sub     r0, r0, #2                  @ entry/exit/init
.sc_loop:
    subs    r0, r0, #1
    bpl     .sc_loop

    bx      lr

@ vim:filetype=armasm