/*
 * some color conversion and blitting routines
 * (C) notaz, 2006-2009
 * (C) irixxxx, 2020-2023
 *
 * This work is licensed under the terms of MAME license.
 * See COPYING file in the top-level directory.
 */

        .text
        .align  4

@ Convert pairs of packed BGR444 pixels (0000bbbb ggggrrrr per 16 bits)
@ to xRGB8888-style words (00000000 rrrrrrrr gggggggg bbbbbbbb), writing
@ both converted pixels through r0 (post-incremented by 8 total).
@
@ In:    \rin = two packed BGR444 pixels
@        lr   = 0x00f000f0 channel mask (caller must set this up)
@ Out:   r3 = converted lower pixel, r2 = converted higher pixel,
@        both stored to [r0]; \rin is trashed
@ \sh:   0 = plain conversion
@        1 = shadow: each channel halved (caller must clear the mask
@            LSBs first so the shift can't leak bits between channels)
@        2 = highlight: +1/4 full scale added per channel, saturating
@            at full intensity (carry out of the channel -> orrcs fill)
.macro convRGB32_2 rin sh=0
        and     r2, lr, \rin, lsr #4            @ blue nibbles -> 0x00f000f0 slots
        and     r3, \rin, lr                    @ green nibbles
        orr     r2, r2, r3, lsl #8              @ r2 = g0b0g0b0 (both pixels)

        mov     r3, r2, lsl #16                 @ r3 = g0b00000 (low pixel only)
        and     \rin, lr, \rin, ror #12         @ \rin = 00r000r0 (red, reversed)
        orr     r3, r3, \rin, lsr #16           @ r3 = g0b000r0
.if \sh == 1
        mov     r3, r3, ror #17                 @ shadow mode: rotate into place
                                                @ (16) plus halve (1)
.elseif \sh == 2
        adds    r3, r3, #0x40000000             @ green: +1/4 full scale
        orrcs   r3, r3, lr, lsl #24             @ carry -> saturate channel to 0xf0
        mov     r3, r3, ror #8                  @ rotate next channel into position
        adds    r3, r3, #0x40000000
        orrcs   r3, r3, lr, lsl #24
        mov     r3, r3, ror #16
        adds    r3, r3, #0x40000000
        orrcs   r3, r3, lr, lsl #24
        mov     r3, r3, ror #24                 @ restore byte order
.else
        mov     r3, r3, ror #16                 @ r3 = low pixel, 00r0g0b0
.endif

        orr     r3, r3, r3, lsr #3              @ expand 4-bit channels to 8 bits
        str     r3, [r0], #4                    @ store lower pixel

        mov     r2, r2, lsr #16                 @ high pixel: 0000g0b0
        orr     r2, r2, \rin, lsl #16           @ insert red: 00r0g0b0
.if \sh == 1
        mov     r2, r2, lsr #1                  @ shadow mode: halve all channels
.elseif \sh == 2
        mov     r2, r2, ror #8
        adds    r2, r2, #0x40000000             @ blue: +1/4 full scale
        orrcs   r2, r2, lr, lsl #24             @ carry -> saturate channel to 0xf0
        mov     r2, r2, ror #8
        adds    r2, r2, #0x40000000
        orrcs   r2, r2, lr, lsl #24
        mov     r2, r2, ror #8
        adds    r2, r2, #0x40000000
        orrcs   r2, r2, lr, lsl #24
        mov     r2, r2, ror #8                  @ restore byte order
.endif

        orr     r2, r2, r2, lsr #3              @ expand 4-bit channels to 8 bits
        str     r2, [r0], #4                    @ store higher pixel
.endm
64 | \r |
65 | \r |
.global bgr444_to_rgb32 @ void *to, void *from, unsigned entries

@ Convert a BGR444 palette to 32-bit xRGB, 8 entries per loop pass.
@ In:   r0 = to (output, 4 bytes/entry), r1 = from (input, 2 bytes/entry),
@       r2 = number of entries
@ Note: the loop body runs before the count check, so one batch of 8 is
@       converted even if r2 < 8 — callers are expected to pass a
@       non-zero multiple of 8.
bgr444_to_rgb32:
        stmfd   sp!, {r4-r7,lr}

        mov     r12, r2, lsr #3                 @ repeats (8 entries each)
        mov     lr, #0x00f00000                 @ build 0x00f000f0 channel
        orr     lr, lr, #0x00f0                 @ mask for convRGB32_2

.loopRGB32:
        ldmia   r1!, {r4-r7}                    @ fetch 8 BGR444 entries
        convRGB32_2 r4
        convRGB32_2 r5
        convRGB32_2 r6
        convRGB32_2 r7

        subs    r12, r12, #1
        bgt     .loopRGB32

        ldmfd   sp!, {r4-r7,pc}
cc68a136 |
86 | \r |
87 | \r |
.global bgr444_to_rgb32_sh @ void *to, void *from

@ Convert 0x40 BGR444 entries twice for shadow/hilite effects:
@ pass 1 writes a brightened (sh=2, saturating +1/4) copy starting at
@ to + 0x40*4, pass 2 rewinds the source and appends a darkened
@ (sh=1, halved) copy right after it.
@ In:   r0 = to, r1 = from
bgr444_to_rgb32_sh:
        stmfd   sp!, {r4-r7,lr}

        mov     r12, #0x40>>3                   @ repeats (8 entries each)
        add     r0, r0, #0x40*4                 @ skip the first 0x40 output slots
        mov     lr, #0x00f00000                 @ build 0x00f000f0 channel
        orr     lr, lr, #0x00f0                 @ mask for convRGB32_2

.loopRGB32sh:
        ldmia   r1!, {r4-r7}
        convRGB32_2 r4, 2                       @ sh=2: brightened variant
        convRGB32_2 r5, 2
        convRGB32_2 r6, 2
        convRGB32_2 r7, 2

        subs    r12, r12, #1
        bgt     .loopRGB32sh

        mov     r12, #0x40>>3                   @ repeats for the second pass
        sub     r1, r1, #0x40*2                 @ rewind source (0x40 u16 entries)
        and     lr, lr, lr, lsl #1              @ kill LSB for correct shadow colors
                                                @ (halving must not leak a bit
                                                @ into the neighbouring channel)

.loopRGB32hi:
        ldmia   r1!, {r4-r7}
        convRGB32_2 r4, 1                       @ sh=1: halved (shadow) variant
        convRGB32_2 r5, 1
        convRGB32_2 r6, 1
        convRGB32_2 r7, 1

        subs    r12, r12, #1
        bgt     .loopRGB32hi

        ldmfd   sp!, {r4-r7,lr}
        bx      lr
124 | \r |
125 | \r |
126 | @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r |
127 | \r |
.global vidcpy_8bit @ void *dest, void *src, int x_y, int w_h

@ Copy a w x h rectangle of 8-bit pixels at (x,y) from a source with a
@ 328-byte pitch (8-pixel left border) to a 320-byte-pitch destination.
@ In:   r0 = dest (320 bytes/line), r1 = src (328 bytes/line, +8 border)
@       r2 = x<<16 | y, r3 = w<<16 | h
@ Note: the main loop is entered unconditionally (the beq below is
@       deliberately disabled), so w must be >= 32; the tail loop
@       handles w%32 in 8-byte steps, so w must be a multiple of 8.
vidcpy_8bit:
        stmfd   sp!, {r4-r6,lr}

        mov     r12, r2, lsl #16                @ r12 = y << 16

        mov     r4, r12, lsr #16-8              @ 320*y = 256*y+64*y
        add     r4, r4, r12, lsr #16-6
        add     r0, r0, r4                      @ pd += 320*y + x
        add     r0, r0, r2, lsr #16

        add     r4, r4, r12, lsr #16-3          @ 328*y = 320*y + 8*y
        add     r1, r1, r4                      @ ps += 328*y + x + 8
        add     r1, r1, r2, lsr #16
        add     r1, r1, #8                      @ skip the left border

        mov     lr, r3, lsr #16                 @ lr  = w
        mov     r12, r3, lsl #16                @ r12 = h << 16

vidCpy8bit_loop_out:
        movs    r6, lr, lsr #5                  @ r6 = w/32 (32 bytes per pass)
@       beq     vidCpy8bit_loop_end
vidCpy8bit_loop:
        subs    r6, r6, #1
        ldmia   r1!, {r2-r5}                    @ copy 32 bytes per iteration
        stmia   r0!, {r2-r5}
        ldmia   r1!, {r2-r5}
        stmia   r0!, {r2-r5}
        bne     vidCpy8bit_loop

        ands    r6, lr, #0x0018                 @ leftover w%32, 8-byte granular
        beq     vidCpy8bit_loop_end
vidCpy8bit_loop2:
        ldmia   r1!, {r2-r3}                    @ copy 8 bytes per iteration
        subs    r6, r6, #8
        stmia   r0!, {r2-r3}
        bne     vidCpy8bit_loop2

vidCpy8bit_loop_end:
        subs    r12, r12, #1<<16                @ one line done
        add     r0, r0, #320                    @ pd += pitch - w
        sub     r0, r0, lr
        add     r1, r1, #328                    @ ps += pitch - w
        sub     r1, r1, lr
        bne     vidCpy8bit_loop_out

        ldmfd   sp!, {r4-r6,pc}
cc68a136 |
175 | \r |
cc68a136 |
176 | \r |
.global vidcpy_8bit_rot @ void *dest, void *src, int x_y, int w_h

@ Like vidcpy_8bit, but rotates the rectangle 90 degrees into a
@ 240-byte-pitch destination (for a portrait-oriented LCD). Works on
@ 4x4 pixel tiles: each inner pass reads 4 bytes from each of 4 source
@ lines and stores 4 rotated 4-byte columns with a -240 stride.
@ In:   r0 = dest (240 bytes/line), r1 = src (328 bytes/line, +8 border)
@       r2 = x<<16 | y, r3 = w<<16 | h
@ Note: the inner loop is entered unconditionally (the beq below is
@       disabled), so w must be a non-zero multiple of 4; h is consumed
@       4 lines at a time, so h must be a multiple of 4 as well.
vidcpy_8bit_rot:
        stmfd   sp!, {r4-r10,lr}

        mov     r12, r2, lsl #16                @ r12 = y << 16

        add     r0, r0, r12, lsr #16            @ pd += y + (319-x)*240
        mov     r4, #320
        sub     r4, r4, #1
        sub     r4, r4, r2, lsr #16             @ (319-x)
        add     r0, r0, r4, lsl #8              @ *240 = *256 - *16
        sub     r0, r0, r4, lsl #4

        mov     r4, r12, lsr #16-8              @ 328*y = 256*y + 64*y + 8*y
        add     r4, r4, r12, lsr #16-6
        add     r4, r4, r12, lsr #16-3
        add     r1, r1, r4                      @ ps += 328*y + x + 8
        add     r1, r1, r2, lsr #16
        add     r1, r1, #8                      @ skip the left border

        mov     lr, r3, lsr #16                 @ lr  = w
        mov     r12, r3, lsl #16                @ r12 = h << 16

        mov     r8, #328                        @ source pitch for the ldr chain
vidCpy8bitrot_loop_out:
        mov     r10, r0                         @ remember dest start of this band
        movs    r9, lr, lsr #2                  @ r9 = w/4 (4 pixels per pass)
@       beq     vidCpy8bitrot_loop_end
vidCpy8bitrot_loop:
        mov     r6, r1
        ldr     r2, [r6], r8                    @ 4 bytes from each of 4 lines
        ldr     r3, [r6], r8
        ldr     r4, [r6], r8
        ldr     r5, [r6], r8

        mov     r6, r2, lsl #24                 @ column 0: byte 0 of each line
        mov     r6, r6, lsr #8
        orr     r6, r6, r3, lsl #24
        mov     r6, r6, lsr #8
        orr     r6, r6, r4, lsl #24
        mov     r6, r6, lsr #8
        orr     r6, r6, r5, lsl #24
        str     r6, [r0], #-240

        and     r6, r3, #0xff00                 @ column 1: byte 1 of each line
        and     r7, r2, #0xff00
        orr     r6, r6, r7, lsr #8
        and     r7, r4, #0xff00
        orr     r6, r6, r7, lsl #8
        and     r7, r5, #0xff00
        orr     r6, r6, r7, lsl #16
        str     r6, [r0], #-240

        and     r6, r4, #0xff0000               @ column 2: byte 2 of each line
        and     r7, r2, #0xff0000
        orr     r6, r6, r7, lsr #16
        and     r7, r3, #0xff0000
        orr     r6, r6, r7, lsr #8
        and     r7, r5, #0xff0000
        orr     r6, r6, r7, lsl #8
        str     r6, [r0], #-240

        mov     r6, r5, lsr #24                 @ column 3: byte 3 of each line
        mov     r6, r6, lsl #8
        orr     r6, r6, r4, lsr #24
        mov     r6, r6, lsl #8
        orr     r6, r6, r3, lsr #24
        mov     r6, r6, lsl #8
        orr     r6, r6, r2, lsr #24
        str     r6, [r0], #-240

        subs    r9, r9, #1
        add     r1, r1, #4                      @ next 4 source pixels
        bne     vidCpy8bitrot_loop

vidCpy8bitrot_loop_end:
        subs    r12, r12, #4<<16                @ 4 source lines done
        add     r0, r10, #4                     @ dest: step to the next band
        sub     r1, r1, lr                      @ src: rewind w, then skip
        add     r1, r1, #4*328                  @ down 4 lines
        bne     vidCpy8bitrot_loop_out

        ldmfd   sp!, {r4-r10,pc}
cc41eb4f |
260 | \r |
261 | \r |
.global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col

@ Blit 4 lines of 8bpp pixels (320 bytes/line source pitch in r8),
@ rotated 90 degrees, into a 240-byte-pitch destination.
@ In:   r0 = dst, r1 = 4 consecutive source lines, r2 = y (starts at 4),
@       r3 = nonzero for 256-pixel (32-column) mode, which skips 32
@       pixels on each side to center the image
@ The rotation core is the same 4x4-tile byte shuffle as in
@ vidcpy_8bit_rot.
rotated_blit8:
        stmfd   sp!, {r4-r8,lr}
        mov     r8, #320                        @ source pitch

rotated_blit8_2:                                @ alternate entry: caller has
                                                @ already set r8 to the pitch
        add     r0, r0, #(240*320)
        sub     r0, r0, #(240+4)                @ y starts from 4
        add     r0, r0, r2

        tst     r3, r3
        subne   r0, r0, #(240*32)               @ 32-column mode: center image
        addne   r1, r1, #32
        movne   lr, #256/4                      @ pass count, 4 pixels each
        moveq   lr, #320/4

rotated_blit_loop8:
        mov     r6, r1
        ldr     r2, [r6], r8                    @ 4 bytes from each of 4 lines
        ldr     r3, [r6], r8
        ldr     r4, [r6], r8
        ldr     r5, [r6], r8

        mov     r6, r2, lsl #24                 @ column 0: byte 0 of each line
        mov     r6, r6, lsr #8
        orr     r6, r6, r3, lsl #24
        mov     r6, r6, lsr #8
        orr     r6, r6, r4, lsl #24
        mov     r6, r6, lsr #8
        orr     r6, r6, r5, lsl #24
        str     r6, [r0], #-240

        and     r6, r3, #0xff00                 @ column 1: byte 1 of each line
        and     r7, r2, #0xff00
        orr     r6, r6, r7, lsr #8
        and     r7, r4, #0xff00
        orr     r6, r6, r7, lsl #8
        and     r7, r5, #0xff00
        orr     r6, r6, r7, lsl #16
        str     r6, [r0], #-240

        and     r6, r4, #0xff0000               @ column 2: byte 2 of each line
        and     r7, r2, #0xff0000
        orr     r6, r6, r7, lsr #16
        and     r7, r3, #0xff0000
        orr     r6, r6, r7, lsr #8
        and     r7, r5, #0xff0000
        orr     r6, r6, r7, lsl #8
        str     r6, [r0], #-240

        mov     r6, r5, lsr #24                 @ column 3: byte 3 of each line
        mov     r6, r6, lsl #8
        orr     r6, r6, r4, lsr #24
        mov     r6, r6, lsl #8
        orr     r6, r6, r3, lsr #24
        mov     r6, r6, lsl #8
        orr     r6, r6, r2, lsr #24
        str     r6, [r0], #-240

        subs    lr, lr, #1
        add     r1, r1, #4                      @ next 4 source pixels
        bne     rotated_blit_loop8

        ldmfd   sp!, {r4-r8,pc}
326 | \r |
327 | \r |
@ Pack the LOW 16-bit pixel of each of r2..r5 (4 source lines) into a
@ rotated column: r7 = pixels of lines 0+1, r8 = pixels of lines 2+3.
@ input: r2-r5
@ output: r7,r8
@ trash: r6
.macro rb_line_low
        mov     r6, r2, lsl #16
        mov     r7, r3, lsl #16
        orr     r7, r7, r6, lsr #16             @ r7 = line1_pix:line0_pix
        mov     r6, r4, lsl #16
        mov     r8, r5, lsl #16
        orr     r8, r8, r6, lsr #16             @ r8 = line3_pix:line2_pix
.endm
cc68a136 |
339 | \r |
cc41eb4f |
@ Same as rb_line_low, but packs the HIGH 16-bit pixel of r2..r5.
@ input: r2-r5; output: r7,r8; trash: r6
.macro rb_line_hi
        mov     r6, r2, lsr #16
        mov     r7, r3, lsr #16
        orr     r7, r6, r7, lsl #16             @ r7 = line1_pix:line0_pix
        mov     r6, r4, lsr #16
        mov     r8, r5, lsr #16
        orr     r8, r6, r8, lsl #16             @ r8 = line3_pix:line2_pix
.endm
cc68a136 |
348 | \r |
cc41eb4f |
.global rotated_blit16 @ void *dst, void *linesx4, u32 y, int is_32col

@ 16bpp version of rotated_blit8: blit 4 source lines (320 pixels, 2
@ bytes each), rotated 90 degrees, into a 240-pixel-pitch destination.
@ Each pass rotates a 4x4-pixel tile via rb_line_low/rb_line_hi.
@ In:   r0 = dst, r1 = 4 consecutive source lines, r2 = y (starts at 4),
@       r3 = nonzero for centered 256-pixel (32-column) mode
rotated_blit16:
        stmfd   sp!, {r4-r8,lr}

        add     r0, r0, #(240*320)*2
        sub     r0, r0, #(240+4)*2              @ y starts from 4
        add     r0, r0, r2, lsl #1

        tst     r3, r3
        subne   r0, r0, #(240*32)*2             @ 32-column mode: center image
        addne   r1, r1, #32*2
        movne   lr, #256/4                      @ pass count, 4 pixels each
        moveq   lr, #320/4

rotated_blit_loop16:
        ldr     r2, [r1, #320*0*2]              @ 2 pixels from each of 4 lines
        ldr     r3, [r1, #320*1*2]
        ldr     r4, [r1, #320*2*2]
        ldr     r5, [r1, #320*3*2]
        rb_line_low                             @ column of the low pixels
        stmia   r0, {r7,r8}
        sub     r0, r0, #240*2
        rb_line_hi                              @ column of the high pixels
        stmia   r0, {r7,r8}
        sub     r0, r0, #240*2

        ldr     r2, [r1, #320*0*2+4]            @ next 2 pixels of each line
        ldr     r3, [r1, #320*1*2+4]
        ldr     r4, [r1, #320*2*2+4]
        ldr     r5, [r1, #320*3*2+4]
        rb_line_low
        stmia   r0, {r7,r8}
        sub     r0, r0, #240*2
        rb_line_hi
        stmia   r0, {r7,r8}
        sub     r0, r0, #240*2

        subs    lr, lr, #1
        add     r1, r1, #8                      @ advance 4 pixels in each line
        bne     rotated_blit_loop16

        ldmfd   sp!, {r4-r8,pc}
cc68a136 |
391 | \r |
392 | \r |
cc68a136 |
.global spend_cycles @ void spend_cycles(int c)

@ Busy-wait for roughly c CPU cycles.
@ In:   r0 = cycle count; trashed on return. No other state touched.
spend_cycles:
        mov     r0, r0, lsr #2          @ each delay pass costs ~4 cycles
        sub     r0, r0, #2              @ account for entry/exit/init overhead
.sc_delay:
        subs    r0, r0, #1
        bpl     .sc_delay               @ spin until the count underflows

        bx      lr
4f265db7 |
403 | \r |
@ vim:filetype=armasm