1 @ vim:filetype=armasm
\r
2 @ some color conversion and blitting routines
\r
4 @ (c) Copyright 2006, notaz
\r
5 @ All Rights Reserved
\r
7 .include "port_config.s"
\r
10 @ Convert 0000bbb0 ggg0rrr0 0000bbb0 ggg0rrr0
\r
11 @ to 00000000 rrr00000 ggg00000 bbb00000 ...
\r
13 @ lr = 0x00e000e0, out: r3=lower_pix, r2=higher_pix; trashes rin
\r
14 @ if sh==2, r8=0x00404040 (sh!=0 destroys flags!)
\r
@ convRGB32_2 rin, sh=0
@ Splits the packed pixel pair held in \rin into two output words using the
@ mask in lr (0x00e000e0 per the contract comment preceding this macro):
@ r3 receives the lower pixel, r2 the higher pixel, and \rin is used as scratch.
@ Each adds/orrcs pair below adds 0x40000000 and, if that addition carries out
@ of bit 31, ORs in 0xe0000000 (i.e. clamps the top three bits to all ones).
@ NOTE(review): the embedded line numbers skip values (16 -> 18, 22 -> 24, ...),
@ so the directives that choose between the \sh variants, the instruction that
@ first sets r3, and the macro terminator are presumably outside this view.
15 .macro convRGB32_2 rin sh=0
\r
16 and r2, lr, \rin, lsr #4 @ blue
\r
@ NOTE(review): r3 is read here but no visible line has written it yet.
18 orr r2, r2, r3, lsl #8 @ g0b0g0b0
\r
20 mov r3, r2, lsl #16 @ g0b00000
\r
21 and \rin,lr, \rin, ror #12 @ 00r000r0 (reversed)
\r
22 orr r3, r3, \rin, lsr #16 @ g0b000r0
\r
24 mov r3, r3, ror #17 @ shadow mode
\r
26 adds r3, r3, #0x40000000 @ green
\r
27 orrcs r3, r3, #0xe0000000 @ carry out: force the top three bits on
\r
29 adds r3, r3, #0x40000000
\r
30 orrcs r3, r3, #0xe0000000 @ carry out: force the top three bits on
\r
32 adds r3, r3, #0x40000000
\r
33 orrcs r3, r3, #0xe0000000 @ carry out: force the top three bits on
\r
35 orr r3, r3, r3, lsr #3 @ merge in a copy of r3 shifted right by 3
\r
37 mov r3, r3, ror #16 @ r3=low
\r
38 orr r3, r3, r3, lsr #3 @ merge in a copy of r3 shifted right by 3
\r
44 orr r2, r2, \rin, lsl #16
\r
49 adds r2, r2, #0x40000000 @ blue
\r
50 orrcs r2, r2, #0xe0000000 @ carry out: force the top three bits on
\r
52 adds r2, r2, #0x40000000
\r
53 orrcs r2, r2, #0xe0000000 @ carry out: force the top three bits on
\r
55 adds r2, r2, #0x40000000
\r
56 orrcs r2, r2, #0xe0000000 @ carry out: force the top three bits on
\r
58 orr r2, r2, r2, lsr #3 @ merge in a copy of r2 shifted right by 3
\r
60 orr r2, r2, r2, lsr #3 @ merge in a copy of r2 shifted right by 3
\r
@ vidConvCpyRGB32(void *to, void *from, int pixels)
@ Only the register save/restore and the repeat-count setup are visible here;
@ the entry label, the conversion loop and the return are outside this view.
67 .global vidConvCpyRGB32 @ void *to, void *from, int pixels
\r
70 stmfd sp!, {r4-r7,lr} @ save r4-r7 and the return address
\r
@ r12 = r2 >> 3; presumably r2 is the `pixels` argument, giving one repeat
@ per 8 pixels - confirm against the loop body in the full file.
72 mov r12, r2, lsr #3 @ repeats
\r
87 ldmfd sp!, {r4-r7,lr} @ restore r4-r7 and lr (return instruction not visible)
\r
@ vidConvCpyRGB32sh(void *to, void *from, int pixels)
@ Only the register save/restore and the repeat-count setup are visible here;
@ the entry label, the conversion loop and the return are outside this view.
@ NOTE(review): the "sh" suffix presumably selects the shadow variant of the
@ conversion macro - confirm against the full file.
91 .global vidConvCpyRGB32sh @ void *to, void *from, int pixels
\r
94 stmfd sp!, {r4-r7,lr} @ save r4-r7 and the return address
\r
@ r12 = r2 >> 3; presumably r2 is the `pixels` argument (8 pixels per repeat).
96 mov r12, r2, lsr #3 @ repeats
\r
111 ldmfd sp!, {r4-r7,lr} @ restore r4-r7 and lr (return instruction not visible)
\r
@ vidConvCpyRGB32hi(void *to, void *from, int pixels)
@ Only the register save/restore, the repeat count and the mask setup are
@ visible here; the entry label, the loop and the return are outside this view.
115 .global vidConvCpyRGB32hi @ void *to, void *from, int pixels
\r
118 stmfd sp!, {r4-r7,lr} @ save r4-r7 and the return address
\r
@ r12 = r2 >> 3; presumably r2 is the `pixels` argument (8 pixels per repeat).
120 mov r12, r2, lsr #3 @ repeats
\r
@ Build lr = 0x00e000e0 in two steps (the mask convRGB32_2 documents for lr).
121 mov lr, #0x00e00000
\r
122 orr lr, lr, #0x00e0
\r
134 ldmfd sp!, {r4-r7,lr} @ restore r4-r7 and lr (return instruction not visible)
\r
138 @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
\r
140 @ -------- M2 stuff ---------
\r
@ vidConvCpy_90(void *to, void *from, int width)
@ Only part of this routine is visible (the embedded line numbers skip values
@ and no entry label is shown), so the notes below describe the visible
@ instructions only.
@ NOTE(review): vidConvCpy_90 is declared .global a second time further down
@ in this file with a four-argument prototype - confirm whether this earlier
@ three-argument version is disabled in the full file.
142 .global vidConvCpy_90 @ void *to, void *from, int width
\r
145 stmfd sp!, {r4-r10,lr} @ save r4-r10 and the return address
\r
@ Build lr = 0x00F000F0 in two steps.
147 mov lr, #0x00F00000
\r
148 orr lr, lr, #0x00F0
\r
150 mov r12, #224/4 @ row counter
\r
151 mov r10, r2, lsl #2 @ we do 2 pixel wide copies
\r
153 add r8, r0, #256*4 @ parallel line
\r
@ Advance the source pointer by 0x23000 + 0xB80 = 0x23B80 bytes in total.
154 add r1, r1, #0x23000
\r
155 add r1, r1, #0x00B80 @ r1+=328*223*2+8*2
\r
158 mov r4, #0 @ fill bottom border
\r
170 @ at first this loop was written differently: src pixels were fetched with ldm's and
\r
171 @ dest was not sequential. It ran nearly 2 times slower. It seems it is very important
\r
172 @ to do sequential memory access on those items, which we have more (to offload addressing bus?).
\r
@ Four post-indexed loads: each reads one word, then moves r1 back 328*2 bytes.
174 ldr r4, [r1], #-328*2
\r
175 ldr r5, [r1], #-328*2
\r
176 ldr r6, [r1], #-328*2
\r
177 ldr r7, [r1], #-328*2
\r
@ Loop back while Z is clear (the flag-setting instruction is not visible here).
189 bne .loopM2RGB32_90
\r
191 mov r4, #0 @ top border
\r
@ Write 8 words to each of the two destination lines (r0 and r8), advancing both.
194 stmia r0!, {r4-r6,r12}
\r
195 stmia r0!, {r4-r6,r12}
\r
196 stmia r8!, {r4-r6,r12}
\r
197 stmia r8!, {r4-r6,r12}
\r
@ Conditional epilogue: restores r4-r10 and returns when Z is set
@ (the flag-setting instruction is not visible here).
200 ldmeqfd sp!, {r4-r10,pc} @ return
\r
202 add r0, r8, #16*4 @ set new dst pointer
\r
204 add r9, r9, #2*2 @ fix src pointer
\r
207 stmia r0!, {r4-r6,r12} @ bottom border
\r
208 stmia r0!, {r4-r6,r12}
\r
209 stmia r8!, {r4-r6,r12}
\r
210 stmia r8!, {r4-r6,r12}
\r
212 mov r12, #224/4 @ restore row counter
\r
217 @ converter for vidConvCpy_270
\r
218 @ lr = 0x00F000F0, out: r3=lower_pix, r2=higher_pix; trashes rin
\r
@ convRGB32_3 rin
@ Rearranges the packed pixel pair in \rin using the mask in lr (0x00F000F0 per
@ the contract comment preceding this macro), producing r3 and r2; \rin is
@ overwritten, ending up as r3 rotated by 16.
@ NOTE(review): the embedded line numbers skip values (220 -> 222, 229 -> 232),
@ so the instruction that first sets r3 and the macro terminator are presumably
@ outside this view.
219 .macro convRGB32_3 rin
\r
220 and r2, lr, \rin, lsr #4 @ blue
\r
@ NOTE(review): r3 is read here but no visible line has written it yet.
222 orr r2, r2, r3, lsl #8 @ g0b0g0b0
\r
224 mov r3, r2, lsl #16 @ g0b00000
\r
225 and \rin,lr, \rin, ror #12 @ 00r000r0 (reversed)
\r
226 orr r3, r3, \rin, lsr #16 @ g0b000r0
\r
228 mov r2, r2, lsr #16 @ keep only the upper halfword of r2, moved down
\r
229 orr r2, r2, \rin, lsl #16 @ place the low halfword of \rin in the upper half
\r
232 mov \rin,r3, ror #16 @ r3=low
\r
235 @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
\r
238 @ takes byte-sized pixels from r3-r6, fetches from pal and stores to r7,r8,r10,lr
\r
@ mode2_4pix shift
@ For each of r3, r4, r5 and r6: index = (reg >> \shift) & r11, then load the
@ word at r2 + index*4. Results land in r7, r8, r10 and lr respectively.
@ r2 is the table base ("pal" in the comment above) and r11 is the index mask
@ (presumably 0xFF, as stated for a sibling macro - confirm at the call sites).
@ NOTE(review): the macro terminator is not visible in this view.
240 .macro mode2_4pix shift
\r
241 and r7, r11, r3, lsr #\shift
\r
242 ldr r7, [r2, r7, lsl #2]
\r
244 and r8, r11, r4, lsr #\shift
\r
245 ldr r8, [r2, r8, lsl #2]
\r
247 and r10,r11, r5, lsr #\shift
\r
248 ldr r10,[r2, r10,lsl #2]
\r
250 and lr, r11, r6, lsr #\shift
\r
251 ldr lr, [r2, lr, lsl #2]
\r
@ mode2_4pix_getpix0 dreg, sreg
@ \dreg = word at r2 + (\sreg & r11) * 4, i.e. a table lookup indexed by the
@ bits of \sreg selected by the mask in r11 (no shift applied).
@ NOTE(review): the macro terminator is not visible in this view.
255 .macro mode2_4pix_getpix0 dreg sreg
\r
256 and \dreg, r11, \sreg
\r
257 ldr \dreg, [r2, \dreg, lsl #2]
\r
@ mode2_4pix_getpix1 dreg, sreg
@ \dreg = word at r2 + ((\sreg >> 8) & r11) * 4, i.e. a table lookup indexed
@ by \sreg shifted right by 8 and masked with r11.
@ NOTE(review): the macro terminator is not visible in this view.
260 .macro mode2_4pix_getpix1 dreg sreg
\r
261 and \dreg, r11, \sreg, lsr #8
\r
262 ldr \dreg, [r2, \dreg, lsl #2]
\r
@ mode2_4pix_getpix2 dreg, sreg
@ \dreg = word at r2 + ((\sreg >> 16) & r11) * 4, i.e. a table lookup indexed
@ by \sreg shifted right by 16 and masked with r11.
@ NOTE(review): the macro terminator is not visible in this view.
265 .macro mode2_4pix_getpix2 dreg sreg
\r
266 and \dreg, r11, \sreg, lsr #16
\r
267 ldr \dreg, [r2, \dreg, lsl #2]
\r
@ mode2_4pix_getpix3 dreg, sreg
@ \dreg = word at r2 + ((\sreg >> 24) & r11) * 4, i.e. a table lookup indexed
@ by \sreg shifted right by 24 and masked with r11.
@ NOTE(review): the macro terminator is not visible in this view.
270 .macro mode2_4pix_getpix3 dreg sreg
\r
271 and \dreg, r11, \sreg, lsr #24
\r
272 ldr \dreg, [r2, \dreg, lsl #2]
\r
275 @ takes byte-sized pixels from reg, fetches from pal and stores to r3-r6
\r
@ mode2_4pix2_0 reg
@ Looks up the four byte fields of \reg through the table at r2, lowest byte
@ first: byte 0 -> r3, byte 1 -> r4, byte 2 -> r5, byte 3 -> r6.
@ NOTE(review): the macro terminator is not visible in this view.
277 .macro mode2_4pix2_0 reg
\r
278 mode2_4pix_getpix0 r3, \reg
\r
279 mode2_4pix_getpix1 r4, \reg
\r
280 mode2_4pix_getpix2 r5, \reg
\r
281 mode2_4pix_getpix3 r6, \reg
\r
@ mode2_4pix2_180 reg
@ Same lookups as mode2_4pix2_0 but in reversed order:
@ byte 3 -> r3, byte 2 -> r4, byte 1 -> r5, byte 0 -> r6.
@ NOTE(review): the macro terminator is not visible in this view.
285 .macro mode2_4pix2_180 reg
\r
286 mode2_4pix_getpix3 r3, \reg
\r
287 mode2_4pix_getpix2 r4, \reg
\r
288 mode2_4pix_getpix1 r5, \reg
\r
289 mode2_4pix_getpix0 r6, \reg
\r
292 @ takes byte-sized pixels from reg, fetches from pal and stores to r3-r5
\r
293 @ r11=0xFF, r2=pal, r10=0xfcfcfc, r6=tmp
\r
@ mode2_4pix_to3 reg, is180
@ Looks up four byte-sized pixels from \reg and blends them down to three
@ output values in r3-r5, using r6 as a temporary (register roles are listed
@ in the comment preceding this macro).
@ NOTE(review): two alternative lookup orders appear back to back below; they
@ are presumably selected by conditional directives on \is180 that are not
@ visible in this view, as are any masking steps and the macro terminator.
294 .macro mode2_4pix_to3 reg is180
\r
296 mode2_4pix_getpix3 r3, \reg
\r
297 mode2_4pix_getpix2 r4, \reg
\r
299 mode2_4pix_getpix0 r3, \reg @ gathering loads cause a weird-hang
\r
300 mode2_4pix_getpix1 r4, \reg
\r
303 sub r3, r3, r3, lsr #2 @ r3 *= 0.75
\r
304 add r3, r3, r4, lsr #2 @ r3 += r4 * 0.25
\r
308 mode2_4pix_getpix1 r5, \reg
\r
309 mode2_4pix_getpix0 r6, \reg
\r
311 mode2_4pix_getpix2 r5, \reg
\r
312 mode2_4pix_getpix3 r6, \reg
\r
@ NOTE(review): this add only contributes r5/2; the halving of r4 implied by
@ the comment is presumably done by a line missing from this view.
316 add r4, r4, r5, lsr #1 @ r4 = (r4 + r5) / 2;
\r
318 sub r6, r6, r6, lsr #2 @ r6 *= 0.75
\r
319 add r5, r6, r5, lsr #2 @ r5 = r6 + r5 * 0.25
\r
324 @ void *to, void *from, void *pal, int width
\r
@ vidConvCpyM2_landscape is270
@ Body shared by the landscape blitters instantiated below; arguments follow
@ the prototype in the comment above (to, from, pal, width).
@ r12 packs two counters: the row counter in the upper 16 bits and width*2
@ in the lower bits. The inner loop (local label 0) loads four source words
@ while stepping r1 back 328 bytes each time, and stores four words to each of
@ four destination lines that are (240+BORDER_R)*4 bytes apart.
@ NOTE(review): BORDER_R is not defined in this view (presumably it comes from
@ port_config.s). The embedded line numbers skip values, so the \is270
@ conditionals, the palette lookups, several border stores and the macro
@ terminator are outside this view.
325 .macro vidConvCpyM2_landscape is270
\r
326 stmfd sp!, {r4-r11,lr} @ save r4-r11 and the return address
\r
330 mov r12, #(224/4-1)<<16 @ row counter
\r
331 orr r12, r12, r3, lsl #1 @ we do 4 pixel wide copies (right to left)
\r
@ Advance the source pointer by 0x11c00 + 0x308 = 0x11f08 bytes in total.
336 add r1, r1, #0x11c00
\r
337 add r1, r1, #0x00308 @ 328*224+8
\r
341 mov r3, #0 @ fill top border
\r
@ r7 walks to the following destination lines, one line minus 8 words per step.
347 add r7, r0, #(240+BORDER_R)*4-8*4
\r
350 add r7, r7, #(240+BORDER_R)*4-8*4
\r
353 add r7, r7, #(240+BORDER_R)*4-8*4
\r
357 0: @ .loopM2RGB32_270:
\r
358 subs r12, r12, #1<<16 @ decrement the upper-half row counter, setting flags
\r
@ Four pre-indexed loads with writeback: r1 -= 328, then load one word.
366 ldr r3, [r1, #-328]!
\r
367 ldr r4, [r1, #-328]!
\r
368 ldr r5, [r1, #-328]!
\r
369 ldr r6, [r1, #-328]!
\r
@ Store four words on the current destination line, then move one line down.
377 stmia r0, {r7,r8,r10,lr}
\r
378 add r0, r0, #(240+BORDER_R)*4
\r
385 stmia r0, {r7,r8,r10,lr}
\r
386 add r0, r0, #(240+BORDER_R)*4
\r
393 stmia r0, {r7,r8,r10,lr}
\r
394 add r0, r0, #(240+BORDER_R)*4
\r
@ Last of the four lines: store with writeback, then go back up three lines.
401 stmia r0!,{r7,r8,r10,lr}
\r
402 sub r0, r0, #(240+BORDER_R)*4*3
\r
@ Loop while the result of the subs at the loop top was non-negative
@ (no visible instruction in between changes the flags).
404 bpl 0b @ .loopM2RGB32_270
\r
406 mov r3, #0 @ bottom border
\r
412 add r0, r0, #(240+BORDER_R)*4-8*4
\r
415 add r0, r0, #(240+BORDER_R)*4-8*4
\r
418 add r0, r0, #(240+BORDER_R)*4-8*4
\r
420 nop @ phone crashes if this is commented out. Do I stress it too much?
\r
423 add r12, r12, #1<<16 @ undo the final decrement of the row counter (flags untouched)
\r
@ Conditional epilogue: restores r4-r11 and returns when Z is set
@ (the flag-setting instruction is not visible here).
425 ldmeqfd sp!, {r4-r11,pc} @ return
\r
428 add r0, r0, #BORDER_R*4
\r
431 sub r9, r9, #4 @ fix src pointer
\r
437 stmia r0!, {r3-r6} @ top border
\r
439 add r7, r0, #(240+BORDER_R)*4-8*4
\r
442 add r7, r7, #(240+BORDER_R)*4-8*4
\r
445 add r7, r7, #(240+BORDER_R)*4-8*4
\r
449 orr r12, r12, #(224/4-1)<<16 @ restore row counter
\r
450 b 0b @ .loopM2RGB32_270
\r
@ vidConvCpy_90(void *to, void *from, void *pal, int width)
@ Exported entry point whose body is the vidConvCpyM2_landscape macro expanded
@ with is270 = 0.
@ NOTE(review): the entry label line is not visible in this view.
454 .global vidConvCpy_90 @ void *to, void *from, void *pal, int width
\r
457 vidConvCpyM2_landscape 0
\r
@ vidConvCpy_270(void *to, void *from, void *pal, int width)
@ Exported entry point whose body is the vidConvCpyM2_landscape macro expanded
@ with is270 = 1.
@ NOTE(review): the entry label line is not visible in this view.
460 .global vidConvCpy_270 @ void *to, void *from, void *pal, int width
\r
463 vidConvCpyM2_landscape 1
\r
@ vidConvCpy_center_0(void *to, void *from, void *pal)
@ r12 carries a per-line counter in its upper 16 bits, starting at 240/4-1.
@ After a line completes, the routine returns once the whole of r12 reaches
@ zero; otherwise it skips BORDER_R words in the destination and reloads the
@ upper counter.
@ NOTE(review): the embedded line numbers skip values, so the pixel loop body,
@ its label and branches, and the setup of the low half of r12 are outside
@ this view. BORDER_R is presumably defined in port_config.s.
466 .global vidConvCpy_center_0 @ void *to, void *from, void *pal
\r
468 vidConvCpy_center_0:
\r
469 stmfd sp!, {r4-r6,r11,lr} @ save r4-r6, r11 and the return address
\r
472 add r1, r1, #8 @ not border (centering 32col here)
\r
474 mov r12, #(240/4-1)<<16 @ upper half of r12 = per-line iteration counter
\r
479 subs r12, r12, #1<<16 @ count down the upper-half counter, setting flags
\r
@ Undo the final decrement; Z is set only when all of r12 becomes zero.
486 adds r12, r12, #1<<16
\r
487 ldmeqfd sp!, {r4-r6,r11,pc} @ return
\r
489 add r0, r0, #BORDER_R*4 @ skip BORDER_R words in the destination
\r
492 orr r12, #(240/4-1)<<16 @ reload the upper-half counter for the next line
\r
@ vidConvCpy_center_180(void *to, void *from, void *pal)
@ Counterpart of vidConvCpy_center_0 that first moves the source pointer
@ forward by 0x11c00 + 0x2B8 bytes; the loop structure on r12 is the same
@ (upper 16 bits count down from 240/4-1 per line).
@ NOTE(review): the embedded line numbers skip values, so the pixel loop body,
@ the .loopRGB32_c180 label and the setup of the low half of r12 are outside
@ this view. BORDER_R is presumably defined in port_config.s.
496 .global vidConvCpy_center_180 @ void *to, void *from, void *pal
\r
498 vidConvCpy_center_180:
\r
499 stmfd sp!, {r4-r6,r11,lr} @ save r4-r6, r11 and the return address
\r
502 add r1, r1, #0x11c00
\r
503 add r1, r1, #0x002B8 @ #328*224-72
\r
505 mov r12, #(240/4-1)<<16 @ upper half of r12 = per-line iteration counter
\r
510 subs r12, r12, #1<<16 @ count down the upper-half counter, setting flags
\r
514 bpl .loopRGB32_c180 @ keep looping while the result is non-negative
\r
@ Undo the final decrement; Z is set only when all of r12 becomes zero.
517 adds r12, r12, #1<<16
\r
518 ldmeqfd sp!, {r4-r6,r11,pc} @ return
\r
520 add r0, r0, #BORDER_R*4 @ skip BORDER_R words in the destination
\r
523 orr r12, #(240/4-1)<<16 @ reload the upper-half counter for the next line
\r
527 @ note: the following code assumes that (pal[x] & 0x030303) == 0
\r
@ vidConvCpy_center2_40c_0(void *to, void *from, void *pal, int lines)
@ Each pass of the inner loop feeds the four source pixels held in lr through
@ mode2_4pix_to3 (is180 = 0), which produces three output pixels; the
@ upper-half counter of r12 starts at 240/3-1.
@ NOTE(review): the embedded line numbers skip values, so the initial value of
@ r10, the load of lr, the stores and the setup of the low half of r12 are
@ outside this view. BORDER_R is presumably defined in port_config.s.
529 .global vidConvCpy_center2_40c_0 @ void *to, void *from, void *pal, int lines
\r
531 vidConvCpy_center2_40c_0:
\r
532 stmfd sp!, {r4-r6,r10,r11,lr} @ save r4-r6, r10, r11 and the return address
\r
@ Replicate r10 into the next higher bytes: r10 |= r10 << 8, done twice.
536 orr r10, r10, lsl #8
\r
537 orr r10, r10, lsl #8
\r
538 add r1, r1, #8 @ border
\r
540 mov r12, #(240/3-1)<<16 @ upper half of r12 = per-line iteration counter
\r
543 .loopRGB32_c2_40c_0:
\r
545 subs r12, r12, #1<<16 @ count down the upper-half counter, setting flags
\r
547 mode2_4pix_to3 lr, 0
\r
550 bpl .loopRGB32_c2_40c_0 @ branch while N is clear
\r
@ Undo the final decrement; Z is set only when all of r12 becomes zero.
553 adds r12, r12, #1<<16
\r
554 ldmeqfd sp!, {r4-r6,r10,r11,pc} @ return
\r
556 add r0, r0, #BORDER_R*4 @ skip BORDER_R words in the destination
\r
559 orr r12, #(240/3-1)<<16 @ reload the upper-half counter for the next line
\r
560 b .loopRGB32_c2_40c_0
\r
@ vidConvCpy_center2_40c_180(void *to, void *from, void *pal, int lines)
@ Counterpart of vidConvCpy_center2_40c_0 that invokes mode2_4pix_to3 with
@ is180 = 1; the upper-half counter of r12 starts at 240/3-1.
@ NOTE(review): the embedded line numbers skip values, so the initial value of
@ r10, the source pointer setup, the load of lr, the stores, the
@ .loop_c2_40c_180 label and the low half of r12 are outside this view.
@ BORDER_R is presumably defined in port_config.s.
563 .global vidConvCpy_center2_40c_180 @ void *to, void *from, void *pal, int lines
\r
565 vidConvCpy_center2_40c_180:
\r
566 stmfd sp!, {r4-r6,r10,r11,lr} @ save r4-r6, r10, r11 and the return address
\r
@ Replicate r10 into the next higher bytes: r10 |= r10 << 8, done twice.
570 orr r10, r10, lsl #8
\r
571 orr r10, r10, lsl #8
\r
575 @ add r1, r1, #0x11000
\r
576 @ add r1, r1, #0x00f00 @ #328*224
\r
578 mov r12, #(240/3-1)<<16 @ upper half of r12 = per-line iteration counter
\r
583 subs r12, r12, #1<<16 @ count down the upper-half counter, setting flags
\r
585 mode2_4pix_to3 lr, 1
\r
588 bpl .loop_c2_40c_180 @ branch while N is clear
\r
@ Undo the final decrement; Z is set only when all of r12 becomes zero.
591 adds r12, r12, #1<<16
\r
592 ldmeqfd sp!, {r4-r6,r10,r11,pc} @ return
\r
594 add r0, r0, #BORDER_R*4 @ skip BORDER_R words in the destination
\r
597 orr r12, #(240/3-1)<<16 @ reload the upper-half counter for the next line
\r
@ vidConvCpy_center2_32c_0(void *to, void *from, void *pal, int lines)
@ Each pass of the inner loop loads four source words (r7-r9 and lr) with
@ post-increment of r1 and runs them through the pixel macros; only the
@ mode2_4pix_to3 call on lr (is180 = 0) is visible. The upper-half counter of
@ r12 starts at 240/15-1.
@ NOTE(review): the embedded line numbers skip values, so the initial value of
@ r10, the handling of r7-r9, the stores, the loop label and branch, and the
@ low half of r12 are outside this view. BORDER_R is presumably defined in
@ port_config.s.
601 .global vidConvCpy_center2_32c_0 @ void *to, void *from, void *pal, int lines
\r
603 vidConvCpy_center2_32c_0:
\r
604 stmfd sp!, {r4-r11,lr} @ save r4-r11 and the return address
\r
@ Replicate r10 into the next higher bytes: r10 |= r10 << 8, done twice.
607 orr r10, r10, lsl #8
\r
608 orr r10, r10, lsl #8
\r
610 add r1, r1, #8 @ border
\r
612 mov r12, #(240/15-1)<<16 @ upper half of r12 = per-line iteration counter
\r
616 ldmia r1!, {r7-r9,lr} @ fetch four source words, advancing r1 by 16 bytes
\r
617 subs r12, r12, #1<<16 @ count down the upper-half counter, setting flags
\r
625 mode2_4pix_to3 lr, 0
\r
@ Undo the final decrement; Z is set only when all of r12 becomes zero.
630 adds r12, r12, #1<<16
\r
631 ldmeqfd sp!, {r4-r11,pc} @ return
\r
633 add r0, r0, #BORDER_R*4 @ skip BORDER_R words in the destination
\r
636 orr r12, #(240/15-1)<<16 @ reload the upper-half counter for the next line
\r
@ vidConvCpy_center2_32c_180(void *to, void *from, void *pal, int lines)
@ Counterpart of vidConvCpy_center2_32c_0 that walks the source backwards:
@ each pass loads four words (r7-r9 and lr) with pre-decrement of r1. Only the
@ mode2_4pix_to3 call on r7 (is180 = 1) is visible. The upper-half counter of
@ r12 starts at 240/15-1.
@ NOTE(review): the embedded line numbers skip values, so the initial value of
@ r10, the source pointer setup, the handling of r8, r9 and lr, the stores, the
@ .loop_c2_32c_180 label and the low half of r12 are outside this view.
@ BORDER_R is presumably defined in port_config.s.
640 .global vidConvCpy_center2_32c_180 @ void *to, void *from, void *pal, int lines
\r
642 vidConvCpy_center2_32c_180:
\r
643 stmfd sp!, {r4-r11,lr} @ save r4-r11 and the return address
\r
@ Replicate r10 into the next higher bytes: r10 |= r10 << 8, done twice.
646 orr r10, r10, lsl #8
\r
647 orr r10, r10, lsl #8
\r
652 @ add r1, r1, #0x11000
\r
653 @ add r1, r1, #0x00f00 @ #328*224
\r
655 mov r12, #(240/15-1)<<16 @ upper half of r12 = per-line iteration counter
\r
659 ldmdb r1!, {r7-r9,lr} @ fetch four source words, moving r1 back by 16 bytes
\r
660 subs r12, r12, #1<<16 @ count down the upper-half counter, setting flags
\r
668 mode2_4pix_to3 r7, 1
\r
670 bpl .loop_c2_32c_180 @ branch while N is clear
\r
@ Undo the final decrement; Z is set only when all of r12 becomes zero.
673 adds r12, r12, #1<<16
\r
674 ldmeqfd sp!, {r4-r11,pc} @ return
\r
676 add r0, r0, #BORDER_R*4 @ skip BORDER_R words in the destination
\r
679 orr r12, #(240/15-1)<<16 @ reload the upper-half counter for the next line
\r
683 @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
\r
@ vidClear(void *to, int lines)
@ Writes the contents of r1-r3 and lr to the destination in bursts of sixteen
@ words per inner iteration, skipping BORDER_R words between lines.
@ r12 packs two counters: r1 in the low bits and a per-line counter in the
@ upper 16 bits (reloaded with 240/16-1 for each new line).
@ NOTE(review): the embedded line numbers skip values, so the entry label, the
@ register save, the values placed in r1-r3/lr (presumably zero for a clear -
@ confirm), the loop label and branch are outside this view. BORDER_R is
@ presumably defined in port_config.s.
686 .global vidClear @ void *to, int lines
\r
691 orr r12, r1, r12, lsl #16 @ r12 = r1 | (r12 << 16)
\r
698 subs r12, r12, #1<<16 @ count down the upper-half counter, setting flags
\r
@ Four 4-word stores with writeback: 16 words per iteration.
700 stmia r0!, {r1-r3,lr}
\r
701 stmia r0!, {r1-r3,lr}
\r
702 stmia r0!, {r1-r3,lr}
\r
703 stmia r0!, {r1-r3,lr}
\r
@ Undo the final decrement; Z is set only when all of r12 becomes zero.
707 adds r12, r12, #1<<16
\r
708 ldmeqfd sp!, {pc} @ return
\r
710 add r0, r0, #BORDER_R*4 @ skip BORDER_R words in the destination
\r
712 orr r12, #(240/16-1)<<16 @ reload the upper-half counter for the next line
\r
715 @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
\r
@ my_SetExceptionHandler
@ Thin wrapper that issues software interrupt number 90
@ (EExecSetExceptionHandler). Arguments are passed through in whatever
@ registers the caller set up; this code does not touch them.
@ NOTE(review): the return instruction is not visible in this view, and the
@ meaning of call number 90 depends on the target OS - confirm both.
717 .equ EExecSetExceptionHandler, (90)
\r
719 .global my_SetExceptionHandler
\r
721 my_SetExceptionHandler:
\r
723 swi EExecSetExceptionHandler
\r