+++ /dev/null
-@ vim:filetype=armasm
-
-@ Assembly optimized routines for gpfce - FCE Ultra port
-@ (c) Copyright 2007, Grazvydas "notaz" Ignotas
-
-@ test
-.global flushcache @ beginning_addr, end_addr, flags
-
-@ flushcache(beginning_addr, end_addr, flags)
-@   r0-r2 are handed unchanged to software interrupt 0x9f0002. That number
-@   matches the old-ABI (OABI) encoding of the ARM Linux "cacheflush" private
-@   call (0x900000 + 0x0f0002) -- NOTE(review): only valid on kernels that
-@   still accept OABI swi numbers; confirm against the target kernel.
-@   Whatever the kernel leaves in r0 is returned to the caller.
-flushcache:
-    swi     #0x9f0002
-    bx      lr                      @ was "mov pc, lr"; bx matches the other
-                                    @ routines here and returns correctly to
-                                    @ Thumb callers as well
-
-
-.global block_or @ void *src, size_t n, int pat
-
-@ void block_or(void *src, size_t n, int pat)
-@   ORs a byte pattern into a buffer in place, 16 bytes per iteration.
-@   r0 = src  buffer; accessed with word-multiple loads/stores
-@             (assumes src is word aligned -- TODO confirm with callers)
-@   r1 = n    length in bytes; only whole 16-byte chunks are processed,
-@             a trailing n % 16 bytes are left untouched
-@   r2 = pat  for pat in 0..255 the value is replicated into all four
-@             byte lanes before use
-@   Clobbers r0-r3, r12 and flags; r4-r5 are saved and restored.
-block_or:
-    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
-    bxeq    lr                      @ n < 16: nothing to do. Without this guard
-                                    @ the subs/bne loop below would wrap and
-                                    @ run 2^32 times.
-    stmfd   sp!, {r4-r5}
-    orr     r2, r2, r2, lsl #8      @ pat -> both bytes of the low halfword
-    orr     r2, r2, r2, lsl #16     @ ... -> all four bytes
-block_loop_or:
-    ldmia   r0, {r3-r5,r12}         @ load 16 bytes (no writeback yet)
-    subs    r1, r1, #1              @ flags survive until the bne below
-    orr     r3, r3, r2
-    orr     r4, r4, r2
-    orr     r5, r5, r2
-    orr     r12,r12,r2
-    stmia   r0!, {r3-r5,r12}        @ store back and advance src
-    bne     block_loop_or
-    ldmfd   sp!, {r4-r5}
-    bx      lr
-
-
-.global block_and @ void *src, size_t n, int andpat
-
-@ void block_and(void *src, size_t n, int andpat)
-@   ANDs a byte pattern into a buffer in place, 16 bytes per iteration.
-@   r0 = src     buffer; accessed with word-multiple loads/stores
-@                (assumes src is word aligned -- TODO confirm with callers)
-@   r1 = n       length in bytes; only whole 16-byte chunks are processed,
-@                a trailing n % 16 bytes are left untouched
-@   r2 = andpat  for andpat in 0..255 the value is replicated into all
-@                four byte lanes before use
-@   Clobbers r0-r3, r12 and flags; r4-r5 are saved and restored.
-block_and:
-    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
-    bxeq    lr                      @ n < 16: nothing to do. Without this guard
-                                    @ the subs/bne loop below would wrap and
-                                    @ run 2^32 times.
-    stmfd   sp!, {r4-r5}
-    orr     r2, r2, r2, lsl #8      @ andpat -> both bytes of the low halfword
-    orr     r2, r2, r2, lsl #16     @ ...    -> all four bytes
-block_loop_and:
-    ldmia   r0, {r3-r5,r12}         @ load 16 bytes (no writeback yet)
-    subs    r1, r1, #1              @ flags survive until the bne below
-    and     r3, r3, r2
-    and     r4, r4, r2
-    and     r5, r5, r2
-    and     r12,r12,r2
-    stmia   r0!, {r3-r5,r12}        @ store back and advance src
-    bne     block_loop_and
-    ldmfd   sp!, {r4-r5}
-    bx      lr
-
-
-.global block_andor @ void *src, size_t n, int andpat, int orpat
-
-@ void block_andor(void *src, size_t n, int andpat, int orpat)
-@   For every processed byte: b = (b & andpat) | orpat, in place,
-@   16 bytes per iteration.
-@   r0 = src     buffer; accessed with word-multiple loads/stores
-@                (assumes src is word aligned -- TODO confirm with callers)
-@   r1 = n       length in bytes; only whole 16-byte chunks are processed,
-@                a trailing n % 16 bytes are left untouched
-@   r2 = andpat  } for values in 0..255 each is replicated into all four
-@   r3 = orpat   } byte lanes before use
-@   Clobbers r0-r3, r12 and flags; r4-r6 are saved and restored.
-block_andor:
-    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
-    bxeq    lr                      @ n < 16: nothing to do. Without this guard
-                                    @ the subs/bne loop below would wrap and
-                                    @ run 2^32 times.
-    stmfd   sp!, {r4-r6}
-    orr     r2, r2, r2, lsl #8      @ replicate andpat across the word
-    orr     r2, r2, r2, lsl #16
-    orr     r3, r3, r3, lsl #8      @ replicate orpat across the word
-    orr     r3, r3, r3, lsl #16
-block_loop_andor:
-    ldmia   r0, {r4-r6,r12}         @ load 16 bytes (no writeback yet)
-    subs    r1, r1, #1              @ flags survive until the bne below
-    and     r4, r4, r2
-    orr     r4, r4, r3
-    and     r5, r5, r2
-    orr     r5, r5, r3
-    and     r6, r6, r2
-    orr     r6, r6, r3
-    and     r12,r12,r2
-    orr     r12,r12,r3
-    stmia   r0!, {r4-r6,r12}        @ store back and advance src
-    bne     block_loop_andor
-    ldmfd   sp!, {r4-r6}
-    bx      lr
-
-
-.global spend_cycles @ c
-
-@ spend_cycles(c): busy-wait delay loop.
-@   r0 = c, the amount to burn; the loop budget below treats it as a
-@        cycle count at 4 cycles per iteration.
-@   The count is divided by 4, reduced by 2 to compensate for call
-@   overhead, and counted down until it goes negative, so the loop body
-@   always executes at least once, even for small c.
-@   r0 and the flags are clobbered; nothing meaningful is returned.
-@   NOTE(review): the 4 cycles/iteration figure is CPU dependent -- confirm
-@   for the target core before relying on absolute timing.
-spend_cycles:
- mov r0, r0, lsr #2 @ 4 cycles/iteration
- sub r0, r0, #2 @ entry/exit/init
-.sc_loop:
- subs r0, r0, #1
- bpl .sc_loop @ keep looping while the counter is still >= 0
-
- bx lr
-
-
-.global memset32 @ int *dest, int c, int count
-
-@ void memset32(int *dest, int c, int count)
-@   Stores the 32-bit value c into count consecutive words at dest.
-@   r0 = dest   (advanced past the last word written)
-@   r1 = c      fill value
-@   r2 = count  number of words, expected to be >= 0
-@   Bulk of the work is done four words at a time using r1, r3, r12 and lr
-@   as identical fill registers; the final count % 4 words are written by
-@   the two conditional stores at the end.
-memset32:
-    stmfd   sp!, {lr}               @ lr is borrowed as a fill register
-
-    subs    r2, r2, #4              @ fewer than 4 words in total?
-    mov     r3, r1                  @ (mov leaves the flags alone)
-    bmi     mst32_fin
-
-    mov     lr, r1
-    mov     r12,r1
-
-mst32_loop:
-    subs    r2, r2, #4              @ at least 4 more after this group?
-    stmia   r0!, {r1,r3,r12,lr}
-    bpl     mst32_loop
-
-mst32_fin:
-    @ r2 is now negative, but its low two bits still equal count % 4
-    tst     r2, #2
-    stmneia r0!, {r1,r3}            @ two leftover words
-
-    tst     r2, #1
-    strne   r1, [r0], #4            @ one leftover word
-
-    ldmfd   sp!, {lr}
-    bx      lr
-
-
-@ warning: this code relies on palette being strictly RGB555, i.e. bit5=0
-.global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines
-
-@ void soft_scale(void *dst, unsigned short *pal, int line_offs, int lines)
-@   Expands 8-bit paletted pixels to 16-bit pixels while stretching every
-@   group of 8 source pixels to 10 destination pixels (256 -> 320 per line).
-@   Source and destination share the same base pointer: the source is read
-@   at dst + line_offs*320 (1 byte/pixel, 320-byte stride), the output is
-@   written at dst + line_offs*640 (2 bytes/pixel). Both are walked from the
-@   end towards the start -- presumably so the in-place expansion never
-@   overwrites source bytes that have not been read yet.
-@   Per group, with s0..s7 the palette-converted source pixels:
-@     d0 = s0              d1 = 1/4 s0 + 3/4 s1
-@     d2 = blend(s1, s2)   d3 = 3/4 s2 + 1/4 s3
-@     d4 = s3              d5 = s4
-@     d6 = 1/4 s4 + 3/4 s5 d7 = blend(s5, s6)
-@     d8 = 3/4 s6 + 1/4 s7 d9 = s7
-@   (the 1/4 terms use only the bits selected by the 0xe79c mask)
-@   NOTE(review): lines == 0 is not handled -- the outer counter would wrap;
-@   lines must also fit in the low 24 bits of r2. Confirm callers guarantee it.
-soft_scale:
- stmfd sp!,{r4-r11,lr}
- mov lr, #0xff
- mov lr, lr, lsl #1 @ lr = 0x1fe: masks (pixel byte * 2) = palette byte offset
- mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
- orr r9, r9, #0x00e7 @ r9 = 0x39e7; used as r9<<2 = 0xe79c (upper bits of each colour field)
-
- mov r11,r3 @ r11= line counter
- mov r3, r1 @ r3 = pal base
-
- mov r12,#320
- mul r2, r12,r2 @ r2 = line_offs * 320
- add r4, r0, r2, lsl #1 @ r4 = dst_start
- add r5, r0, r2 @ r5 = src_start
- mul r12,r11,r12 @ r12 = lines * 320
- add r0, r4, r12,lsl #1 @ r0 = dst_end
- add r1, r5, r12 @ r1 = src_end
-
- mov r2, r11 @ r2[23:0] = remaining lines; r2[31:24] is reused as the per-line group counter
-
-soft_scale_loop:
- sub r1, r1, #64 @ skip borders
- orr r2, r2, #(256/8-1)<<24 @ 32 groups of 8 source pixels per line
-
-soft_scale_loop_line:
-@ fetch 8 source pixels, moving backwards: r12 = s0..s3, r7 = s4..s7
- ldr r12, [r1, #-8]!
- ldr r7, [r1, #4]
-
- and r4, lr, r12,lsl #1
- ldrh r4, [r3, r4] @ r4 = pal[s0]
- and r5, lr, r12,lsr #7
- ldrh r5, [r3, r5] @ r5 = pal[s1]
- and r11,r4, r9, lsl #2
- orr r4, r4, r11,lsl #14 @ r4[31:16] = 1/4 pix_s 0
- and r11,r5, r9, lsl #2
- sub r6, r5, r11,lsr #2 @ r6 = 3/4 pix_s 1
- add r4, r4, r6, lsl #16 @ pix_d 0, 1
- and r6, lr, r12,lsr #15
- ldrh r6, [r3, r6] @ r6 = pal[s2]
- and r12,lr, r12,lsr #23
- ldrh r12,[r3, r12] @ r12 = pal[s3]
-
-@ blend of s1 and s2 via rotated add; the carry flag from "adds" is folded
-@ back in as bit 10 by the orrcs below (appears to be a per-field average
-@ -- verify the bit layout before changing)
- mov r11,r6, ror #11
- adds r5, r11,r5, ror #11
- mov r5, r5, ror #22
- bic r5, r5, #0xff000000
- bic r5, r5, #0x0420 @ set the green bits as they should be
- orrcs r5, r5, #0x0400
-
- and r11,r6, r9, lsl #2
- sub r6, r6, r11,lsr #2 @ r6 = 3/4 pix_s 2
- orr r5, r5, r6, lsl #16
-
- and r6, lr, r7, lsl #1
- ldrh r6, [r3, r6] @ r6 = pal[s4]
- and r11,r12,r9, lsl #2
- add r5, r5, r11,lsl #14 @ pix_d 2, 3
- orr r6, r12,r6, lsl #16 @ pix_d 4, 5
-
- and r12,lr, r7, lsr #7
- ldrh r12,[r3, r12] @ r12 = pal[s5]
- and r10,lr, r7, lsr #15
- ldrh r10,[r3, r10] @ r10 = pal[s6]
- and r11,r12,r9, lsl #2
- sub r8, r12,r11,lsr #2 @ r8 = 3/4 pix_s 5
- and r11,r6, r9, lsl #18 @ upper field bits of s4, taken from the high half of r6
- add r8, r8, r11,lsr #18 @ += 1/4 pix_s 4
- and r7, lr, r7, lsr #23
- ldrh r7, [r3, r7] @ r7 = pal[s7]
-
-@ same rotated-add blend as above, this time for s5 and s6
- mov r11,r10,ror #11
- adds r12,r11,r12,ror #11
- mov r12,r12,ror #22
- bic r12,r12,#0x0420
- orrcs r12,r12,#0x0400
- orr r8, r8, r12,lsl #16 @ pix_d 6, 7
-
- and r11,r10,r9, lsl #2
- sub r10,r10,r11,lsr #2 @ r10= 3/4 pix_s 6
- and r11,r7, r9, lsl #2
- add r10,r10,r11,lsr #2 @ += 1/4 pix_s 7
- orr r10,r10,r7, lsl #16 @ pix_d 8, 9
-
- subs r2, r2, #1<<24 @ group counter--; flags are consumed by the bpl below
-
- stmdb r0!, {r4,r5,r6,r8,r10} @ write 10 pixels below the current dst pointer (stm leaves flags alone)
- bpl soft_scale_loop_line
-
- add r2, r2, #1<<24 @ undo the final decrement so r2[31:24] is 0 again
- subs r2, r2, #1 @ one line done
- bne soft_scale_loop
-
- ldmfd sp!,{r4-r11,lr}
- bx lr
-
-
-@ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
-@   Palette lookup: dst[i] = pal[src[i]], eight pixels per iteration.
-@   r0 = dst    16-bit output, written four words at a time
-@   r1 = src    8-bit input, read two words at a time
-@               (assumes src and dst are word aligned -- TODO confirm)
-@   r2 = pal    table of halfwords indexed by the source byte
-@   r3 = count  number of pixels; only whole groups of 8 are converted,
-@               a trailing count % 8 pixels are left untouched
-@   Clobbers r0-r3, r12 and flags; r4-r8 and lr are saved and restored.
-
-.global convert2RGB555
-
-convert2RGB555:
-    movs    r3, r3, lsr #3          @ r3 = number of 8-pixel groups
-    bxeq    lr                      @ count < 8: nothing to do. Without this
-                                    @ guard the subs/bne loop below would wrap
-                                    @ and run 2^32 times.
-    stmfd   sp!,{r4-r8,lr}
-    mov     lr, #0xff
-    mov     lr, lr, lsl #1          @ lr = 0x1fe: masks (byte * 2), the
-                                    @ byte offset of a palette halfword
-
-convert2RGB555_loop:
-    ldmia   r1!,{r4,r5}             @ r4 = pixels 0-3, r5 = pixels 4-7
-
-    and     r6, lr, r4, lsl #1
-    ldrh    r6, [r2, r6]            @ pal[pixel 0]
-    and     r7, lr, r4, lsr #7
-    ldrh    r7, [r2, r7]            @ pal[pixel 1]
-    and     r8, lr, r4, lsr #15
-    ldrh    r8, [r2, r8]            @ pal[pixel 2]
-    and     r4, lr, r4, lsr #23
-    ldrh    r4, [r2, r4]            @ pal[pixel 3]
-
-    orr     r6, r6, r7, lsl #16     @ r6  = output pixels 0, 1
-    and     r12,lr, r5, lsl #1
-    ldrh    r12, [r2, r12]          @ pal[pixel 4]
-    orr     r7, r8, r4, lsl #16     @ r7  = output pixels 2, 3
-    and     r8, lr, r5, lsr #7
-    ldrh    r8, [r2, r8]            @ pal[pixel 5]
-    and     r4, lr, r5, lsr #15
-    ldrh    r4, [r2, r4]            @ pal[pixel 6]
-    and     r5, lr, r5, lsr #23
-    ldrh    r5, [r2, r5]            @ pal[pixel 7]
-    orr     r8, r12,r8, lsl #16     @ r8  = output pixels 4, 5
-    orr     r12,r4, r5, lsl #16     @ r12 = output pixels 6, 7
-
-    stmia   r0!,{r6,r7,r8,r12}
-    subs    r3, r3, #1
-    bne     convert2RGB555_loop
-
-    ldmfd   sp!,{r4-r8,lr}
-    bx      lr
-