3 @ Assembly optimized routines for gpfce - FCE Ultra port
4 @ (c) Copyright 2007, Grazvydas "notaz" Ignotas
@ flushcache: symbol exported for callers outside this file. The trailing
@ comment lists its arguments as (beginning_addr, end_addr, flags).
@ NOTE(review): what `flags` selects is determined by the routine body -
@ confirm there before relying on any particular value.
7 .global flushcache @ beginning_addr, end_addr, flags
@ block_or: exported entry point. C-side signature per the trailing comment:
@   block_or(void *src, size_t n, int pat)
@ NOTE(review): the register roles used below assume the usual ARM argument
@ order (r0 = src, r1 = n, r2 = pat) - confirm against the callers.
14 .global block_or @ void *src, size_t n, int pat
@ Spread the pattern across the word: r2 |= r2 << 8, then r2 |= r2 << 16.
@ For a pat that fits in 8 bits this leaves that byte repeated in all four
@ byte lanes of r2; any higher bits already set in pat are OR-ed in as well.
18 orr r2, r2, r2, lsl #8
19 orr r2, r2, r2, lsl #16
@ Store four words (16 bytes) from r3, r4, r5 and r12 to [r0]; the `!`
@ writeback advances r0 by 16. Presumably these words were OR-ed with r2
@ beforehand - verify in the loop body.
28 stmia r0!, {r3-r5,r12}
@ block_and: exported entry point. C-side signature per the trailing comment:
@   block_and(void *src, size_t n, int andpat)
@ NOTE(review): the register roles used below assume the usual ARM argument
@ order (r0 = src, r1 = n, r2 = andpat) - confirm against the callers.
34 .global block_and @ void *src, size_t n, int andpat
@ Spread the pattern across the word: r2 |= r2 << 8, then r2 |= r2 << 16.
@ For an andpat that fits in 8 bits this leaves that byte repeated in all
@ four byte lanes of r2.
38 orr r2, r2, r2, lsl #8
39 orr r2, r2, r2, lsl #16
@ Store four words (16 bytes) from r3, r4, r5 and r12 to [r0]; the `!`
@ writeback advances r0 by 16. Presumably these words were AND-ed with r2
@ beforehand - verify in the loop body.
48 stmia r0!, {r3-r5,r12}
@ block_andor: exported entry point. C-side signature per the trailing
@ comment:
@   block_andor(void *src, size_t n, int andpat, int orpat)
@ NOTE(review): the register roles used below assume the usual ARM argument
@ order (r0 = src, r1 = n, r2 = andpat, r3 = orpat) - confirm against the
@ callers.
54 .global block_andor @ void *src, size_t n, int andpat, int orpat
@ Replicate each pattern across its register with the same two-step
@ shift-and-OR (<< 8, then << 16). For patterns that fit in 8 bits, r2 and
@ r3 each end up holding their byte in all four byte lanes.
58 orr r2, r2, r2, lsl #8
59 orr r2, r2, r2, lsl #16
60 orr r3, r3, r3, lsl #8
61 orr r3, r3, r3, lsl #16
@ Store four words (16 bytes) from r4, r5, r6 and r12 to [r0]; the `!`
@ writeback advances r0 by 16. r3 holds a pattern here, which is why the data
@ registers start at r4 rather than r3.
74 stmia r0!, {r4-r6,r12}
@ spend_cycles: exported entry point; the trailing comment names a single
@ argument c (presumably a cycle count arriving in r0 - TODO confirm).
80 .global spend_cycles @ c
@ r0 = r0 >> 2 (logical shift): turn the count into a number of loop
@ iterations at the stated cost of 4 cycles per iteration.
83 mov r0, r0, lsr #2 @ 4 cycles/iteration
@ Take two iterations off to account for the entry/exit/init overhead.
@ NOTE(review): `sub` does not set flags, and if r0 is below 2 here the
@ result wraps to a large unsigned value; how the loop that follows treats
@ that case needs confirming.
84 sub r0, r0, #2 @ entry/exit/init
@ memset32: exported entry point. C-side signature per the trailing comment:
@   memset32(int *dest, int c, int count)
@ NOTE(review): register roles assume the usual ARM argument order
@ (r0 = dest, r1 = c, r2 = count) - confirm against the callers.
92 .global memset32 @ int *dest, int c, int count
@ Store four words (16 bytes) from r1, r3, r12 and lr to [r0]; the `!`
@ writeback advances r0 by 16. Presumably r3, r12 and lr were loaded with
@ copies of the fill value in r1 - verify. Using lr as a data register means
@ the return address has to be saved and restored around this code.
106 stmia r0!, {r1,r3,r12,lr}
@ soft_scale: exported entry point. C-side signature per the trailing
@ comment:
@   soft_scale(void *dst, unsigned short *pal, int line_offs, int lines)
@
@ What the instructions below establish:
@   - source and destination regions are both addressed relative to r0 (dst);
@     destination offsets are scaled by 2 relative to source offsets
@     (lsl #1), i.e. 1-byte source pixels and 2-byte destination pixels;
@   - the loop works from the END of both regions towards the start (the
@     store below uses stmdb, decrement-before);
@   - every pass of soft_scale_loop_line stores five words = ten 16-bit
@     pixels, built from two source words (r12 and r7, four 8-bit pixels
@     each), so eight source pixels become ten destination pixels;
@   - the extra pixels are produced by 1/4 : 3/4 blends of neighbours, done
@     per colour field with the mask derived from r9.
@ NOTE(review): the pixel values are presumably fetched from the table at
@ r3 (pal) using the byte offsets computed by the `and ..., lr, ...`
@ instructions, with lr presumably holding a 0x1fe-style mask; neither the
@ loads nor the lr setup appear among these lines - confirm.
121 .global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines
@ Save r4-r11 and lr on the stack; all of them are used as scratch below.
124 stmfd sp!,{r4-r11,lr}
@ Start of the blend mask. The third mask triple in the comment sums to
@ 0x39e7; only the 0x3900 part is loaded by this instruction (presumably the
@ low bits are OR-ed in separately - confirm). With r9 = 0x39e7,
@ "r9, lsl #2" is 0xe79c, the middle triple: the bits of each colour field
@ that survive a right shift by 2 without spilling into the next field.
127 mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
@ Re-home the arguments: lines moves to r11 and pal to r3, which frees r1
@ to become the source end pointer below. r11 is later reused as scratch
@ inside the loop, so the line count must live elsewhere by then.
130 mov r11,r3 @ r11= line counter
131 mov r3, r1 @ r3 = pal base
@ r2 is used here as an offset: added as-is for the source and doubled for
@ the destination.
135 add r4, r0, r2, lsl #1 @ r4 = dst_start
136 add r5, r0, r2 @ r5 = src_start
@ r12 is the length of the region (its computation is not among these
@ lines); again doubled for the destination.
138 add r0, r4, r12,lsl #1 @ r0 = dst_end
139 add r1, r5, r12 @ r1 = src_end
@ Step the source pointer back 64 bytes past the border pixels.
144 sub r1, r1, #64 @ skip borders
@ OR the value 256/8-1 = 31 into the top byte of r2. Presumably a counter
@ of 8-pixel groups for one 256-pixel line, kept in bits 31:24 - confirm.
145 orr r2, r2, #(256/8-1)<<24
147 soft_scale_loop_line:
@ --- first source word (r12): four 8-bit pixels, s0..s3 ---
@ The shifts lsl #1 / lsr #7 / lsr #15 / lsr #23 line bytes 0..3 of the
@ word up as "byte * 2" under the lr mask, i.e. a halfword-table offset.
151 and r4, lr, r12,lsl #1
153 and r5, lr, r12,lsr #7
@ r11 = blendable bits of r4; shifted left by 14 (= >> 2, then << 16) it
@ places one quarter of that pixel in the upper halfword of r4.
155 and r11,r4, r9, lsl #2
156 orr r4, r4, r11,lsl #14 @ r4[31:16] = 1/4 pix_s 0
@ x - ((x & mask) >> 2) leaves three quarters of x per colour field.
157 and r11,r5, r9, lsl #2
158 sub r6, r5, r11,lsr #2 @ r6 = 3/4 pix_s 1
@ Upper halfword of r4 becomes 1/4 s0 + 3/4 s1; the lower halfword is left
@ untouched.
159 add r4, r4, r6, lsl #16 @ pix_d 0, 1
160 and r6, lr, r12,lsr #15
@ r12 is overwritten with the offset for its own top byte; the packed
@ source word is no longer needed after this.
162 and r12,lr, r12,lsr #23
168 and r11,r6, r9, lsl #2
169 sub r6, r6, r11,lsr #2 @ r6 = 3/4 pix_s 2
@ r5 = r5 (low halfword) | 3/4 of r6 (high halfword) ...
170 orr r5, r5, r6, lsl #16
@ --- second source word (r7): byte 0 ---
172 and r6, lr, r7, lsl #1
@ ... plus one quarter of r12 added into the same high halfword.
174 and r11,r12,r9, lsl #2
175 add r5, r5, r11,lsl #14 @ pix_d 2, 3
@ These two pixels are packed without blending: r12 low, r6 high.
176 orr r6, r12,r6, lsl #16 @ pix_d 4, 5
178 and r12,lr, r7, lsr #7
180 and r10,lr, r7, lsr #15
182 and r11,r12,r9, lsl #2
@ NOTE(review): r12 here comes from byte 1 of the SECOND source word (r7),
@ so the "pix_s 1" in the existing comment appears to be numbered relative
@ to that word - confirm.
183 sub r8, r12,r11,lsr #2 @ r8 = 3/4 pix_s 1
@ "r9, lsl #18" is the same blend mask positioned over the upper halfword;
@ this adds one quarter of r6's upper pixel into the low halfword of r8.
184 and r11,r6, r9, lsl #18
185 add r8, r8, r11,lsr #18
187 and r7, lr, r7, lsr #23
@ NOTE(review): the value in r12 at this point is prepared by instructions
@ that are not among these lines; this add is what completes the second
@ pixel of the r8 pair - confirm its weighting.
192 add r8, r8, r12,lsr #1 @ pix_d 6, 7
@ Low halfword of r10 = 3/4 of r10 + 1/4 of r7; high halfword = r7 as-is.
194 and r11,r10,r9, lsl #2
195 sub r10,r10,r11,lsr #2 @ r10= 3/4 pix_s 2
196 and r11,r7, r9, lsl #2
197 add r10,r10,r11,lsr #2 @ += 1/4 pix_s 3
198 orr r10,r10,r7, lsl #16 @ pix_d 8, 9
@ Store the five packed words (ten output pixels) immediately below r0 and
@ move r0 down by 20 bytes (decrement-before with writeback).
202 stmdb r0!, {r4,r5,r6,r8,r10}
@ Loop while the N flag is clear. The flag-setting instruction is not among
@ these lines; presumably it decrements the counter held in r2's top byte.
203 bpl soft_scale_loop_line
@ Restore the registers saved on entry (lr is reloaded too).
209 ldmfd sp!,{r4-r11,lr}
@ convert2RGB555: exported entry point; the C prototype is given on the next
@ line. From the instructions below: two source words (r4 and r5, four
@ 8-bit pixels each) are split into per-byte offsets, the resulting pixels
@ are packed two per word (first pixel in the low halfword), and four words
@ = eight 16-bit pixels are stored to dst per loop pass.
@ NOTE(review): register roles assume the usual ARM argument order
@ (r0 = dst, r1 = src, r2 = pal, r3 = count). The loads that turn each
@ offset into a pixel value, and the setup of the mask in lr (presumably
@ 0x1fe), are not among these lines - confirm.
213 @ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
215 .global convert2RGB555
@ --- first source word (r4) ---
@ The shifts lsl #1 / lsr #7 / lsr #15 / lsr #23 line bytes 0..3 of the
@ word up as "byte * 2" under the lr mask, i.e. a halfword-table offset.
227 and r6, lr, r4, lsl #1
229 and r7, lr, r4, lsr #7
231 and r8, lr, r4, lsr #15
@ r4 is overwritten with the offset for its own top byte.
233 and r4, lr, r4, lsr #23
@ Pack the first pair: r6 in the low halfword, r7 in the high halfword.
236 orr r6, r6, r7, lsl #16
@ --- second source word (r5), byte 0 ---
237 and r12,lr, r5, lsl #1
@ Pack the second pair into r7 (r8 low, r4 high); this frees r8 and r4 for
@ reuse with the second source word.
239 orr r7, r8, r4, lsl #16
240 and r8, lr, r5, lsr #7
242 and r4, lr, r5, lsr #15
@ r5 is overwritten with the offset for its own top byte.
244 and r5, lr, r5, lsr #23
@ Pack the third pair (r12 low, r8 high) and the fourth (r4 low, r5 high).
246 orr r8, r12,r8, lsl #16
247 orr r12,r4, r5, lsl #16
@ Store the four packed words (16 bytes) to [r0]; writeback advances r0 by
@ 16.
249 stmia r0!,{r6,r7,r8,r12}
@ Repeat while the Z flag is clear; the flag-setting instruction
@ (presumably a decrement of the remaining count) is not among these lines.
251 bne convert2RGB555_loop