3 @ Assembly optimized routines for gpfce - FCE Ultra port
4 @ (c) Copyright 2007, Grazvydas "notaz" Ignotas
@ Export the flushcache symbol so code in other objects can link against it.
@ The trailing comment names its arguments: beginning_addr, end_addr, flags.
@ NOTE(review): the routine's body is not present in this listing.
7 .global flushcache @ beginning_addr, end_addr, flags
@ block_or: exported routine; the signature comment gives
@ (void *src, size_t n, int pat).
@ NOTE(review): assuming the standard ARM procedure-call register mapping,
@ r0 = src, r1 = n, r2 = pat -- confirm.
14 .global block_or @ void *src, size_t n, int pat
@ Spread r2 across the word: r2 |= r2 << 8, then r2 |= r2 << 16.
@ For a value that fits in 8 bits this leaves the same byte in all four
@ byte lanes, so one word-wide operation can cover four bytes at a time.
18 orr r2, r2, r2, lsl #8
19 orr r2, r2, r2, lsl #16
@ Store r3, r4, r5 and r12 (four words, 16 bytes) at [r0] and advance r0
@ past them (write-back).
@ NOTE(review): the instructions that load these registers and combine them
@ with r2 are not present in this listing -- verify.
28 stmia r0!, {r3-r5,r12}
@ block_and: exported routine; the signature comment gives
@ (void *src, size_t n, int andpat).
@ NOTE(review): assuming the standard ARM procedure-call register mapping,
@ r0 = src, r1 = n, r2 = andpat -- confirm.
34 .global block_and @ void *src, size_t n, int andpat
@ Spread r2 across the word: r2 |= r2 << 8, then r2 |= r2 << 16.
@ For a value that fits in 8 bits this leaves the same byte in all four
@ byte lanes.
38 orr r2, r2, r2, lsl #8
39 orr r2, r2, r2, lsl #16
@ Store r3, r4, r5 and r12 (four words, 16 bytes) at [r0] and advance r0
@ past them (write-back).
@ NOTE(review): the instructions that load these registers and mask them
@ with r2 are not present in this listing -- verify.
48 stmia r0!, {r3-r5,r12}
@ block_andor: exported routine; the signature comment gives
@ (void *src, size_t n, int andpat, int orpat).
@ NOTE(review): assuming the standard ARM procedure-call register mapping,
@ r0 = src, r1 = n, r2 = andpat, r3 = orpat -- confirm.
54 .global block_andor @ void *src, size_t n, int andpat, int orpat
@ Spread r2 across the word (r2 |= r2 << 8, then r2 |= r2 << 16); for an
@ 8-bit value this leaves the same byte in all four byte lanes.
58 orr r2, r2, r2, lsl #8
59 orr r2, r2, r2, lsl #16
@ Same replication for r3.
60 orr r3, r3, r3, lsl #8
61 orr r3, r3, r3, lsl #16
@ Store r4, r5, r6 and r12 (four words, 16 bytes) at [r0] and advance r0
@ past them (write-back). r3 carries a pattern here, so the data registers
@ start at r4 rather than r3.
@ NOTE(review): the instructions that load these registers and apply the
@ two patterns are not present in this listing -- verify.
74 stmia r0!, {r4-r6,r12}
@ spend_cycles: exported routine taking a single argument, c (per the
@ trailing comment).
@ NOTE(review): presumably c arrives in r0 and is a cycle count to burn in
@ a delay loop; the loop itself is not present in this listing -- confirm.
80 .global spend_cycles @ c
@ r0 = r0 >> 2 (logical): convert the argument into a number of iterations,
@ given the stated cost of 4 cycles per iteration.
83 mov r0, r0, lsr #2 @ 4 cycles/iteration
@ Knock two iterations off to account for fixed overhead.
@ NOTE(review): for an argument below 8 the shifted value is 0 or 1, so this
@ subtraction goes negative; how that case is handled is not visible here.
84 sub r0, r0, #2 @ entry/exit/init
@ memset32: exported routine; the signature comment gives
@ (int *dest, int c, int count).
@ NOTE(review): assuming the standard ARM procedure-call register mapping,
@ r0 = dest, r1 = c, r2 = count -- confirm.
92 .global memset32 @ int *dest, int c, int count
@ Store r1, r3, r12 and lr (four words, 16 bytes) at [r0] and advance r0
@ past them (write-back).
@ NOTE(review): presumably r3, r12 and lr were set to copies of r1 first,
@ and lr was saved so it can be used as a data register; neither step is
@ present in this listing -- verify.
106 stmia r0!, {r1,r3,r12,lr}
@ soft_scale: exported routine; the signature comment gives
@ (void *dst, unsigned short *pal, int line_offs, int lines).
@
@ Source and destination pointers are both derived from r0 (see the
@ dst_start / src_start setup below), and output is written with a
@ decrementing store, i.e. from the end of the area towards its start.
@ Each pass through the loop body emits five words (ten 16-bit pixels);
@ the pix_d comments number those ten output pixels 0..9.
@
@ NOTE(review): several pieces this code depends on are not present in this
@ listing: the entry label, the value loaded into lr (used as an AND mask),
@ the initial value of r12, the loads that fetch source words and palette
@ entries, the instruction that sets the flags tested by bpl, and the
@ return. Comments below describe only what each visible instruction does.
120 @ warning: this code relies on palette being strictly RGB555, i.e. bit5=0
121 .global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines
@ Save r4-r11 and lr on the stack; all of them are used as scratch below.
124 stmfd sp!,{r4-r11,lr}
@ r9 is the colour-field mask used (shifted) by the blend code below.
@ NOTE(review): only 0x3900 is loaded here, while the last mask group in the
@ comment (3800|01e0|0007) is 0x39e7 -- presumably the low bits are ORed in
@ by an instruction not present in this listing; confirm.
127 mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
130 mov r11,r3 @ r11= line counter
131 mov r3, r1 @ r3 = pal base
@ Destination offsets are twice the source offsets (lsl #1), matching
@ 2-byte output pixels against 1-byte source pixels.
135 add r4, r0, r2, lsl #1 @ r4 = dst_start
136 add r5, r0, r2 @ r5 = src_start
@ NOTE(review): r12 is expected to hold the size of the area being processed
@ at this point; it is not set in this listing -- confirm.
138 add r0, r4, r12,lsl #1 @ r0 = dst_end
139 add r1, r5, r12 @ r1 = src_end
144 sub r1, r1, #64 @ skip borders
@ OR 31 (256/8 - 1) into the top byte of r2.
@ NOTE(review): presumably a per-line countdown of 8-pixel groups; the code
@ that decrements and tests it is not present in this listing.
145 orr r2, r2, #(256/8-1)<<24
147 soft_scale_loop_line:
@ r4 = lr & (r12 << 1), r5 = lr & (r12 >> 7): with a suitable mask in lr
@ these pick byte 0 and byte 1 of r12, each pre-multiplied by 2.
@ NOTE(review): presumably halfword offsets into the palette at r3.
151 and r4, lr, r12,lsl #1
153 and r5, lr, r12,lsr #7
@ r11 = r4 & (r9 << 2): keep the bits of each colour field that survive a
@ divide by 4. Its low two bits are zero, so (r11 << 14) equals
@ ((r11 >> 2) << 16): a quarter of pixel 0 placed in the upper halfword.
155 and r11,r4, r9, lsl #2
156 orr r4, r4, r11,lsl #14 @ r4[31:16] = 1/4 pix_s 0
@ Same masking for r5; subtracting the quarter leaves three quarters.
157 and r11,r5, r9, lsl #2
158 sub r6, r5, r11,lsr #2 @ r6 = 3/4 pix_s 1
@ Upper halfword of r4 becomes 1/4 pix_s 0 + 3/4 pix_s 1; the lower
@ halfword still holds pix_s 0 unchanged.
159 add r4, r4, r6, lsl #16 @ pix_d 0, 1
@ r6 = lr & (r12 >> 15), r12 = lr & (r12 >> 23): bytes 2 and 3 of r12,
@ each times 2, under the same assumption about lr.
160 and r6, lr, r12,lsr #15
162 and r12,lr, r12,lsr #23
@ r5 = r11 + (r5 rotated right by 11 bits), updating the flags.
166 adds r5, r11,r5, ror #11
@ Clear the top byte, then clear bits 0x0420 (bits 10 and 5).
168 bic r5, r5, #0xff000000
169 bic r5, r5, #0x0420 @ set the green bits as they should be
@ Set bit 10 when the carry flag is set. bic without an S suffix leaves the
@ flags alone, so among the visible instructions the carry comes from the
@ adds above.
170 orrcs r5, r5, #0x0400
@ r6 = r6 minus a quarter of each of its colour fields.
172 and r11,r6, r9, lsl #2
173 sub r6, r6, r11,lsr #2 @ r6 = 3/4 pix_s 2
@ Place that value in the upper halfword of r5.
174 orr r5, r5, r6, lsl #16
@ r6 = lr & (r7 << 1): byte 0 of r7 times 2.
@ NOTE(review): r7 presumably holds the next source word; its load is not
@ present in this listing.
176 and r6, lr, r7, lsl #1
@ Add a quarter of r12's colour fields into the upper halfword of r5
@ (same lsl #14 trick as above).
178 and r11,r12,r9, lsl #2
179 add r5, r5, r11,lsl #14 @ pix_d 2, 3
@ Pack r12 into the lower halfword and r6 into the upper halfword.
180 orr r6, r12,r6, lsl #16 @ pix_d 4, 5
@ r12 = byte 1 of r7 times 2, r10 = byte 2 of r7 times 2 (given the lr mask).
182 and r12,lr, r7, lsr #7
184 and r10,lr, r7, lsr #15
@ r8 = r12 minus a quarter of each of its colour fields.
186 and r11,r12,r9, lsl #2
187 sub r8, r12,r11,lsr #2 @ r8 = 3/4 pix_s 1
@ r9 << 18 is the divide-by-4 mask moved to the upper halfword; shifting the
@ masked bits right by 18 yields a quarter of the pixel held in r6[31:16],
@ aligned to the lower halfword, which is then added into r8.
188 and r11,r6, r9, lsl #18
189 add r8, r8, r11,lsr #18
@ r7 = byte 3 of r7 times 2 (given the lr mask).
190 and r7, lr, r7, lsr #23
@ r12 = r11 + (r12 rotated right by 11 bits), updating the flags.
194 adds r12,r11,r12,ror #11
@ Set bit 10 when the carry flag is set.
197 orrcs r12,r12,#0x0400
@ Place r12 in the upper halfword of r8.
198 orr r8, r8, r12,lsl #16 @ pix_d 6, 7
200 and r11,r10,r9, lsl #2
201 sub r10,r10,r11,lsr #2 @ r10= 3/4 pix_s 2
202 and r11,r7, r9, lsl #2
203 add r10,r10,r11,lsr #2 @ += 1/4 pix_s 3
@ Lower halfword: the blended pixel; upper halfword: r7 unchanged.
204 orr r10,r10,r7, lsl #16 @ pix_d 8, 9
@ Decrement r0 by 20 bytes and store r4, r5, r6, r8, r10 there (five words,
@ ten output pixels); r0 is left pointing at the start of what was written.
208 stmdb r0!, {r4,r5,r6,r8,r10}
@ Repeat while the N flag is clear.
@ NOTE(review): the flag-setting instruction this branch depends on is not
@ present in this listing -- verify.
209 bpl soft_scale_loop_line
@ Restore the registers saved on entry.
@ NOTE(review): the return instruction is not present in this listing.
215 ldmfd sp!,{r4-r11,lr}
@ convert2RGB555: exported routine. Each pass through the visible loop body
@ turns eight values taken from r4 and r5 into eight 16-bit halfwords,
@ packed two per word, and stores them at [r0].
@ NOTE(review): the entry label, the loop label, the mask loaded into lr,
@ the loads that fill r4/r5 and fetch palette entries, the loop counter
@ update and the return are not present in this listing; comments below
@ describe only what each visible instruction does.
219 @ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
221 .global convert2RGB555
@ Split r4 into its four bytes, each multiplied by 2 (given a suitable mask
@ in lr): r6 = byte 0, r7 = byte 1, r8 = byte 2, r4 = byte 3.
@ NOTE(review): presumably halfword offsets into the palette -- confirm.
233 and r6, lr, r4, lsl #1
235 and r7, lr, r4, lsr #7
237 and r8, lr, r4, lsr #15
239 and r4, lr, r4, lsr #23
@ Pack r6 (lower halfword) with r7 (upper halfword).
242 orr r6, r6, r7, lsl #16
@ r12 = byte 0 of r5 times 2.
243 and r12,lr, r5, lsl #1
@ Pack r8 (lower halfword) with r4 (upper halfword) into r7.
245 orr r7, r8, r4, lsl #16
@ Remaining bytes of r5, each times 2: r8 = byte 1, r4 = byte 2, r5 = byte 3.
246 and r8, lr, r5, lsr #7
248 and r4, lr, r5, lsr #15
250 and r5, lr, r5, lsr #23
@ Pack r12 with r8, and r4 with r5, lower halfword first in each pair.
252 orr r8, r12,r8, lsl #16
253 orr r12,r4, r5, lsl #16
@ Store r6, r7, r8, r12 (four words, eight halfwords) at [r0] and advance
@ r0 by 16 bytes (write-back).
255 stmia r0!,{r6,r7,r8,r12}
@ Loop while the Z flag is clear.
@ NOTE(review): the instruction that sets the flags, and the
@ convert2RGB555_loop label itself, are not present in this listing.
257 bne convert2RGB555_loop