initial pandora port
[fceu.git] / drivers / arm / asmutils.s
CommitLineData
@ vim:filetype=armasm

@ Assembly optimized routines for gpfce - FCE Ultra port
@ (c) Copyright 2007, Grazvydas "notaz" Ignotas

@ test
@ void flushcache(beginning_addr, end_addr, flags)
@
@ Raises software interrupt 0x9f0002 with the three arguments left
@ untouched in r0-r2, then returns to the caller.
@   r0 = beginning_addr
@   r1 = end_addr
@   r2 = flags
@ NOTE(review): 0x9f0002 looks like the ARM Linux OABI cacheflush call
@ (0x900000 base + 0x0f0002) - confirm against the target kernel headers.
.global flushcache @ beginning_addr, end_addr, flags

flushcache:
    swi     #0x9f0002
    bx      lr                      @ interworking-safe return, as used by
                                    @ the other routines in this file
12
c0bf6f9f 13
@ void block_or(void *src, size_t n, int pat)
@
@ ORs a byte pattern into a buffer in place, 16 bytes per iteration.
@   r0 = src   buffer, accessed with word-sized ldm/stm
@   r1 = n     size in bytes; only the n/16 whole 16-byte chunks are
@              processed, any remainder is left untouched
@   r2 = pat   pattern; it is OR-copied into bits 8-15 and then 16-31, so
@              it is expected to fit in the low 8 bits - TODO confirm callers
@ Returns nothing. Modifies r0-r3, r12 and the flags; r4-r5 are preserved.
.global block_or @ void *src, size_t n, int pat

block_or:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
    bxeq    lr                      @ n < 16: nothing to do; without this the
                                    @ subs/bne loop would wrap past zero
    stmfd   sp!, {r4-r5}
    orr     r2, r2, r2, lsl #8      @ spread the pattern over the halfword...
    orr     r2, r2, r2, lsl #16     @ ...and over the whole word
block_loop_or:
    ldmia   r0, {r3-r5,r12}         @ load 4 words, pointer not advanced yet
    subs    r1, r1, #1
    orr     r3, r3, r2
    orr     r4, r4, r2
    orr     r5, r5, r2
    orr     r12,r12,r2
    stmia   r0!, {r3-r5,r12}        @ write back and step to the next chunk
    bne     block_loop_or
    ldmfd   sp!, {r4-r5}
    bx      lr
32
33
@ void block_and(void *src, size_t n, int andpat)
@
@ ANDs a byte pattern into a buffer in place, 16 bytes per iteration.
@   r0 = src     buffer, accessed with word-sized ldm/stm
@   r1 = n       size in bytes; only the n/16 whole 16-byte chunks are
@                processed, any remainder is left untouched
@   r2 = andpat  pattern; it is OR-copied into bits 8-15 and then 16-31, so
@                it is expected to fit in the low 8 bits - TODO confirm callers
@ Returns nothing. Modifies r0-r3, r12 and the flags; r4-r5 are preserved.
.global block_and @ void *src, size_t n, int andpat

block_and:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
    bxeq    lr                      @ n < 16: nothing to do; without this the
                                    @ subs/bne loop would wrap past zero
    stmfd   sp!, {r4-r5}
    orr     r2, r2, r2, lsl #8      @ spread the pattern over the halfword...
    orr     r2, r2, r2, lsl #16     @ ...and over the whole word
block_loop_and:
    ldmia   r0, {r3-r5,r12}         @ load 4 words, pointer not advanced yet
    subs    r1, r1, #1
    and     r3, r3, r2
    and     r4, r4, r2
    and     r5, r5, r2
    and     r12,r12,r2
    stmia   r0!, {r3-r5,r12}        @ write back and step to the next chunk
    bne     block_loop_and
    ldmfd   sp!, {r4-r5}
    bx      lr
52
53
@ void block_andor(void *src, size_t n, int andpat, int orpat)
@
@ Applies (word & andpat) | orpat to a buffer in place, 16 bytes per
@ iteration.
@   r0 = src     buffer, accessed with word-sized ldm/stm
@   r1 = n       size in bytes; only the n/16 whole 16-byte chunks are
@                processed, any remainder is left untouched
@   r2 = andpat  AND pattern  \ each is OR-copied into bits 8-15 and then
@   r3 = orpat   OR pattern   / 16-31, so both are expected to fit in the
@                               low 8 bits - TODO confirm callers
@ Returns nothing. Modifies r0-r3, r12 and the flags; r4-r6 are preserved.
.global block_andor @ void *src, size_t n, int andpat, int orpat

block_andor:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
    bxeq    lr                      @ n < 16: nothing to do; without this the
                                    @ subs/bne loop would wrap past zero
    stmfd   sp!, {r4-r6}
    orr     r2, r2, r2, lsl #8      @ spread andpat over the whole word
    orr     r2, r2, r2, lsl #16
    orr     r3, r3, r3, lsl #8      @ spread orpat over the whole word
    orr     r3, r3, r3, lsl #16
block_loop_andor:
    ldmia   r0, {r4-r6,r12}         @ load 4 words, pointer not advanced yet
    subs    r1, r1, #1
    and     r4, r4, r2
    orr     r4, r4, r3
    and     r5, r5, r2
    orr     r5, r5, r3
    and     r6, r6, r2
    orr     r6, r6, r3
    and     r12,r12,r2
    orr     r12,r12,r3
    stmia   r0!, {r4-r6,r12}        @ write back and step to the next chunk
    bne     block_loop_andor
    ldmfd   sp!, {r4-r6}
    bx      lr
78
79
@ void spend_cycles(c)
@
@ Busy-wait: burns time in a two-instruction countdown loop.
@   r0 = c   requested delay; it is divided by 4 because the loop is
@            budgeted at 4 cycles per pass, and 2 passes are deducted as an
@            allowance for entry/exit/init.
@ For small c the adjusted counter is already negative on entry, so the
@ loop body runs exactly once. Modifies r0 and the flags only.
@ The instruction sequence is the timing contract - do not reorder.
.global spend_cycles @ c

spend_cycles:
    mov     r0, r0, lsr #2          @ passes = c / 4
    sub     r0, r0, #2              @ minus call overhead allowance
.sc_spin:
    subs    r0, r0, #1
    bpl     .sc_spin                @ keep spinning until the counter goes negative

    bx      lr
90
91
@ void memset32(int *dest, int c, int count)
@
@ Fills `count` 32-bit words starting at `dest` with the value `c`.
@   r0 = dest   destination, written with word stores (advanced as it goes)
@   r1 = c      fill value
@   r2 = count  number of words
@ The bulk is written four words at a time; the 0-3 leftover words are
@ handled afterwards from the two low bits of the counter.
@ A negative count is not rejected: the tail still stores (count & 3) words.
@ Modifies r0, r2, r3, r12 and the flags; lr is saved on the stack because
@ it is borrowed as the fourth fill register.
.global memset32 @ int *dest, int c, int count

memset32:
    str     lr, [sp, #-4]!          @ push lr

    subs    r2, r2, #4              @ fewer than four words requested?
    mov     r3, r1                  @ second copy of c (mov keeps the flags)
    bmi     mst32_tail

    mov     lr, r1                  @ third and fourth copies of c
    mov     r12,r1

mst32_quad:
    stmia   r0!, {r1,r3,r12,lr}     @ four words per pass
    subs    r2, r2, #4
    bpl     mst32_quad

mst32_tail:
    @ r2 has only ever been reduced by multiples of 4, so its two low bits
    @ still equal count & 3
    tst     r2, #1
    strne   r1, [r0], #4            @ one odd word

    tst     r2, #2
    stmneia r0!, {r1,r3}            @ one remaining pair

    ldr     lr, [sp], #4            @ pop lr
    bx      lr
118
119
@ void soft_scale(void *dst, unsigned short *pal, int line_offs, int lines)
@
@ Horizontal 256 -> 320 software scaler with palette lookup, done in place.
@   r0 = dst        frame buffer base. The 8-bit source pixels are read from
@                   the same buffer: source line y starts at dst + y*320,
@                   the 16-bit output line y at dst + y*320*2.
@   r1 = pal        table of 16-bit colors indexed by source pixel value
@   r2 = line_offs  first line to process
@   r3 = lines      number of lines; must fit in 24 bits because the top
@                   byte of the counter register holds the column counter.
@                   lines <= 0 returns immediately.
@ Each source line is 320 bytes, of which the first 256 are used and the
@ last 64 skipped. Every inner pass reads 8 source pixels and writes 10
@ output pixels (5 words), 32 passes per line. Processing runs from the end
@ of the area towards the start - presumably so the wider output never
@ overwrites source bytes that have not been read yet.
@ Output pixels are either a source color, a 1/4 + 3/4 mix of two
@ neighbours, or their average.
@
@ warning: this code relies on palette being strictly RGB555, i.e. bit5=0
@ NOTE(review): the byte extraction below assumes the lowest-addressed pixel
@ sits in the low byte of each loaded word (little-endian) - confirm.
.global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines

soft_scale:
    cmp     r3, #0
    bxle    lr                      @ nothing to scale; the subs/bne line
                                    @ counter below would otherwise wrap
    stmfd   sp!,{r4-r11,lr}
    mov     lr, #0xff
    mov     lr, lr, lsl #1          @ lr = 0x1fe: mask for (pixel byte * 2)
    mov     r9, #0x3900             @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
    orr     r9, r9, #0x00e7         @ r9 = 0x39e7; r9<<2 selects the channel
                                    @ bits that survive a divide by 4

    mov     r11,r3                  @ r11= line counter
    mov     r3, r1                  @ r3 = pal base

    mov     r12,#320
    mul     r2, r12,r2              @ r2 = line_offs * 320
    add     r4, r0, r2, lsl #1      @ r4 = dst_start
    add     r5, r0, r2              @ r5 = src_start
    mul     r12,r11,r12             @ r12 = lines * 320
    add     r0, r4, r12,lsl #1      @ r0 = dst_end
    add     r1, r5, r12             @ r1 = src_end

    mov     r2, r11                 @ r2[23:0] = lines left

soft_scale_loop:
    sub     r1, r1, #64             @ skip borders
    orr     r2, r2, #(256/8-1)<<24  @ r2[31:24] = 8-pixel groups left - 1

soft_scale_loop_line:
    ldr     r12, [r1, #-8]!         @ r12 = source pixels 0-3 of this group
    ldr     r7, [r1, #4]            @ r7  = source pixels 4-7 of this group

    and     r4, lr, r12,lsl #1
    ldrh    r4, [r3, r4]            @ r4 = color of pix_s 0
    and     r5, lr, r12,lsr #7
    ldrh    r5, [r3, r5]            @ r5 = color of pix_s 1
    and     r11,r4, r9, lsl #2
    orr     r4, r4, r11,lsl #14     @ r4[31:16] = 1/4 pix_s 0
    and     r11,r5, r9, lsl #2
    sub     r6, r5, r11,lsr #2      @ r6 = 3/4 pix_s 1
    add     r4, r4, r6, lsl #16     @ pix_d 0, 1
    and     r6, lr, r12,lsr #15
    ldrh    r6, [r3, r6]            @ r6 = color of pix_s 2
    and     r12,lr, r12,lsr #23
    ldrh    r12,[r3, r12]           @ r12 = color of pix_s 3

    @ average of pix_s 1 and pix_s 2: rotate so the low 11 bits sit at the
    @ top of the word, add (carry-out lands in C), rotate back one bit short
    mov     r11,r6, ror #11
    adds    r5, r11,r5, ror #11
    mov     r5, r5, ror #22
    bic     r5, r5, #0xff000000
    bic     r5, r5, #0x0420         @ set the green bits as they should be
    orrcs   r5, r5, #0x0400

    and     r11,r6, r9, lsl #2
    sub     r6, r6, r11,lsr #2      @ r6 = 3/4 pix_s 2
    orr     r5, r5, r6, lsl #16

    and     r6, lr, r7, lsl #1
    ldrh    r6, [r3, r6]            @ r6 = color of pix_s 4
    and     r11,r12,r9, lsl #2
    add     r5, r5, r11,lsl #14     @ pix_d 2, 3
    orr     r6, r12,r6, lsl #16     @ pix_d 4, 5

    and     r12,lr, r7, lsr #7
    ldrh    r12,[r3, r12]           @ r12 = color of pix_s 5
    and     r10,lr, r7, lsr #15
    ldrh    r10,[r3, r10]           @ r10 = color of pix_s 6
    and     r11,r12,r9, lsl #2
    sub     r8, r12,r11,lsr #2      @ r8 = 3/4 pix_s 1 (second word: pix_s 5)
    and     r11,r6, r9, lsl #18
    add     r8, r8, r11,lsr #18     @ += 1/4 of the color in r6[31:16]
    and     r7, lr, r7, lsr #23
    ldrh    r7, [r3, r7]            @ r7 = color of pix_s 7

    @ average of pix_s 5 and pix_s 6, same rotate/add scheme as above; the
    @ stray high bits are shifted out by the lsl #16 below
    mov     r11,r10,ror #11
    adds    r12,r11,r12,ror #11
    mov     r12,r12,ror #22
    bic     r12,r12,#0x0420
    orrcs   r12,r12,#0x0400
    orr     r8, r8, r12,lsl #16     @ pix_d 6, 7

    and     r11,r10,r9, lsl #2
    sub     r10,r10,r11,lsr #2      @ r10= 3/4 pix_s 2 (second word: pix_s 6)
    and     r11,r7, r9, lsl #2
    add     r10,r10,r11,lsr #2      @ += 1/4 pix_s 3 (second word: pix_s 7)
    orr     r10,r10,r7, lsl #16     @ pix_d 8, 9

    subs    r2, r2, #1<<24          @ one group done; goes negative after 32

    stmdb   r0!, {r4,r5,r6,r8,r10}  @ store 10 output pixels, moving backwards
    bpl     soft_scale_loop_line

    add     r2, r2, #1<<24          @ undo the final borrow from the top byte
    subs    r2, r2, #1              @ next line
    bne     soft_scale_loop

    ldmfd   sp!,{r4-r11,lr}
    bx      lr
217
6587f346 218
@ void do_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int pixels);
@
@ Color lookup: converts 8-bit palette indices to 16-bit colors.
@   r0 = dst     output, written 4 words (8 pixels) at a time
@   r1 = src     input indices, read 2 words (8 pixels) at a time
@   r2 = pal     table of 16-bit colors indexed by source byte
@   r3 = pixels  pixel count; only the pixels/8 whole groups are converted,
@                any remainder is ignored. pixels < 8 (including negative
@                values) returns immediately.
@ Modifies r0, r1, r3, r12 and the flags; r4-r8 and lr are preserved.
@ NOTE(review): the byte extraction assumes the lowest-addressed pixel sits
@ in the low byte of each loaded word (little-endian) - confirm.

.global do_clut

do_clut:
    cmp     r3, #8
    bxlt    lr                      @ no complete group: the subs/bne loop
                                    @ below would otherwise wrap past zero
    stmfd   sp!,{r4-r8,lr}
    mov     lr, #0xff
    mov     lr, lr, lsl #1          @ lr = 0x1fe: mask for (pixel byte * 2)

    mov     r3, r3, lsr #3          @ r3 = number of 8-pixel groups

do_clut_loop:
    ldmia   r1!,{r4,r5}             @ r4 = pixels 0-3, r5 = pixels 4-7

    @ index extraction is interleaved with the table loads
    and     r6, lr, r4, lsl #1
    and     r7, lr, r4, lsr #7
    ldrh    r6, [r2, r6]            @ color 0
    and     r8, lr, r4, lsr #15
    ldrh    r7, [r2, r7]            @ color 1
    and     r4, lr, r4, lsr #23
    ldrh    r8, [r2, r8]            @ color 2
    ldrh    r4, [r2, r4]            @ color 3

    orr     r6, r6, r7, lsl #16     @ r6 = colors 0, 1
    and     r12,lr, r5, lsl #1
    orr     r7, r8, r4, lsl #16     @ r7 = colors 2, 3
    and     r8, lr, r5, lsr #7
    ldrh    r12, [r2, r12]          @ color 4
    and     r4, lr, r5, lsr #15
    ldrh    r8, [r2, r8]            @ color 5
    and     r5, lr, r5, lsr #23
    ldrh    r4, [r2, r4]            @ color 6
    ldrh    r5, [r2, r5]            @ color 7
    orr     r8, r12,r8, lsl #16     @ r8  = colors 4, 5
    orr     r12,r4, r5, lsl #16     @ r12 = colors 6, 7

    stmia   r0!,{r6,r7,r8,r12}
    subs    r3, r3, #1
    bne     do_clut_loop

    ldmfd   sp!,{r4-r8,lr}
    bx      lr
c0bf6f9f 261