fceu options + support code
[fceu.git] / drivers / gp2x / asmutils.s
CommitLineData
937bf65b 1@ vim:filetype=armasm
2
3@ test
.global flushcache @ beginning_addr, end_addr, flags

@ flushcache(beginning_addr, end_addr, flags)
@ Raises software interrupt 0x9f0002 with the three arguments left untouched
@ in r0-r2, then returns to the caller.
@ NOTE(review): 0x9f0002 is presumably the Linux OABI ARM-private cacheflush
@ call - confirm against the target kernel's headers.
@ In:  r0 = beginning_addr, r1 = end_addr, r2 = flags
flushcache:
    swi     #0x9f0002
    bx      lr                      @ same return form as the other routines in this file
9
c0bf6f9f 10
.global block_or @ void *src, size_t n, int pat

@ void block_or(void *src, size_t n, int pat)
@ ORs the pattern byte into every byte of the buffer, 16 bytes per pass.
@ Only n / 16 whole chunks are processed; a trailing n % 16 bytes are left
@ untouched, and n < 16 is a no-op.
@ In:  r0 = src  (accessed with ldmia/stmia, so presumably word aligned)
@      r1 = n    (byte count)
@      r2 = pat  (expected to be a byte value; higher bits are not masked off)
@ Clobbers: r0-r3, r12, flags (r0 ends just past the last processed chunk)
block_or:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
    bxeq    lr                      @ none: the do-while loop below would wrap its counter
    stmfd   sp!, {r4-r5}
    orr     r2, r2, r2, lsl #8      @ replicate the pattern byte ...
    orr     r2, r2, r2, lsl #16     @ ... into all four byte lanes
block_loop_or:
    ldmia   r0, {r3-r5,r12}         @ fetch one chunk (no writeback yet)
    subs    r1, r1, #1
    orr     r3, r3, r2
    orr     r4, r4, r2
    orr     r5, r5, r2
    orr     r12,r12,r2
    stmia   r0!, {r3-r5,r12}        @ store it back and advance
    bne     block_loop_or
    ldmfd   sp!, {r4-r5}
    bx      lr
29
30
.global block_and @ void *src, size_t n, int andpat

@ void block_and(void *src, size_t n, int andpat)
@ ANDs the pattern byte into every byte of the buffer, 16 bytes per pass.
@ Only n / 16 whole chunks are processed; a trailing n % 16 bytes are left
@ untouched, and n < 16 is a no-op.
@ In:  r0 = src     (accessed with ldmia/stmia, so presumably word aligned)
@      r1 = n       (byte count)
@      r2 = andpat  (expected to be a byte value; higher bits are not masked off)
@ Clobbers: r0-r3, r12, flags (r0 ends just past the last processed chunk)
block_and:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
    bxeq    lr                      @ none: the do-while loop below would wrap its counter
    stmfd   sp!, {r4-r5}
    orr     r2, r2, r2, lsl #8      @ replicate the pattern byte ...
    orr     r2, r2, r2, lsl #16     @ ... into all four byte lanes
block_loop_and:
    ldmia   r0, {r3-r5,r12}         @ fetch one chunk (no writeback yet)
    subs    r1, r1, #1
    and     r3, r3, r2
    and     r4, r4, r2
    and     r5, r5, r2
    and     r12,r12,r2
    stmia   r0!, {r3-r5,r12}        @ store it back and advance
    bne     block_loop_and
    ldmfd   sp!, {r4-r5}
    bx      lr
49
50
.global block_andor @ void *src, size_t n, int andpat, int orpat

@ void block_andor(void *src, size_t n, int andpat, int orpat)
@ For every byte of the buffer: byte = (byte & andpat) | orpat, handled
@ 16 bytes per pass. Only n / 16 whole chunks are processed; a trailing
@ n % 16 bytes are left untouched, and n < 16 is a no-op.
@ In:  r0 = src     (accessed with ldmia/stmia, so presumably word aligned)
@      r1 = n       (byte count)
@      r2 = andpat, r3 = orpat (expected to be byte values; higher bits
@                               are not masked off)
@ Clobbers: r0-r3, r12, flags (r0 ends just past the last processed chunk)
block_andor:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
    bxeq    lr                      @ none: the do-while loop below would wrap its counter
    stmfd   sp!, {r4-r6}
    orr     r2, r2, r2, lsl #8      @ replicate the AND byte into all four lanes
    orr     r2, r2, r2, lsl #16
    orr     r3, r3, r3, lsl #8      @ replicate the OR byte into all four lanes
    orr     r3, r3, r3, lsl #16
block_loop_andor:
    ldmia   r0, {r4-r6,r12}         @ fetch one chunk (no writeback yet)
    subs    r1, r1, #1
    and     r4, r4, r2
    orr     r4, r4, r3
    and     r5, r5, r2
    orr     r5, r5, r3
    and     r6, r6, r2
    orr     r6, r6, r3
    and     r12,r12,r2
    orr     r12,r12,r3
    stmia   r0!, {r4-r6,r12}        @ store it back and advance
    bne     block_loop_andor
    ldmfd   sp!, {r4-r6}
    bx      lr
75
76
.global spend_cycles @ c

@ void spend_cycles(c)
@ Busy-wait delay: spins in a two-instruction countdown loop.
@ In:  r0 = c, the number of cycles to burn
@ Clobbers: r0, flags
spend_cycles:
    mov     r0, r0, lsr #2          @ each pass of the loop is budgeted at 4 cycles
    sub     r0, r0, #2              @ allowance for entry, exit and this setup
1:
    subs    r0, r0, #1
    bpl     1b                      @ keep spinning until the counter goes negative

    bx      lr
87
88
.global memset32 @ int *dest, int c, int count

@ void memset32(int *dest, int c, int count)
@ Stores the 32-bit value c into count consecutive words starting at dest.
@ A count of zero or less writes nothing.
@ In:  r0 = dest, r1 = c, r2 = count (in words, not bytes)
@ Clobbers: r0, r2, r3, r12, flags (r0 ends just past the last word written)
memset32:
    cmp     r2, #0
    bxle    lr                      @ a negative count would otherwise reach the tail code
                                    @ below and write up to 3 stray words

    stmfd   sp!, {lr}               @ lr doubles as the fourth fill register

    mov     r3, r1
    subs    r2, r2, #4
    bmi     mst32_fin               @ fewer than 4 words: tail only

    mov     r12,r1
    mov     lr, r1

mst32_loop:
    subs    r2, r2, #4
    stmia   r0!, {r1,r3,r12,lr}     @ 4 words per pass
    bpl     mst32_loop

mst32_fin:
    @ r2 is now (count % 4) - 4; subtracting multiples of 4 left bits 0-1
    @ equal to those of count, so they select the 0-3 remaining words.
    tst     r2, #1
    strne   r1, [r0], #4

    tst     r2, #2
    stmneia r0!, {r1,r3}

    ldmfd   sp!, {lr}
    bx      lr
115
116
117
.global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines

@ void soft_scale(void *dst, unsigned short *pal, int line_offs, int lines)
@ In-place palette lookup plus horizontal 256 -> 320 stretch.
@
@ The source is read from the same buffer as the destination: 8-bit pixels,
@ 320 bytes per line, of which the last 64 bytes of every line are skipped.
@ The destination is 16-bit pixels, 320 per line (640 bytes). Both are
@ walked backwards from the end of the last line, so the wider output never
@ overwrites source bytes that are still to be read.
@
@ Every 8 source pixels s0..s7 (after palette lookup and masking with 0xe79c,
@ which clears the two low bits of each colour field so the >>1 / >>2 scaling
@ cannot leak into the neighbouring field) become 10 output pixels:
@   d0 = s0              d5 = s4
@   d1 = 1/4 s0 + 3/4 s1 d6 = 1/4 s4 + 3/4 s5
@   d2 = 1/2 s1 + 1/2 s2 d7 = 1/2 s5 + 1/2 s6
@   d3 = 3/4 s2 + 1/4 s3 d8 = 3/4 s6 + 1/4 s7
@   d4 = s3              d9 = s7
@
@ In:  r0 = dst   (shared source/destination buffer)
@      r1 = pal   (256 halfword entries, indexed by source byte;
@                  presumably RGB565 given the mask constants)
@      r2 = line_offs (first line to process)
@      r3 = lines     (number of lines; <= 0 is a no-op)
@ Clobbers: r0-r3, r12, flags
soft_scale:
    cmp     r3, #0
    bxle    lr                          @ no lines: the do-while line loop would wrap its counter

    stmfd   sp!,{r4-r11,lr}
    mov     lr, #0xff
    mov     lr, lr, lsl #1              @ lr = 0x1fe: mask for (source byte * 2) table offsets
    mov     r9, #0x3900                 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
    orr     r9, r9, #0x00e7             @ r9 = 0x39e7, always used as r9 << 2 = 0xe79c

    mov     r11,r3                      @ r11= line counter
    mov     r3, r1                      @ r3 = pal base

    mov     r12,#320
    mul     r2, r12,r2                  @ r2 = line_offs * 320 (operand order keeps Rd != Rm)
    add     r4, r0, r2, lsl #1          @ r4 = dst_start
    add     r5, r0, r2                  @ r5 = src_start
    mul     r12,r11,r12                 @ r12 = lines * 320
    add     r0, r4, r12,lsl #1          @ r0 = dst_end
    add     r1, r5, r12                 @ r1 = src_end

soft_scale_loop:
    sub     r1, r1, #64                 @ skip borders
    mov     r2, #256/8                  @ 32 groups of 8 source pixels per line

soft_scale_loop_line:
    ldr     r12, [r1, #-8]!             @ r12 = s0..s3 (one byte each), src pointer -= 8
    ldr     r7, [r1, #4]                @ r7  = s4..s7

    and     r4, lr, r12,lsl #1
    ldrh    r4, [r3, r4]                @ r4 = pal[s0]
    and     r5, lr, r12,lsr #7
    ldrh    r5, [r3, r5]                @ r5 = pal[s1]
    and     r4, r4, r9, lsl #2
    orr     r4, r4, r4, lsl #14         @ r4[31:16] = 1/4 s0
    and     r5, r5, r9, lsl #2
    sub     r6, r5, r5, lsr #2          @ r6 = 3/4 s1
    add     r4, r4, r6, lsl #16         @ r4 = d0, d1
    and     r6, lr, r12,lsr #15
    ldrh    r6, [r3, r6]                @ r6 = pal[s2]
    and     r12,lr, r12,lsr #23
    ldrh    r12,[r3, r12]               @ r12 = pal[s3]
    and     r6, r6, r9, lsl #2
    add     r5, r5, r6
    mov     r5, r5, lsr #1              @ r5[15:0] = (s1 + s2) / 2
    sub     r6, r6, r6, lsr #2          @ r6 = 3/4 s2
    orr     r5, r5, r6, lsl #16

    and     r6, lr, r7, lsl #1
    ldrh    r6, [r3, r6]                @ r6 = pal[s4]
    and     r12,r12,r9, lsl #2
    add     r5, r5, r12,lsl #14         @ r5 = d2, d3 (upper half += 1/4 s3)
    and     r6, r6, r9, lsl #2
    orr     r6, r12,r6, lsl #16         @ r6 = d4, d5

    and     r12,lr, r7, lsr #7
    ldrh    r12,[r3, r12]               @ r12 = pal[s5]
    and     r10,lr, r7, lsr #15
    ldrh    r10,[r3, r10]               @ r10 = pal[s6]
    and     r12,r12,r9, lsl #2
    sub     r8, r12,r12,lsr #2          @ r8 = 3/4 s5
    add     r8, r8, r6, lsr #18         @ += 1/4 s4 (taken from the upper half of r6)
    and     r7, lr, r7, lsr #23
    ldrh    r7, [r3, r7]                @ r7 = pal[s7]
    and     r10,r10,r9, lsl #2
    orr     r8, r8, r10,lsl #15         @ r8[31:16] = 1/2 s6
    add     r8, r8, r12,lsl #15         @ r8 = d6, d7 (upper half += 1/2 s5)
    sub     r10,r10,r10,lsr #2          @ r10= 3/4 s6
    and     r7, r7, r9, lsl #2
    add     r10,r10,r7, lsr #2          @ += 1/4 s7
    orr     r10,r10,r7, lsl #16         @ r10 = d8, d9

    subs    r2, r2, #1

    stmdb   r0!, {r4,r5,r6,r8,r10}      @ write 10 output pixels below the previous group
    bne     soft_scale_loop_line

    subs    r11,r11,#1
    bne     soft_scale_loop

    ldmfd   sp!,{r4-r11,lr}
    bx      lr
199
6587f346 200
/* buggy and slow, probably because of function call overhead
202@ renderer helper, based on bitbank's method
203.global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal
204
205draw8pix:
206 stmfd sp!, {r4,r5}
207
208 ldrb r3, [r1] @ get bit 0 pixels
209 mov r12,#1
210 orr r12,r12,r12,lsl #8
211 orr r12,r12,r12,lsl #16
212 ldrb r1, [r1, #8] @ get bit 1 pixels
213 orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit
214 orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
215 and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want
216 and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want
217 ldr r2, [r2]
218
219 orr r1, r1, r1, lsl #9 @ process the bit 1 pixels
220 orr r1, r1, r1, lsl #18
221 and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want
222 and r1, r12,r1, lsr #3 @ mask off the lower nibble
223 orr r4, r4, r3, lsl #1
224 orr r5, r5, r1, lsl #5
225
226 @ can this be avoided?
227 mov r4, r4, lsl #3 @ *8
228 mov r3, r2, ror r4
229 strb r3, [r0], #1
230 mov r4, r4, lsr #8
231 mov r3, r2, ror r4
232 strb r3, [r0], #1
233 mov r4, r4, lsr #8
234 mov r3, r2, ror r4
235 strb r3, [r0], #1
236 mov r4, r4, lsr #8
237 mov r3, r2, ror r4
238 strb r3, [r0], #1
239
240 mov r5, r5, lsl #3 @ *8
241 mov r3, r2, ror r5
242 strb r3, [r0], #1
243 mov r5, r5, lsr #8
244 mov r3, r2, ror r5
245 strb r3, [r0], #1
246 mov r5, r5, lsr #8
247 mov r3, r2, ror r5
248 strb r3, [r0], #1
249 mov r5, r5, lsr #8
250 mov r3, r2, ror r5
251 strb r3, [r0], #1
252
253 ldmfd sp!, {r4,r5}
254 bx lr
255*/
256