@ vim:filetype=armasm
@ fceu.git / drivers / gp2x / asmutils.s -- "perfect vsync, bugfixes"

@ Assembly optimized routines for gpfce - FCE Ultra port
@ (c) Copyright 2007, Grazvydas "notaz" Ignotas

@ test
@ flushcache(beginning_addr, end_addr, flags)
@
@ Raises software interrupt 0x9f0002 with the three arguments still in
@ r0-r2 exactly as the caller passed them, then returns to the caller.
@ NOTE(review): 0x9f0002 looks like the ARM Linux OABI cacheflush syscall
@ number -- confirm against the target kernel headers.
.global flushcache @ beginning_addr, end_addr, flags

flushcache:
    swi     #0x9f0002
    bx      lr                      @ bx (not "mov pc, lr"): same return form as the other routines here


@ void block_or(void *src, size_t n, int pat)
@
@ ORs a pattern into the buffer at src, in place.
@   In:  r0 = src, r1 = n (bytes), r2 = pat
@ pat is widened with r2 |= r2 << 8; r2 |= r2 << 16, i.e. an 8-bit pat ends
@ up repeated in all four bytes of the word that is ORed in.
@ The buffer is processed 16 bytes (four words) per pass; n is divided by 16
@ rounding down, so a trailing n % 16 bytes are left untouched.  When n < 16
@ the routine returns without touching memory (without this check the pass
@ counter would start at 0 and wrap around).
@ assumes src is word-aligned, since it is accessed with ldmia/stmia -- TODO confirm
.global block_or @ void *src, size_t n, int pat

block_or:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte passes
    bxeq    lr                      @ none: return before the counter can wrap
    stmfd   sp!, {r4-r5}
    orr     r2, r2, r2, lsl #8
    orr     r2, r2, r2, lsl #16     @ r2 = widened pattern
block_loop_or:
    ldmia   r0, {r3-r5,r12}         @ fetch four words, pointer not advanced yet
    subs    r1, r1, #1              @ flags consumed by the bne below
    orr     r3, r3, r2
    orr     r4, r4, r2
    orr     r5, r5, r2
    orr     r12,r12,r2
    stmia   r0!, {r3-r5,r12}        @ write back and step to the next 16 bytes
    bne     block_loop_or
    ldmfd   sp!, {r4-r5}
    bx      lr


@ void block_and(void *src, size_t n, int andpat)
@
@ ANDs a pattern into the buffer at src, in place.
@   In:  r0 = src, r1 = n (bytes), r2 = andpat
@ andpat is widened with r2 |= r2 << 8; r2 |= r2 << 16, i.e. an 8-bit andpat
@ ends up repeated in all four bytes of the mask word.
@ The buffer is processed 16 bytes (four words) per pass; n is divided by 16
@ rounding down, so a trailing n % 16 bytes are left untouched.  When n < 16
@ the routine returns without touching memory (without this check the pass
@ counter would start at 0 and wrap around).
@ assumes src is word-aligned, since it is accessed with ldmia/stmia -- TODO confirm
.global block_and @ void *src, size_t n, int andpat

block_and:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte passes
    bxeq    lr                      @ none: return before the counter can wrap
    stmfd   sp!, {r4-r5}
    orr     r2, r2, r2, lsl #8
    orr     r2, r2, r2, lsl #16     @ r2 = widened mask
block_loop_and:
    ldmia   r0, {r3-r5,r12}         @ fetch four words, pointer not advanced yet
    subs    r1, r1, #1              @ flags consumed by the bne below
    and     r3, r3, r2
    and     r4, r4, r2
    and     r5, r5, r2
    and     r12,r12,r2
    stmia   r0!, {r3-r5,r12}        @ write back and step to the next 16 bytes
    bne     block_loop_and
    ldmfd   sp!, {r4-r5}
    bx      lr


@ void block_andor(void *src, size_t n, int andpat, int orpat)
@
@ Applies word = (word & andpat') | orpat' to the buffer at src, in place.
@   In:  r0 = src, r1 = n (bytes), r2 = andpat, r3 = orpat
@ Both patterns are widened with p |= p << 8; p |= p << 16, i.e. an 8-bit
@ pattern ends up repeated in all four bytes of the word.
@ The buffer is processed 16 bytes (four words) per pass; n is divided by 16
@ rounding down, so a trailing n % 16 bytes are left untouched.  When n < 16
@ the routine returns without touching memory (without this check the pass
@ counter would start at 0 and wrap around).
@ assumes src is word-aligned, since it is accessed with ldmia/stmia -- TODO confirm
.global block_andor @ void *src, size_t n, int andpat, int orpat

block_andor:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte passes
    bxeq    lr                      @ none: return before the counter can wrap
    stmfd   sp!, {r4-r6}
    orr     r2, r2, r2, lsl #8
    orr     r2, r2, r2, lsl #16     @ r2 = widened AND mask
    orr     r3, r3, r3, lsl #8
    orr     r3, r3, r3, lsl #16     @ r3 = widened OR pattern
block_loop_andor:
    ldmia   r0, {r4-r6,r12}         @ fetch four words, pointer not advanced yet
    subs    r1, r1, #1              @ flags consumed by the bne below
    and     r4, r4, r2
    orr     r4, r4, r3
    and     r5, r5, r2
    orr     r5, r5, r3
    and     r6, r6, r2
    orr     r6, r6, r3
    and     r12,r12,r2
    orr     r12,r12,r3
    stmia   r0!, {r4-r6,r12}        @ write back and step to the next 16 bytes
    bne     block_loop_andor
    ldmfd   sp!, {r4-r6}
    bx      lr


@ spend_cycles(c)
@
@ Busy-wait loop.
@   In:  r0 = c
@ The loop counter starts at (c >> 2) - 2 and is decremented until it goes
@ negative.  Per the original author's notes, the shift reflects a budget of
@ 4 cycles per loop iteration and the "- 2" compensates for entry/exit/init
@ overhead.
.global spend_cycles @ c

spend_cycles:
    mov     r0, r0, lsr #2          @ c / 4: one iteration per 4 cycles
    sub     r0, r0, #2              @ allowance for entry/exit/init
1:
    subs    r0, r0, #1
    bpl     1b                      @ keep spinning while the counter is >= 0

    bx      lr


@ memset32(int *dest, int c, int count)
@
@ Stores the 32-bit value c into count consecutive words starting at dest.
@   In:  r0 = dest, r1 = c, r2 = count (in words)
@ Full groups of four words are written with one stmia each; the remaining
@ count % 4 words are written by testing bits 0 and 1 of the (biased)
@ counter, whose two low bits still equal those of count.
@ NOTE(review): a negative count is not rejected -- it skips the main loop
@ but the tail still looks at the low two bits; callers presumably pass
@ count >= 0.
.global memset32 @ int *dest, int c, int count

memset32:
    str     lr, [sp, #-4]!          @ lr is borrowed as a fourth data register

    mov     r3, r1                  @ second copy of c (also needed by the tail)
    subs    r2, r2, #4
    bmi     2f                      @ fewer than four words: tail only

    mov     lr, r1
    mov     r12,r1                  @ r1, r3, r12, lr all hold c

1:
    subs    r2, r2, #4
    stmia   r0!, {r1,r3,r12,lr}     @ four words per pass
    bpl     1b

2:
    tst     r2, #1
    strne   r1, [r0], #4            @ one leftover word

    tst     r2, #2
    stmneia r0!, {r1,r3}            @ two leftover words

    ldr     lr, [sp], #4
    bx      lr


@ void soft_scale(void *dst, unsigned short *pal, int line_offs, int lines)
@
@ warning: this code relies on palette being strictly RGB555, i.e. bit5=0
@
@ Converts 8-bit palette-indexed pixels to 16-bit pixels through pal while
@ stretching every 8 source pixels to 10 destination pixels (256 source
@ pixels -> 320 destination pixels per line).  Source and destination live in
@ the same buffer:
@   source      = dst + 320*line_offs      (1 byte/pixel, 320-byte line pitch)
@   destination = dst + 320*line_offs*2    (2 bytes/pixel, 640 bytes per line)
@ Both are walked backwards, from the end of the last line to the start of the
@ first, 8 source bytes in / 5 destination words out per inner iteration.
@
@ Per group (s0..s7 = palette colours of the 8 source pixels, in the order
@ they are extracted from the two loaded words, low byte first):
@   d0 = s0                  d5 = s4
@   d1 = 1/4 s0 + 3/4 s1     d6 = 1/4 s4 + 3/4 s5
@   d2 = (s1 + s2) / 2       d7 = (s5 + s6) / 2
@   d3 = 3/4 s2 + 1/4 s3     d8 = 3/4 s6 + 1/4 s7
@   d4 = s3                  d9 = s7
@ "1/4 x" is computed as (x & 0xe79c) >> 2, "3/4 x" as x - 1/4 x.
@
@   In:  r0 = dst, r1 = pal, r2 = line_offs, r3 = lines
@ Returns immediately when lines <= 0 (the line counter would otherwise wrap).
@ The line counter shares r2 with the per-line group counter kept in bits
@ 24-31, so lines is expected to stay well below 1 << 24.
@ assumes the source address is word-aligned (it is read with ldr) -- TODO confirm
.global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines

soft_scale:
    cmp     r3, #0
    bxle    lr                      @ nothing to scale; also keeps the counters from wrapping

    stmfd   sp!,{r4-r11,lr}
    mov     lr, #0xff
    mov     lr, lr, lsl #1          @ lr = 0x1fe: mask for (pixel index * 2)
    mov     r9, #0x3900             @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
    orr     r9, r9, #0x00e7         @ r9 = 0x39e7; used as r9 lsl #2 = 0xe79c

    mov     r11,r3                  @ r11= line counter
    mov     r3, r1                  @ r3 = pal base

    mov     r12,#320
    mul     r2, r12,r2              @ r2 = 320 * line_offs
    add     r4, r0, r2, lsl #1      @ r4 = dst_start
    add     r5, r0, r2              @ r5 = src_start
    mul     r12,r11,r12             @ r12 = 320 * lines
    add     r0, r4, r12,lsl #1      @ r0 = dst_end
    add     r1, r5, r12             @ r1 = src_end

    mov     r2, r11                 @ r2 = lines left (low bits)

soft_scale_loop:
    sub     r1, r1, #64             @ skip borders
    orr     r2, r2, #(256/8-1)<<24  @ r2[31:24] = groups of 8 pixels left in this line, minus 1

soft_scale_loop_line:
    ldr     r12, [r1, #-8]!         @ r12 = source pixels 0-3, source pointer steps back 8
    ldr     r7, [r1, #4]            @ r7  = source pixels 4-7

    and     r4, lr, r12,lsl #1
    ldrh    r4, [r3, r4]            @ r4 = s0
    and     r5, lr, r12,lsr #7
    ldrh    r5, [r3, r5]            @ r5 = s1
    and     r11,r4, r9, lsl #2
    orr     r4, r4, r11,lsl #14     @ r4[31:16] = 1/4 pix_s 0
    and     r11,r5, r9, lsl #2
    sub     r6, r5, r11,lsr #2      @ r6 = 3/4 pix_s 1
    add     r4, r4, r6, lsl #16     @ pix_d 0, 1
    and     r6, lr, r12,lsr #15
    ldrh    r6, [r3, r6]            @ r6 = s2
    and     r12,lr, r12,lsr #23
    ldrh    r12,[r3, r12]           @ r12 = s3

    @ average of s1 and s2: add them rotated by 11, rotate the sum by 22
    @ (33 in total, i.e. a net shift right by one)
    mov     r11,r6, ror #11
    adds    r5, r11,r5, ror #11     @ carry out = top bit of the green sum
    mov     r5, r5, ror #22
    bic     r5, r5, #0xff000000
    bic     r5, r5, #0x0420         @ set the green bits as they should be
    orrcs   r5, r5, #0x0400

    and     r11,r6, r9, lsl #2
    sub     r6, r6, r11,lsr #2      @ r6 = 3/4 pix_s 2
    orr     r5, r5, r6, lsl #16

    and     r6, lr, r7, lsl #1
    ldrh    r6, [r3, r6]            @ r6 = s4
    and     r11,r12,r9, lsl #2
    add     r5, r5, r11,lsl #14     @ pix_d 2, 3
    orr     r6, r12,r6, lsl #16     @ pix_d 4, 5

    and     r12,lr, r7, lsr #7
    ldrh    r12,[r3, r12]           @ r12 = s5
    and     r10,lr, r7, lsr #15
    ldrh    r10,[r3, r10]           @ r10 = s6
    and     r11,r12,r9, lsl #2
    sub     r8, r12,r11,lsr #2      @ r8 = 3/4 pix_s 1 (of the second word, i.e. s5)
    and     r11,r6, r9, lsl #18     @ masked s4, taken from the top half of r6
    add     r8, r8, r11,lsr #18     @ += 1/4 s4
    and     r7, lr, r7, lsr #23
    ldrh    r7, [r3, r7]            @ r7 = s7

    @ average of s5 and s6, same rotate trick; the bits above 15 are dropped
    @ by the lsl #16 below
    mov     r11,r10,ror #11
    adds    r12,r11,r12,ror #11
    mov     r12,r12,ror #22
    bic     r12,r12,#0x0420
    orrcs   r12,r12,#0x0400
    orr     r8, r8, r12,lsl #16     @ pix_d 6, 7

    and     r11,r10,r9, lsl #2
    sub     r10,r10,r11,lsr #2      @ r10= 3/4 pix_s 2
    and     r11,r7, r9, lsl #2
    add     r10,r10,r11,lsr #2      @ += 1/4 pix_s 3
    orr     r10,r10,r7, lsl #16     @ pix_d 8, 9

    subs    r2, r2, #1<<24          @ one group done; goes negative after the last one

    stmdb   r0!, {r4,r5,r6,r8,r10}  @ 10 destination pixels, destination pointer steps back 20
    bpl     soft_scale_loop_line

    add     r2, r2, #1<<24          @ undo the final borrow so r2 is the plain line count again
    subs    r2, r2, #1
    bne     soft_scale_loop

    ldmfd   sp!,{r4-r11,lr}
    bx      lr


@ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
@
@ Expands 8-bit palette indices to 16-bit palette entries: dst[i] = pal[src[i]].
@   In:  r0 = dst, r1 = src, r2 = pal, r3 = count (pixels)
@ Pixels are handled eight at a time (two source words in, four destination
@ words out); count is divided by 8 rounding down, so a trailing count % 8
@ pixels are not converted.  When count < 8 the routine returns without
@ touching memory (without this check the group counter would start at 0 and
@ wrap around).
@ assumes src and dst are word-aligned, since they are accessed with
@ ldmia/stmia -- TODO confirm

.global convert2RGB555

convert2RGB555:
    movs    r3, r3, lsr #3          @ r3 = number of 8-pixel groups
    bxeq    lr                      @ none: return before the counter can wrap

    stmfd   sp!,{r4-r8,lr}
    mov     lr, #0xff
    mov     lr, lr, lsl #1          @ lr = 0x1fe: mask for (pixel index * 2)

convert2RGB555_loop:
    ldmia   r1!,{r4,r5}             @ r4 = pixels 0-3, r5 = pixels 4-7

    and     r6, lr, r4, lsl #1
    ldrh    r6, [r2, r6]            @ pixel 0
    and     r7, lr, r4, lsr #7
    ldrh    r7, [r2, r7]            @ pixel 1
    and     r8, lr, r4, lsr #15
    ldrh    r8, [r2, r8]            @ pixel 2
    and     r4, lr, r4, lsr #23
    ldrh    r4, [r2, r4]            @ pixel 3

    orr     r6, r6, r7, lsl #16     @ r6 = pixels 0, 1
    and     r12,lr, r5, lsl #1
    ldrh    r12, [r2, r12]          @ pixel 4
    orr     r7, r8, r4, lsl #16     @ r7 = pixels 2, 3
    and     r8, lr, r5, lsr #7
    ldrh    r8, [r2, r8]            @ pixel 5
    and     r4, lr, r5, lsr #15
    ldrh    r4, [r2, r4]            @ pixel 6
    and     r5, lr, r5, lsr #23
    ldrh    r5, [r2, r5]            @ pixel 7
    orr     r8, r12,r8, lsl #16     @ r8  = pixels 4, 5
    orr     r12,r4, r5, lsl #16     @ r12 = pixels 6, 7

    stmia   r0!,{r6,r7,r8,r12}
    subs    r3, r3, #1
    bne     convert2RGB555_loop

    ldmfd   sp!,{r4-r8,lr}
    bx      lr
