soft scaler improved, menu bgs fixed
[fceu.git] / drivers / gp2x / asmutils.s
CommitLineData
937bf65b 1@ vim:filetype=armasm
2
f5eb372f 3@ Assembly optimized routines for gpfce - FCE Ultra port
4@ (c) Copyright 2007, Grazvydas "notaz" Ignotas
5
937bf65b 6@ test
.global flushcache @ beginning_addr, end_addr, flags

.equ FLUSHCACHE_SWI, 0x9f0002           @ swi immediate issued by flushcache

@ flushcache(beginning_addr, end_addr, flags)
@
@ Issues software interrupt 0x9f0002 with the three arguments left exactly
@ where the caller put them (r0, r1, r2), then returns. Whatever the swi
@ handler leaves in r0 is what the caller sees.
@ NOTE(review): 0x9f0002 looks like the Linux ARM OABI encoding of the
@ private cacheflush call (0x900000 | 0x0f0002) - confirm against the
@ kernel headers of the target system.
flushcache:
        swi     #FLUSHCACHE_SWI
        mov     pc, lr
12
c0bf6f9f 13
.global block_or @ void *src, size_t n, int pat

@ void block_or(void *src, size_t n, int pat)
@
@ ORs a 32-bit pattern into the memory at src, 16 bytes per iteration.
@   r0 = src  accessed with ldmia/stmia, so it should be word aligned
@   r1 = n    byte count; only n / 16 whole chunks are processed, a
@             remainder of up to 15 bytes is left untouched
@   r2 = pat  expanded to pat | pat<<8 | pat<<16 | pat<<24, so a value in
@             0..255 ends up in all four byte lanes
@ Returns immediately when n < 16. The loop decrements before it tests, so
@ entering it with a chunk count of zero would wrap the counter around.
@ Clobbers r0-r3, r12 and the flags; r4-r5 are saved and restored.
block_or:
        movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
        bxeq    lr                      @ nothing to do for n < 16
        stmfd   sp!, {r4-r5}
        orr     r2, r2, r2, lsl #8      @ replicate pat into the low halfword
        orr     r2, r2, r2, lsl #16     @ ... and into the whole word
block_loop_or:
        ldmia   r0, {r3-r5,r12}         @ fetch one chunk (no writeback yet)
        subs    r1, r1, #1
        orr     r3, r3, r2
        orr     r4, r4, r2
        orr     r5, r5, r2
        orr     r12,r12,r2
        stmia   r0!, {r3-r5,r12}        @ write it back and advance src
        bne     block_loop_or
        ldmfd   sp!, {r4-r5}
        bx      lr
32
33
.global block_and @ void *src, size_t n, int andpat

@ void block_and(void *src, size_t n, int andpat)
@
@ ANDs a 32-bit pattern into the memory at src, 16 bytes per iteration.
@   r0 = src     accessed with ldmia/stmia, so it should be word aligned
@   r1 = n       byte count; only n / 16 whole chunks are processed, a
@                remainder of up to 15 bytes is left untouched
@   r2 = andpat  expanded to p | p<<8 | p<<16 | p<<24, so a value in
@                0..255 ends up in all four byte lanes
@ Returns immediately when n < 16. The loop decrements before it tests, so
@ entering it with a chunk count of zero would wrap the counter around.
@ Clobbers r0-r3, r12 and the flags; r4-r5 are saved and restored.
block_and:
        movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
        bxeq    lr                      @ nothing to do for n < 16
        stmfd   sp!, {r4-r5}
        orr     r2, r2, r2, lsl #8      @ replicate andpat into the low halfword
        orr     r2, r2, r2, lsl #16     @ ... and into the whole word
block_loop_and:
        ldmia   r0, {r3-r5,r12}         @ fetch one chunk (no writeback yet)
        subs    r1, r1, #1
        and     r3, r3, r2
        and     r4, r4, r2
        and     r5, r5, r2
        and     r12,r12,r2
        stmia   r0!, {r3-r5,r12}        @ write it back and advance src
        bne     block_loop_and
        ldmfd   sp!, {r4-r5}
        bx      lr
52
53
.global block_andor @ void *src, size_t n, int andpat, int orpat

@ void block_andor(void *src, size_t n, int andpat, int orpat)
@
@ Rewrites the memory at src as (word & andpat) | orpat, 16 bytes per
@ iteration.
@   r0 = src     accessed with ldmia/stmia, so it should be word aligned
@   r1 = n       byte count; only n / 16 whole chunks are processed, a
@                remainder of up to 15 bytes is left untouched
@   r2 = andpat  each expanded to p | p<<8 | p<<16 | p<<24, so a value in
@   r3 = orpat   0..255 ends up in all four byte lanes
@ Returns immediately when n < 16. The loop decrements before it tests, so
@ entering it with a chunk count of zero would wrap the counter around.
@ Clobbers r0-r3, r12 and the flags; r4-r6 are saved and restored.
block_andor:
        movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
        bxeq    lr                      @ nothing to do for n < 16
        stmfd   sp!, {r4-r6}
        orr     r2, r2, r2, lsl #8      @ replicate andpat across the word
        orr     r2, r2, r2, lsl #16
        orr     r3, r3, r3, lsl #8      @ replicate orpat across the word
        orr     r3, r3, r3, lsl #16
block_loop_andor:
        ldmia   r0, {r4-r6,r12}         @ fetch one chunk (no writeback yet)
        subs    r1, r1, #1
        and     r4, r4, r2
        orr     r4, r4, r3
        and     r5, r5, r2
        orr     r5, r5, r3
        and     r6, r6, r2
        orr     r6, r6, r3
        and     r12,r12,r2
        orr     r12,r12,r3
        stmia   r0!, {r4-r6,r12}        @ write it back and advance src
        bne     block_loop_andor
        ldmfd   sp!, {r4-r6}
        bx      lr
78
79
.global spend_cycles @ c

@ void spend_cycles(int c)
@
@ Busy-wait helper: spins in a two-instruction countdown loop.
@   r0 = c  requested number of cycles
@ The count is divided by four because one pass of the loop is budgeted at
@ four cycles, and two passes are taken off as an allowance for the call,
@ setup and return. The loop always runs at least once, so small values of
@ c simply fall straight through. Clobbers r0 and the flags.
spend_cycles:
        mov     r0, r0, lsr #2          @ passes = c / 4
        sub     r0, r0, #2              @ minus call/return overhead
.Lspend_cycles_spin:
        subs    r0, r0, #1
        bpl     .Lspend_cycles_spin     @ keep going until the count goes negative

        bx      lr
90
91
.global memset32 @ int *dest, int c, int count

@ void memset32(int *dest, int c, int count)
@
@ Stores the 32-bit value c into count consecutive words starting at dest.
@   r0 = dest   word pointer, post-incremented as words are written
@   r1 = c      fill value
@   r2 = count  number of words
@ Full groups of four words go out with one stmia per pass; the 0-3 words
@ that remain are handled by the two conditional stores at the end.
@ r2 only ever changes in steps of four, so its low two bits still equal
@ count & 3 when the tail is reached.
@ Clobbers r0, r2, r3, r12 and the flags; lr is used as a fourth fill
@ register and is therefore saved on the stack.
memset32:
        str     lr, [sp, #-4]!

        subs    r2, r2, #4              @ at least one full group of four?
        mov     r3, r1                  @ second copy, also needed by the tail
        bmi     .Lmemset32_tail

        mov     lr, r1
        mov     r12,r1

.Lmemset32_quads:
        subs    r2, r2, #4
        stmia   r0!, {r1,r3,r12,lr}
        bpl     .Lmemset32_quads

.Lmemset32_tail:
        tst     r2, #1                  @ odd word left over?
        strne   r1, [r0], #4

        tst     r2, #2                  @ pair of words left over?
        stmneia r0!, {r1,r3}

        ldr     lr, [sp], #4
        bx      lr
118
119
120
.global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines

@ void soft_scale(void *dst, unsigned short *pal, int line_offs, int lines)
@
@ In-place horizontal 256 -> 320 scaler with palette lookup.
@   r0 = dst        base of a buffer that holds the 8-bit source image and
@                   receives the 16-bit result
@   r1 = pal        table of 16-bit colours, indexed by source byte
@   r2 = line_offs  first line to process
@   r3 = lines      number of lines to process
@
@ Source rows are 320 bytes apart starting at dst + line_offs*320; the
@ first 256 bytes of each row are read and the last 64 are skipped.
@ Destination rows are 320 halfwords (640 bytes) apart starting at
@ dst + line_offs*640. Both pointers start at the END of their region and
@ walk backwards.
@
@ Each inner pass turns 8 source pixels s0..s7 into 10 output pixels:
@   d0 = s0                d5 = s4
@   d1 = 1/4 s0 + 3/4 s1   d6 = 1/4 s4 + 3/4 s5
@   d2 = (s1 + s2) / 2     d7 = (s5 + s6) / 2
@   d3 = 3/4 s2 + 1/4 s3   d8 = 3/4 s6 + 1/4 s7
@   d4 = s3                d9 = s7
@ The blends work on all three colour channels at once using RGB565-style
@ masks: 0xe79c keeps the bits of each channel that survive ">> 2", and
@ 0x0820 is cleared before averaging so a carry cannot cross a channel.
@
@ r2 carries two counters: bits 0-23 are the lines still to do, bits 24-31
@ the 8-pixel groups still to do in the current line. lines must therefore
@ be below 1 << 24.
@
@ Returns immediately when lines <= 0. The line counter is decremented
@ before it is tested, so a zero count would wrap and the routine would
@ walk backwards through memory.
soft_scale:
        cmp     r3, #0
        bxle    lr                      @ nothing to scale

        stmfd   sp!,{r4-r11,lr}
        mov     lr, #0xff
        mov     lr, lr, lsl #1          @ lr = 0x1fe: mask for (index * 2)
        mov     r9, #0x3900             @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
        orr     r9, r9, #0x00e7         @ r9 = 0x39e7; used as r9<<2 = 0xe79c

        mov     r11,r3                  @ r11= line counter
        mov     r3, r1                  @ r3 = pal base

        mov     r12,#320
        mul     r2, r12,r2              @ r2 = line_offs * 320
        add     r4, r0, r2, lsl #1      @ r4 = dst_start
        add     r5, r0, r2              @ r5 = src_start
        mul     r12,r11,r12             @ r12= lines * 320
        add     r0, r4, r12,lsl #1      @ r0 = dst_end
        add     r1, r5, r12             @ r1 = src_end

        mov     r2, r11                 @ r2[23:0] = lines left

soft_scale_loop:
        sub     r1, r1, #64             @ skip borders
        orr     r2, r2, #(256/8-1)<<24  @ r2[31:24] = 8-pixel groups left - 1

soft_scale_loop_line:
        ldr     r12, [r1, #-8]!         @ r12 = source pixels s0..s3
        ldr     r7, [r1, #4]            @ r7  = source pixels s4..s7

        and     r4, lr, r12,lsl #1
        ldrh    r4, [r3, r4]            @ r4 = pal[s0]
        and     r5, lr, r12,lsr #7
        ldrh    r5, [r3, r5]            @ r5 = pal[s1]
        and     r11,r4, r9, lsl #2
        orr     r4, r4, r11,lsl #14     @ r4[31:16] = 1/4 pix_s 0
        and     r11,r5, r9, lsl #2
        sub     r6, r5, r11,lsr #2      @ r6 = 3/4 pix_s 1
        add     r4, r4, r6, lsl #16     @ pix_d 0, 1
        and     r6, lr, r12,lsr #15
        ldrh    r6, [r3, r6]            @ r6 = pal[s2]
        and     r12,lr, r12,lsr #23
        ldrh    r12,[r3, r12]           @ r12= pal[s3]
        bic     r11,r6, #0x0820
        bic     r5, r5, #0x0820
        add     r5, r5, r11
        mov     r5, r5, lsr #1          @ r5 = average of pix_s 1 and 2
        and     r11,r6, r9, lsl #2
        sub     r6, r6, r11,lsr #2      @ r6 = 3/4 pix_s 2
        orr     r5, r5, r6, lsl #16

        and     r6, lr, r7, lsl #1
        ldrh    r6, [r3, r6]            @ r6 = pal[s4]
        and     r11,r12,r9, lsl #2
        add     r5, r5, r11,lsl #14     @ pix_d 2, 3
        orr     r6, r12,r6, lsl #16     @ pix_d 4, 5

        and     r12,lr, r7, lsr #7
        ldrh    r12,[r3, r12]           @ r12= pal[s5]
        and     r10,lr, r7, lsr #15
        ldrh    r10,[r3, r10]           @ r10= pal[s6]
        and     r11,r12,r9, lsl #2
        sub     r8, r12,r11,lsr #2      @ r8 = 3/4 pix_s 5
        and     r11,r6, r9, lsl #18     @ pix_s 4 sits in r6[31:16]
        add     r8, r8, r11,lsr #18     @ += 1/4 pix_s 4
        mov     r8, r8, lsl #16
        and     r7, lr, r7, lsr #23
        ldrh    r7, [r3, r7]            @ r7 = pal[s7]
        bic     r11,r10,#0x0820
        bic     r12,r12,#0x0820
        add     r12,r12,r11
        add     r8, r8, r12,lsr #1      @ low half = average of pix_s 5 and 6
        mov     r8, r8, ror #16         @ swap halves: pix_d 6, 7
        and     r11,r10,r9, lsl #2
        sub     r10,r10,r11,lsr #2      @ r10= 3/4 pix_s 6
        and     r11,r7, r9, lsl #2
        add     r10,r10,r11,lsr #2      @ += 1/4 pix_s 7
        orr     r10,r10,r7, lsl #16     @ pix_d 8, 9

        subs    r2, r2, #1<<24          @ one group done; sign flips after the last

        stmdb   r0!, {r4,r5,r6,r8,r10}  @ store 10 pixels below the previous ones
        bpl     soft_scale_loop_line

        add     r2, r2, #1<<24          @ undo the underflow of the group counter
        subs    r2, r2, #1              @ one line done
        bne     soft_scale_loop

        ldmfd   sp!,{r4-r11,lr}
        bx      lr
211
6587f346 212
@ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
@
@ Expands 8-bit palette indices into 16-bit colours: dst[i] = pal[src[i]].
@   r0 = dst    written four words (8 pixels) at a time with stmia
@   r1 = src    read two words (8 indices) at a time with ldmia
@   r2 = pal    table of 16-bit entries indexed by source byte
@   r3 = count  number of pixels; only count / 8 whole groups are
@               converted, a remainder of up to 7 pixels is left untouched
@ Both dst and src are accessed with ldm/stm and should be word aligned.
@ Returns immediately when count < 8, negative values included. The loop
@ decrements before it tests, so entering it with a group count of zero
@ would wrap the counter around.
@ Clobbers r0, r1, r3, r12 and the flags; r4-r8 and lr are saved/restored.

.global convert2RGB555

convert2RGB555:
        cmp     r3, #8
        bxlt    lr                      @ not even one full group of 8 pixels

        stmfd   sp!,{r4-r8,lr}
        mov     lr, #0xff
        mov     lr, lr, lsl #1          @ lr = 0x1fe: mask for (index * 2)

        mov     r3, r3, lsr #3          @ r3 = number of 8-pixel groups

convert2RGB555_loop:
        ldmia   r1!,{r4,r5}             @ r4 = indices 0..3, r5 = indices 4..7

        and     r6, lr, r4, lsl #1
        ldrh    r6, [r2, r6]            @ pixel 0
        and     r7, lr, r4, lsr #7
        ldrh    r7, [r2, r7]            @ pixel 1
        and     r8, lr, r4, lsr #15
        ldrh    r8, [r2, r8]            @ pixel 2
        and     r4, lr, r4, lsr #23
        ldrh    r4, [r2, r4]            @ pixel 3

        orr     r6, r6, r7, lsl #16     @ r6 = pixels 0, 1
        and     r12,lr, r5, lsl #1
        ldrh    r12,[r2, r12]           @ pixel 4
        orr     r7, r8, r4, lsl #16     @ r7 = pixels 2, 3
        and     r8, lr, r5, lsr #7
        ldrh    r8, [r2, r8]            @ pixel 5
        and     r4, lr, r5, lsr #15
        ldrh    r4, [r2, r4]            @ pixel 6
        and     r5, lr, r5, lsr #23
        ldrh    r5, [r2, r5]            @ pixel 7
        orr     r8, r12,r8, lsl #16     @ r8 = pixels 4, 5
        orr     r12,r4, r5, lsl #16     @ r12= pixels 6, 7

        stmia   r0!,{r6,r7,r8,r12}
        subs    r3, r3, #1
        bne     convert2RGB555_loop

        ldmfd   sp!,{r4-r8,lr}
        bx      lr
c0bf6f9f 255