937bf65b |
1 | @ vim:filetype=armasm |
2 | |
f5eb372f |
3 | @ Assembly optimized routines for gpfce - FCE Ultra port |
4 | @ (c) Copyright 2007, Grazvydas "notaz" Ignotas |
5 | |
937bf65b |
@ void flushcache(beginning_addr, end_addr, flags)
@
@ Issues software interrupt 0x9f0002 with the three arguments passed through
@ untouched in r0-r2, then returns to the caller.
@ NOTE(review): 0x9f0002 is presumably the ARM Linux OABI cacheflush call
@ (0x900000 syscall base + 0x0f0002) - confirm against the target kernel.
@
@ In:    r0 = beginning_addr, r1 = end_addr, r2 = flags
@ Out:   r0 = whatever the swi handler leaves there
        .global flushcache

flushcache:
        swi     #0x9f0002
        bx      lr                      @ bx (not mov pc, lr) so the return interworks, like every other routine here
12 | |
c0bf6f9f |
13 | |
6587f346 |
@ void block_or(void *src, size_t n, int pat)
@
@ ORs a byte pattern into a buffer in place, 16 bytes per loop pass.
@
@ In:    r0 = src  (accessed with ldm/stm, so it should be word aligned)
@        r1 = n    (byte count; only n/16 whole chunks are processed,
@                   a trailing remainder is left untouched)
@        r2 = pat  (low byte is replicated into all four byte lanes; the
@                   replication is only exact when pat fits in 8 bits)
@ Out:   r0 = src advanced past the processed chunks
@ Clobb: r1-r3, r12, flags (r4-r5 are saved and restored)
        .global block_or

block_or:
        movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
        bxeq    lr                      @ n < 16: nothing to do (the loop below would otherwise wrap its counter)
        stmfd   sp!, {r4-r5}
        orr     r2, r2, r2, lsl #8      @ pat -> 0x0000pppp
        orr     r2, r2, r2, lsl #16     @ pat -> 0xpppppppp
block_loop_or:
        ldmia   r0, {r3-r5,r12}         @ fetch one chunk (no writeback yet)
        subs    r1, r1, #1
        orr     r3, r3, r2
        orr     r4, r4, r2
        orr     r5, r5, r2
        orr     r12,r12,r2
        stmia   r0!, {r3-r5,r12}        @ write it back and advance
        bne     block_loop_or           @ flags still from the subs above
        ldmfd   sp!, {r4-r5}
        bx      lr
32 | |
33 | |
e328100e |
@ void block_and(void *src, size_t n, int andpat)
@
@ ANDs a byte pattern into a buffer in place, 16 bytes per loop pass.
@
@ In:    r0 = src     (accessed with ldm/stm, so it should be word aligned)
@        r1 = n       (byte count; only n/16 whole chunks are processed,
@                      a trailing remainder is left untouched)
@        r2 = andpat  (low byte is replicated into all four byte lanes; the
@                      replication is only exact when andpat fits in 8 bits)
@ Out:   r0 = src advanced past the processed chunks
@ Clobb: r1-r3, r12, flags (r4-r5 are saved and restored)
        .global block_and

block_and:
        movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
        bxeq    lr                      @ n < 16: nothing to do (the loop below would otherwise wrap its counter)
        stmfd   sp!, {r4-r5}
        orr     r2, r2, r2, lsl #8      @ andpat -> 0x0000pppp
        orr     r2, r2, r2, lsl #16     @ andpat -> 0xpppppppp
block_loop_and:
        ldmia   r0, {r3-r5,r12}         @ fetch one chunk (no writeback yet)
        subs    r1, r1, #1
        and     r3, r3, r2
        and     r4, r4, r2
        and     r5, r5, r2
        and     r12,r12,r2
        stmia   r0!, {r3-r5,r12}        @ write it back and advance
        bne     block_loop_and          @ flags still from the subs above
        ldmfd   sp!, {r4-r5}
        bx      lr
52 | |
53 | |
6587f346 |
@ void block_andor(void *src, size_t n, int andpat, int orpat)
@
@ For every word of the buffer computes (word & andpat4) | orpat4 in place,
@ 16 bytes per loop pass, where andpat4/orpat4 are the byte patterns
@ replicated into all four byte lanes.
@
@ In:    r0 = src     (accessed with ldm/stm, so it should be word aligned)
@        r1 = n       (byte count; only n/16 whole chunks are processed,
@                      a trailing remainder is left untouched)
@        r2 = andpat  (replication is only exact for values that fit in 8 bits)
@        r3 = orpat   (same)
@ Out:   r0 = src advanced past the processed chunks
@ Clobb: r1-r3, r12, flags (r4-r6 are saved and restored)
        .global block_andor

block_andor:
        movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
        bxeq    lr                      @ n < 16: nothing to do (the loop below would otherwise wrap its counter)
        stmfd   sp!, {r4-r6}
        orr     r2, r2, r2, lsl #8
        orr     r2, r2, r2, lsl #16     @ r2 = andpat in every byte lane
        orr     r3, r3, r3, lsl #8
        orr     r3, r3, r3, lsl #16     @ r3 = orpat in every byte lane
block_loop_andor:
        ldmia   r0, {r4-r6,r12}         @ fetch one chunk (no writeback yet)
        subs    r1, r1, #1
        and     r4, r4, r2
        orr     r4, r4, r3
        and     r5, r5, r2
        orr     r5, r5, r3
        and     r6, r6, r2
        orr     r6, r6, r3
        and     r12,r12,r2
        orr     r12,r12,r3
        stmia   r0!, {r4-r6,r12}        @ write it back and advance
        bne     block_loop_andor        @ flags still from the subs above
        ldmfd   sp!, {r4-r6}
        bx      lr
78 | |
79 | |
b2b95d2e |
@ void spend_cycles(int c)
@
@ Busy-wait. The loop counter starts at (c >> 2) - 2 and the loop runs until
@ the decrement goes negative, so at least one pass is always made.
@ The author's figures: one pass costs 4 cycles, and the "- 2" accounts for
@ the entry/exit/init overhead.
@
@ In:    r0 = c
@ Clobb: r0, flags
        .global spend_cycles

spend_cycles:
        mov     r0, r0, lsr #2          @ cycles -> passes (4 cycles per pass)
        sub     r0, r0, #2              @ discount entry/exit/init
1:
        subs    r0, r0, #1
        bpl     1b

        bx      lr
90 | |
91 | |
21afaa36 |
@ void memset32(int *dest, int c, int count)
@
@ Stores the word c into count consecutive words starting at dest.
@ Whole groups of four words go out with one stm each; the remaining
@ 0-3 words are handled from the low two bits of the running counter.
@
@ In:    r0 = dest, r1 = c, r2 = count (in words)
@ Out:   r0 = address just past the last word written
@ Clobb: r2, r3, r12, flags (lr is used as a data register and restored)
@ NOTE(review): a negative count is not rejected; the tail stores still act
@ on its low two bits.
        .global memset32

memset32:
        str     lr, [sp, #-4]!          @ lr doubles as the 4th store register below

        subs    r2, r2, #4              @ at least one full group of four?
        mov     r3, r1                  @ second copy of c (also needed by the tail)
        bmi     .Lmemset32_tail

        mov     lr, r1
        mov     r12,r1

.Lmemset32_quad:
        stmia   r0!, {r1,r3,r12,lr}     @ four words per pass
        subs    r2, r2, #4
        bpl     .Lmemset32_quad

.Lmemset32_tail:
        @ r2 is now negative, but its bits 0-1 still equal count & 3
        tst     r2, #1
        strne   r1, [r0], #4            @ one leftover word

        tst     r2, #2
        stmneia r0!, {r1,r3}            @ two leftover words

        ldr     lr, [sp], #4
        bx      lr
118 | |
119 | |
c0623dcf |
@ warning: this code relies on palette being strictly RGB555, i.e. bit5=0
@
@ void soft_scale(void *dst, unsigned short *pal, int line_offs, int lines)
@
@ Converts 8-bit palette indices to 16-bit pixels inside a single buffer and
@ stretches every 8 source pixels to 10 output pixels.
@
@ In:    r0 = dst        buffer base, used for both reading and writing
@        r1 = pal        halfword table indexed by the source byte
@        r2 = line_offs  first line to process
@        r3 = lines      number of lines; <= 0 returns immediately
@
@ Layout as computed below:
@   source bytes start at dst + line_offs*320, 320 bytes per line; the last
@   64 bytes of each source line are skipped and 256 are consumed;
@   output starts at dst + line_offs*640, 320 halfword pixels per line.
@ Both pointers start at the END of their region and walk backwards
@ (presumably so the wider output can overwrite the buffer it is read from).
@
@ Per group of 8 source pixels s0..s7 the 10 output pixels d0..d9 are:
@   d0 = s0                 d5 = s4
@   d1 = 1/4 s0 + 3/4 s1    d6 = 1/4 s4 + 3/4 s5
@   d2 = (s1 + s2) / 2      d7 = (s5 + s6) / 2
@   d3 = 3/4 s2 + 1/4 s3    d8 = 3/4 s6 + 1/4 s7
@   d4 = s3                 d9 = s7
@ where "1/4 p" is (p & 0xe79c) >> 2, i.e. a per-channel shift that drops the
@ bits which would spill into the neighbouring channel.
@
@ Clobb: r0-r3, r12, flags (r4-r11 and lr are saved and restored)
        .global soft_scale

soft_scale:
        cmp     r3, #0
        bxle    lr                      @ no lines: the do-while line loop below would wrap its counter

        stmfd   sp!,{r4-r11,lr}
        mov     lr, #0xff
        mov     lr, lr, lsl #1          @ lr = 0x1fe: masks (index byte * 2) for the halfword palette lookups
        mov     r9, #0x3900             @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
        orr     r9, r9, #0x00e7         @ r9 = 0x39e7; always used shifted (r9<<2 = 0xe79c)

        mov     r11,r3                  @ r11= line counter
        mov     r3, r1                  @ r3 = pal base

        mov     r12,#320
        mul     r2, r12,r2
        add     r4, r0, r2, lsl #1      @ r4 = dst_start
        add     r5, r0, r2              @ r5 = src_start
        mul     r12,r11,r12
        add     r0, r4, r12,lsl #1      @ r0 = dst_end
        add     r1, r5, r12             @ r1 = src_end

        mov     r2, r11                 @ r2[23:0] = lines left; r2[31:24] is the per-line group counter

soft_scale_loop:
        sub     r1, r1, #64             @ skip borders
        orr     r2, r2, #(256/8-1)<<24  @ 32 groups of 8 source pixels per line

soft_scale_loop_line:
        ldr     r12, [r1, #-8]!         @ r12 = s0..s3 (step back 8 source bytes)
        ldr     r7, [r1, #4]            @ r7  = s4..s7

        and     r4, lr, r12,lsl #1
        ldrh    r4, [r3, r4]            @ r4 = pal[s0]
        and     r5, lr, r12,lsr #7
        ldrh    r5, [r3, r5]            @ r5 = pal[s1]
        and     r11,r4, r9, lsl #2
        orr     r4, r4, r11,lsl #14     @ r4[31:16] = 1/4 pix_s 0
        and     r11,r5, r9, lsl #2
        sub     r6, r5, r11,lsr #2      @ r6 = 3/4 pix_s 1
        add     r4, r4, r6, lsl #16     @ pix_d 0, 1
        and     r6, lr, r12,lsr #15
        ldrh    r6, [r3, r6]            @ r6 = pal[s2]
        and     r12,lr, r12,lsr #23
        ldrh    r12,[r3, r12]           @ r12 = pal[s3]

        @ (s1 + s2) / 2: rotate both so green sits at the top of the word, add,
        @ then rotate on to a net shift right by one.
        mov     r11,r6, ror #11
        adds    r5, r11,r5, ror #11     @ carry out = overflow of the green sum
        mov     r5, r5, ror #22
        bic     r5, r5, #0xff000000     @ drop the bit that rotated round to the top
        bic     r5, r5, #0x0420         @ set the green bits as they should be
        orrcs   r5, r5, #0x0400         @ green MSB comes from the carry of the adds

        and     r11,r6, r9, lsl #2
        sub     r6, r6, r11,lsr #2      @ r6 = 3/4 pix_s 2
        orr     r5, r5, r6, lsl #16

        and     r6, lr, r7, lsl #1
        ldrh    r6, [r3, r6]            @ r6 = pal[s4]
        and     r11,r12,r9, lsl #2
        add     r5, r5, r11,lsl #14     @ pix_d 2, 3  (upper half += 1/4 pix_s 3)
        orr     r6, r12,r6, lsl #16     @ pix_d 4, 5

        and     r12,lr, r7, lsr #7
        ldrh    r12,[r3, r12]           @ r12 = pal[s5]
        and     r10,lr, r7, lsr #15
        ldrh    r10,[r3, r10]           @ r10 = pal[s6]
        and     r11,r12,r9, lsl #2
        sub     r8, r12,r11,lsr #2      @ r8 = 3/4 pix_s 5
        and     r11,r6, r9, lsl #18     @ pick s4 out of the upper half of r6
        add     r8, r8, r11,lsr #18     @ += 1/4 pix_s 4
        and     r7, lr, r7, lsr #23
        ldrh    r7, [r3, r7]            @ r7 = pal[s7]

        @ (s5 + s6) / 2, same scheme as above; the stray top bit is discarded
        @ by the lsl #16 when the result is merged into r8.
        mov     r11,r10,ror #11
        adds    r12,r11,r12,ror #11
        mov     r12,r12,ror #22
        bic     r12,r12,#0x0420
        orrcs   r12,r12,#0x0400
        orr     r8, r8, r12,lsl #16     @ pix_d 6, 7

        and     r11,r10,r9, lsl #2
        sub     r10,r10,r11,lsr #2      @ r10= 3/4 pix_s 6
        and     r11,r7, r9, lsl #2
        add     r10,r10,r11,lsr #2      @ += 1/4 pix_s 7
        orr     r10,r10,r7, lsl #16     @ pix_d 8, 9

        subs    r2, r2, #1<<24          @ one group done; goes negative after the 32nd

        stmdb   r0!, {r4,r5,r6,r8,r10}  @ 10 output pixels, written below the previous group
        bpl     soft_scale_loop_line

        add     r2, r2, #1<<24          @ undo the final group-counter borrow
        subs    r2, r2, #1              @ next line
        bne     soft_scale_loop

        ldmfd   sp!,{r4-r11,lr}
        bx      lr
217 | |
6587f346 |
218 | |
f5eb372f |
@ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
@
@ Palette lookup: for each source byte writes the halfword pal[byte] to dst.
@ Eight pixels are handled per loop pass (two source words in, four
@ destination words out).
@
@ In:    r0 = dst   (written with stm, so it should be word aligned)
@        r1 = src   (read with ldm, so it should be word aligned)
@        r2 = pal   (halfword table indexed by the source byte)
@        r3 = count (pixels; only count/8 whole groups are converted,
@                    a trailing remainder is ignored)
@ Out:   r0 / r1 advanced past the converted pixels
@ Clobb: r3, r12, flags (r4-r8 and lr are saved and restored)

        .global convert2RGB555

convert2RGB555:
        movs    r3, r3, lsr #3          @ r3 = number of 8-pixel groups
        bxeq    lr                      @ count < 8: nothing to do (the loop below would otherwise wrap its counter)

        stmfd   sp!,{r4-r8,lr}
        mov     lr, #0xff
        mov     lr, lr, lsl #1          @ lr = 0x1fe: masks (index byte * 2) for the halfword palette lookups

convert2RGB555_loop:
        ldmia   r1!,{r4,r5}             @ r4 = pixels 0-3, r5 = pixels 4-7

        and     r6, lr, r4, lsl #1
        ldrh    r6, [r2, r6]            @ pal[pixel 0]
        and     r7, lr, r4, lsr #7
        ldrh    r7, [r2, r7]            @ pal[pixel 1]
        and     r8, lr, r4, lsr #15
        ldrh    r8, [r2, r8]            @ pal[pixel 2]
        and     r4, lr, r4, lsr #23
        ldrh    r4, [r2, r4]            @ pal[pixel 3]

        orr     r6, r6, r7, lsl #16     @ out word 0 = pixels 0, 1
        and     r12,lr, r5, lsl #1
        ldrh    r12, [r2, r12]          @ pal[pixel 4]
        orr     r7, r8, r4, lsl #16     @ out word 1 = pixels 2, 3
        and     r8, lr, r5, lsr #7
        ldrh    r8, [r2, r8]            @ pal[pixel 5]
        and     r4, lr, r5, lsr #15
        ldrh    r4, [r2, r4]            @ pal[pixel 6]
        and     r5, lr, r5, lsr #23
        ldrh    r5, [r2, r5]            @ pal[pixel 7]
        orr     r8, r12,r8, lsl #16     @ out word 2 = pixels 4, 5
        orr     r12,r4, r5, lsl #16     @ out word 3 = pixels 6, 7

        stmia   r0!,{r6,r7,r8,r12}
        subs    r3, r3, #1
        bne     convert2RGB555_loop

        ldmfd   sp!,{r4-r8,lr}
        bx      lr
c0bf6f9f |
261 | |