937bf65b |
@ vim:filetype=armasm

@ cache flush via the Linux ARM cacheflush syscall
.global flushcache @ beginning_addr, end_addr, flags

flushcache:
    swi #0x9f0002               @ __ARM_NR_cacheflush: r0=start, r1=end, r2=flags
    mov pc, lr

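@ C-side usage sketch; the prototype is inferred from the argument comment
@ above (not taken from a header), and "code"/"len" are illustrative names:
/*
extern void flushcache(void *beginning_addr, void *end_addr, int flags);

flushcache(code, (char *)code + len, 0);    // sync caches after writing code
*/
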
.global block_or @ void *src, size_t n, int pat

@ ORs a byte pattern into a block of memory; src must be word-aligned
@ and n a multiple of 16
block_or:
    stmfd sp!, {r4-r5}
    orr r2, r2, r2, lsl #8      @ replicate the pattern byte
    orr r2, r2, r2, lsl #16     @ into all 4 bytes of r2
    mov r1, r1, lsr #4          @ 16 bytes per iteration
block_loop_or:
    ldmia r0, {r3-r5,r12}
    subs r1, r1, #1
    orr r3, r3, r2
    orr r4, r4, r2
    orr r5, r5, r2
    orr r12,r12,r2
    stmia r0!, {r3-r5,r12}
    bne block_loop_or
    ldmfd sp!, {r4-r5}
    bx lr

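@ Rough C equivalent, as a reader's sketch (assumes pat fits in a byte):
/*
void block_or(void *src, size_t n, int pat)
{
    unsigned int *p = src;
    unsigned int v = pat & 0xff;
    v |= v << 8; v |= v << 16;
    for (n >>= 4; n != 0; n--, p += 4) {
        p[0] |= v; p[1] |= v; p[2] |= v; p[3] |= v;
    }
}
*/
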
.global block_and @ void *src, size_t n, int andpat

@ ANDs a byte pattern into a block of memory; same alignment and size
@ constraints as block_or above
block_and:
    stmfd sp!, {r4-r5}
    orr r2, r2, r2, lsl #8      @ replicate the pattern byte
    orr r2, r2, r2, lsl #16
    mov r1, r1, lsr #4          @ 16 bytes per iteration
block_loop_and:
    ldmia r0, {r3-r5,r12}
    subs r1, r1, #1
    and r3, r3, r2
    and r4, r4, r2
    and r5, r5, r2
    and r12,r12,r2
    stmia r0!, {r3-r5,r12}
    bne block_loop_and
    ldmfd sp!, {r4-r5}
    bx lr

.global block_andor @ void *src, size_t n, int andpat, int orpat

@ per byte: b = (b & andpat) | orpat; constraints as above
block_andor:
    stmfd sp!, {r4-r6}
    orr r2, r2, r2, lsl #8      @ replicate andpat byte
    orr r2, r2, r2, lsl #16
    orr r3, r3, r3, lsl #8      @ replicate orpat byte
    orr r3, r3, r3, lsl #16
    mov r1, r1, lsr #4          @ 16 bytes per iteration
block_loop_andor:
    ldmia r0, {r4-r6,r12}
    subs r1, r1, #1
    and r4, r4, r2
    orr r4, r4, r3
    and r5, r5, r2
    orr r5, r5, r3
    and r6, r6, r2
    orr r6, r6, r3
    and r12,r12,r2
    orr r12,r12,r3
    stmia r0!, {r4-r6,r12}
    bne block_loop_andor
    ldmfd sp!, {r4-r6}
    bx lr

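@ Sketch of the combined operation, with the same hedges as the block_or
@ sketch (bitwise AND/OR have no carries, so word ops act per byte):
/*
void block_andor(void *src, size_t n, int andpat, int orpat)
{
    unsigned int *p = src;
    unsigned int a = andpat & 0xff, o = orpat & 0xff;
    a |= a << 8; a |= a << 16;
    o |= o << 8; o |= o << 16;
    for (n >>= 4; n != 0; n--, p += 4) {
        p[0] = (p[0] & a) | o; p[1] = (p[1] & a) | o;
        p[2] = (p[2] & a) | o; p[3] = (p[3] & a) | o;
    }
}
*/
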
.global spend_cycles @ c

@ busy-wait for roughly c CPU cycles
spend_cycles:
    mov r0, r0, lsr #2          @ 4 cycles/iteration
    sub r0, r0, #2              @ entry/exit/init overhead
.sc_loop:
    subs r0, r0, #1
    bpl .sc_loop

    bx lr

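@ Hypothetical helper showing the intended use; the 200 MHz clock is an
@ assumption about the target, not something this file defines:
/*
static void delay_us(int us)
{
    spend_cycles(us * 200);     // ~200 cycles per microsecond at 200 MHz
}
*/
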
.global memset32 @ int *dest, int c, int count

@ fill count words at dest with c; dest must be word-aligned
memset32:
    stmfd sp!, {lr}

    mov r3, r1
    subs r2, r2, #4
    bmi mst32_fin

    mov r12,r1
    mov lr, r1

mst32_loop:
    subs r2, r2, #4
    stmia r0!, {r1,r3,r12,lr}   @ 4 words per iteration
    bpl mst32_loop

mst32_fin:
    tst r2, #1
    strne r1, [r0], #4          @ store leftover word

    tst r2, #2
    stmneia r0!, {r1,r3}        @ store leftover pair

    ldmfd sp!, {lr}
    bx lr

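@ Usage sketch; "framebuffer" and the dimensions are illustrative only:
/*
extern void memset32(int *dest, int c, int count);

memset32((int *)framebuffer, 0, 320*240*2/4);   // clear a 16bpp 320x240 buffer
*/
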
.global soft_scale @ void *dst, unsigned short *pal, int line_offs, int lines

@ scales each 256-pixel 8bpp line up to 320 16bpp pixels via pal lookup,
@ blending neighbours at a 4:5 ratio; runs back to front so the output can
@ overwrite the source in the same buffer
soft_scale:
    stmfd sp!,{r4-r11,lr}
    mov lr, #0xff
    mov lr, lr, lsl #1          @ lr = 0x1fe, palette index mask
    mov r9, #0x3900             @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
    orr r9, r9, #0x00e7

    mov r11,r3                  @ r11= line counter
    mov r3, r1                  @ r3 = pal base

    mov r12,#320
    mul r2, r12,r2
    add r4, r0, r2, lsl #1      @ r4 = dst_start
    add r5, r0, r2              @ r5 = src_start
    mul r12,r11,r12
    add r0, r4, r12,lsl #1      @ r0 = dst_end
    add r1, r5, r12             @ r1 = src_end

soft_scale_loop:
    sub r1, r1, #64             @ skip borders
    mov r2, #256/8              @ 8 src pixels -> 10 dst pixels per pass

soft_scale_loop_line:
    ldr r12, [r1, #-8]!
    ldr r7, [r1, #4]

    and r4, lr, r12,lsl #1
    ldrh r4, [r3, r4]
    and r5, lr, r12,lsr #7
    ldrh r5, [r3, r5]
    and r4, r4, r9, lsl #2
    orr r4, r4, r4, lsl #14     @ r4[31:16] = 1/4 pix_s 0
    and r5, r5, r9, lsl #2
    sub r6, r5, r5, lsr #2      @ r6 = 3/4 pix_s 1
    add r4, r4, r6, lsl #16     @ pix_d 0, 1
    and r6, lr, r12,lsr #15
    ldrh r6, [r3, r6]
    and r12,lr, r12,lsr #23
    ldrh r12,[r3, r12]
    and r6, r6, r9, lsl #2
    add r5, r5, r6
    mov r5, r5, lsr #1
    sub r6, r6, r6, lsr #2      @ r6 = 3/4 pix_s 2
    orr r5, r5, r6, lsl #16

    and r6, lr, r7, lsl #1
    ldrh r6, [r3, r6]
    and r12,r12,r9, lsl #2
    add r5, r5, r12,lsl #14     @ pix_d 2, 3
    and r6, r6, r9, lsl #2
    orr r6, r12,r6, lsl #16     @ pix_d 4, 5

    and r12,lr, r7, lsr #7
    ldrh r12,[r3, r12]
    and r10,lr, r7, lsr #15
    ldrh r10,[r3, r10]
    and r12,r12,r9, lsl #2
    sub r8, r12,r12,lsr #2      @ r8 = 3/4 pix_s 1
    add r8, r8, r6, lsr #18
    and r7, lr, r7, lsr #23
    ldrh r7, [r3, r7]
    and r10,r10,r9, lsl #2
    orr r8, r8, r10,lsl #15
    add r8, r8, r12,lsl #15     @ pix_d 6, 7
    sub r10,r10,r10,lsr #2      @ r10= 3/4 pix_s 2
    and r7, r7, r9, lsl #2
    add r10,r10,r7, lsr #2      @ += 1/4 pix_s 3
    orr r10,r10,r7, lsl #16     @ pix_d 8, 9

    subs r2, r2, #1

    stmdb r0!, {r4,r5,r6,r8,r10}
    bne soft_scale_loop_line

    subs r11,r11,#1
    bne soft_scale_loop

    ldmfd sp!,{r4-r11,lr}
    bx lr

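@ The blend pattern as recovered from the shifts above, per 4 source pixels
@ s0..s3 (RGB565 channel masking via r9 omitted for clarity):
/*
d0 = s0;
d1 = s0/4 + 3*s1/4;
d2 = (s1 + s2)/2;
d3 = 3*s2/4 + s3/4;
d4 = s3;
*/
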
/* buggy and slow, probably because of function call overhead
@ renderer helper, based on bitbank's method
.global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal

draw8pix:
    stmfd sp!, {r4,r5}

    ldrb r3, [r1]               @ get bit 0 pixels
    mov r12,#1
    orr r12,r12,r12,lsl #8
    orr r12,r12,r12,lsl #16
    ldrb r1, [r1, #8]           @ get bit 1 pixels
    orr r3, r3, r3, lsl #9      @ shift them over 1 byte + 1 bit
    orr r3, r3, r3, lsl #18     @ now 4 pixels take up 4 bytes
    and r4, r12,r3, lsr #7      @ mask off the upper nibble pixels we want
    and r5, r12,r3, lsr #3      @ mask off the lower nibble pixels we want
    ldr r2, [r2]

    orr r1, r1, r1, lsl #9      @ process the bit 1 pixels
    orr r1, r1, r1, lsl #18
    and r3, r12,r1, lsr #7      @ mask off the upper nibble pixels we want
    and r1, r12,r1, lsr #3      @ mask off the lower nibble
    orr r4, r4, r3, lsl #1
    orr r5, r5, r1, lsl #5

    @ can this be avoided?
    mov r4, r4, lsl #3          @ *8
    mov r3, r2, ror r4
    strb r3, [r0], #1
    mov r4, r4, lsr #8
    mov r3, r2, ror r4
    strb r3, [r0], #1
    mov r4, r4, lsr #8
    mov r3, r2, ror r4
    strb r3, [r0], #1
    mov r4, r4, lsr #8
    mov r3, r2, ror r4
    strb r3, [r0], #1

    mov r5, r5, lsl #3          @ *8
    mov r3, r2, ror r5
    strb r3, [r0], #1
    mov r5, r5, lsr #8
    mov r3, r2, ror r5
    strb r3, [r0], #1
    mov r5, r5, lsr #8
    mov r3, r2, ror r5
    strb r3, [r0], #1
    mov r5, r5, lsr #8
    mov r3, r2, ror r5
    strb r3, [r0], #1

    ldmfd sp!, {r4,r5}
    bx lr
*/