937bf65b |
1 | @ vim:filetype=armasm |
2 | |
@ flushcache(beginning_addr, end_addr, flags)
@ Flush/invalidate the CPU caches over [beginning_addr, end_addr) via the
@ ARM Linux (OABI) cacheflush syscall: swi 0x900000 | __ARM_NR_cacheflush.
@ In:  r0 = start address, r1 = end address, r2 = flags (passed through)
@ Out: r0 = syscall result
.global flushcache @ beginning_addr, end_addr, flags

flushcache:
    swi #0x9f0002               @ __ARM_NR_cacheflush
    bx lr                       @ interworking return (was mov pc,lr; rest of file uses bx)
9 | |
c0bf6f9f |
10 | |
6587f346 |
@ block_or(void *src, size_t n, int pat)
@ OR the low byte of 'pat' into every byte of the n-byte buffer at src.
@ n is assumed to be a non-zero multiple of 16 (16 bytes per pass).
.global block_or @ void *src, size_t n, int pat

block_or:
    stmfd   sp!, {r4-r5}        @ keep callee-saved scratch
    orr     r2, r2, r2, lsl #8  @ splat pattern byte -> halfword
    orr     r2, r2, r2, lsl #16 @ splat halfword -> full word
    mov     r1, r1, lsr #4      @ iteration count = n / 16
0:
    ldmia   r0, {r3-r5,r12}     @ fetch 4 words, no writeback yet
    orr     r3, r3, r2
    orr     r4, r4, r2
    orr     r5, r5, r2
    orr     r12,r12,r2
    subs    r1, r1, #1          @ one 16-byte chunk done
    stmia   r0!, {r3-r5,r12}    @ store back, advance pointer
    bne     0b
    ldmfd   sp!, {r4-r5}
    bx      lr
29 | |
30 | |
e328100e |
@ block_and(void *src, size_t n, int andpat)
@ AND the low byte of 'andpat' into every byte of the n-byte buffer at src.
@ n is assumed to be a non-zero multiple of 16 (16 bytes per pass).
.global block_and @ void *src, size_t n, int andpat

block_and:
    stmfd   sp!, {r4-r5}        @ keep callee-saved scratch
    orr     r2, r2, r2, lsl #8  @ splat mask byte -> halfword
    orr     r2, r2, r2, lsl #16 @ splat halfword -> full word
    mov     r1, r1, lsr #4      @ iteration count = n / 16
1:
    ldmia   r0, {r3-r5,r12}     @ fetch 4 words, no writeback yet
    and     r3, r3, r2
    and     r4, r4, r2
    and     r5, r5, r2
    and     r12,r12,r2
    subs    r1, r1, #1          @ one 16-byte chunk done
    stmia   r0!, {r3-r5,r12}    @ store back, advance pointer
    bne     1b
    ldmfd   sp!, {r4-r5}
    bx      lr
49 | |
50 | |
6587f346 |
@ block_andor(void *src, size_t n, int andpat, int orpat)
@ For every byte of the n-byte buffer at src: b = (b & andpat) | orpat.
@ n is assumed to be a non-zero multiple of 16 (16 bytes per pass).
.global block_andor @ void *src, size_t n, int andpat, int orpat

block_andor:
    stmfd   sp!, {r4-r6}        @ keep callee-saved scratch
    orr     r2, r2, r2, lsl #8  @ splat AND pattern to 32 bits
    orr     r2, r2, r2, lsl #16
    orr     r3, r3, r3, lsl #8  @ splat OR pattern to 32 bits
    orr     r3, r3, r3, lsl #16
    mov     r1, r1, lsr #4      @ iteration count = n / 16
2:
    ldmia   r0, {r4-r6,r12}     @ fetch 4 words, no writeback yet
    and     r4, r4, r2
    orr     r4, r4, r3
    and     r5, r5, r2
    orr     r5, r5, r3
    and     r6, r6, r2
    orr     r6, r6, r3
    and     r12,r12,r2
    orr     r12,r12,r3
    subs    r1, r1, #1          @ one 16-byte chunk done
    stmia   r0!, {r4-r6,r12}    @ store back, advance pointer
    bne     2b
    ldmfd   sp!, {r4-r6}
    bx      lr
75 | |
76 | |
b2b95d2e |
.global spend_cycles @ c

@ spend_cycles(c): busy-wait for approximately c CPU cycles.
@ Timing-critical code — the loop body is budgeted at 4 cycles per
@ iteration (subs + taken branch); do not reorder or "optimize".
spend_cycles:
    mov r0, r0, lsr #2 @ 4 cycles/iteration
    sub r0, r0, #2 @ entry/exit/init overhead, in iterations (approximate)
.sc_loop:
    subs r0, r0, #1
    bpl .sc_loop @ loops while r0 >= 0

    bx lr
87 | |
88 | |
989672f4 |
.global soft_scale @ void *dst, unsigned short *pal, int offs, int lines

@ soft_scale(dst, pal, offs, lines)
@ Convert 8bpp palettized lines to 16bpp RGB565 with a 4:5 horizontal
@ stretch: each pass turns 8 source pixels into 10 destination pixels
@ (256 used src bytes -> 320 dst pixels per 320-byte src line), blending
@ adjacent pixels (1/4 + 3/4 and 1/2 + 1/2 mixes) for the inserted ones.
@ The 8bpp source is read from inside the SAME buffer as dst
@ (src = dst + 320*offs bytes) and both loops run backwards, end to
@ start, so the 16bpp output can overwrite the 8bpp input in place.
@ NOTE(review): the above layout is inferred from the address math
@ below — confirm against the caller.
@ In:  r0 = dst buffer, r1 = palette (RGB565 halfwords),
@      r2 = offset in lines, r3 = line count
soft_scale:
    stmfd sp!,{r4-r11,lr}
    mov lr, #0xff
    mov lr, lr, lsl #1 @ lr = 0x1fe: masks a pal byte into a halfword table offset
    mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
    orr r9, r9, #0x00e7 @ r9 lsl #2 = 0xe79c: clears low bits of each RGB565 channel for averaging

    mov r11,r3 @ r11= line counter
    mov r3, r1 @ r3 = pal base

    mov r12,#320 @ src line stride in bytes
    mul r2, r12,r2 @ r2 = byte offset of first src line
    add r4, r0, r2, lsl #1 @ r4 = dst_start
    add r5, r0, r2 @ r5 = src_start (8bpp data inside the dst buffer)
    mul r12,r11,r12
    add r0, r4, r12,lsl #1 @ r0 = dst_end
    add r1, r5, r12 @ r1 = src_end

soft_scale_loop:
    sub r1, r1, #64 @ skip borders (only 256 of the 320 src bytes per line are used)
    mov r2, #256/8 @ 32 passes of 8 src pixels per line

soft_scale_loop_line:
    ldr r12, [r1, #-8]! @ src pixels 0-3 (one palette index per byte)
    ldr r7, [r1, #4] @ src pixels 4-7

    and r4, lr, r12,lsl #1 @ pal offset of src pixel 0
    ldrh r4, [r3, r4]
    and r5, lr, r12,lsr #7 @ pal offset of src pixel 1
    ldrh r5, [r3, r5]
    and r4, r4, r9, lsl #2 @ drop low channel bits so colors add without carry
    orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0
    and r5, r5, r9, lsl #2
    sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1
    add r4, r4, r6, lsl #16 @ pix_d 0, 1
    and r6, lr, r12,lsr #15 @ pal offset of src pixel 2
    ldrh r6, [r3, r6]
    and r12,lr, r12,lsr #23 @ pal offset of src pixel 3
    ldrh r12,[r3, r12]
    and r6, r6, r9, lsl #2
    add r5, r5, r6
    mov r5, r5, lsr #1 @ r5 = average of pix_s 1 and pix_s 2
    sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2
    orr r5, r5, r6, lsl #16

    and r6, lr, r7, lsl #1 @ pal offset of src pixel 4
    ldrh r6, [r3, r6]
    and r12,r12,r9, lsl #2
    add r5, r5, r12,lsl #14 @ pix_d 2, 3
    and r6, r6, r9, lsl #2
    orr r6, r12,r6, lsl #16 @ pix_d 4, 5

    and r12,lr, r7, lsr #7 @ pal offset of src pixel 5
    ldrh r12,[r3, r12]
    and r10,lr, r7, lsr #15 @ pal offset of src pixel 6
    ldrh r10,[r3, r10]
    and r12,r12,r9, lsl #2
    sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1
    add r8, r8, r6, lsr #18
    and r7, lr, r7, lsr #23 @ pal offset of src pixel 7
    ldrh r7, [r3, r7]
    and r10,r10,r9, lsl #2
    orr r8, r8, r10,lsl #15
    add r8, r8, r12,lsl #15 @ pix_d 6, 7
    sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2
    and r7, r7, r9, lsl #2
    add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3
    orr r10,r10,r7, lsl #16 @ pix_d 8, 9

    subs r2, r2, #1

    stmdb r0!, {r4,r5,r6,r8,r10} @ store 10 dst pixels (5 words), moving backwards
    bne soft_scale_loop_line

    subs r11,r11,#1 @ next line
    bne soft_scale_loop

    ldmfd sp!,{r4-r11,lr}
    bx lr
170 | |
6587f346 |
171 | |
c0bf6f9f |
172 | /* buggy and slow, probably because function call overhead |
173 | @ renderer helper, based on bitbank's method |
174 | .global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal |
175 | |
176 | draw8pix: |
177 | stmfd sp!, {r4,r5} |
178 | |
179 | ldrb r3, [r1] @ get bit 0 pixels |
180 | mov r12,#1 |
181 | orr r12,r12,r12,lsl #8 |
182 | orr r12,r12,r12,lsl #16 |
183 | ldrb r1, [r1, #8] @ get bit 1 pixels |
184 | orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit |
185 | orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes |
186 | and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want |
187 | and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want |
188 | ldr r2, [r2] |
189 | |
190 | orr r1, r1, r1, lsl #9 @ process the bit 1 pixels |
191 | orr r1, r1, r1, lsl #18 |
192 | and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want |
193 | and r1, r12,r1, lsr #3 @ mask off the lower nibble |
194 | orr r4, r4, r3, lsl #1 |
195 | orr r5, r5, r1, lsl #5 |
196 | |
197 | @ can this be avoided? |
198 | mov r4, r4, lsl #3 @ *8 |
199 | mov r3, r2, ror r4 |
200 | strb r3, [r0], #1 |
201 | mov r4, r4, lsr #8 |
202 | mov r3, r2, ror r4 |
203 | strb r3, [r0], #1 |
204 | mov r4, r4, lsr #8 |
205 | mov r3, r2, ror r4 |
206 | strb r3, [r0], #1 |
207 | mov r4, r4, lsr #8 |
208 | mov r3, r2, ror r4 |
209 | strb r3, [r0], #1 |
210 | |
211 | mov r5, r5, lsl #3 @ *8 |
212 | mov r3, r2, ror r5 |
213 | strb r3, [r0], #1 |
214 | mov r5, r5, lsr #8 |
215 | mov r3, r2, ror r5 |
216 | strb r3, [r0], #1 |
217 | mov r5, r5, lsr #8 |
218 | mov r3, r2, ror r5 |
219 | strb r3, [r0], #1 |
220 | mov r5, r5, lsr #8 |
221 | mov r3, r2, ror r5 |
222 | strb r3, [r0], #1 |
223 | |
224 | ldmfd sp!, {r4,r5} |
225 | bx lr |
226 | */ |
227 | |