937bf65b |
1 | @ vim:filetype=armasm |
2 | |
3 | @ test |
.global flushcache @ beginning_addr, end_addr, flags

@ void flushcache(void *start, void *end, int flags)
@ Thin wrapper around the ARM Linux cacheflush syscall
@ (OABI swi number 0x9f0002 = __ARM_NR_BASE + 2).
@ In:  r0 = start address, r1 = end address, r2 = flags
@ Out: r0 = syscall result. Clobbers: r0, flags (per kernel ABI).
flushcache:
    swi #0x9f0002
    bx  lr              @ was "mov pc, lr"; bx lr matches the other
                        @ routines in this file and interworks on
                        @ Thumb-capable (ARMv4T+) cores
9 | |
c0bf6f9f |
10 | |
6587f346 |
.global block_or @ void *src, size_t n, int pat

@ OR an 8-bit pattern into every byte of a block, 16 bytes per pass.
@ In:  r0 = block start (word-aligned), r1 = byte count (multiple of 16,
@      must be >= 16), r2 = pattern byte (replicated to a word below).
@ Clobbers: r0-r3, r12, flags. r4-r5 are saved and restored.
block_or:
    stmfd sp!, {r4-r5}          @ need two extra scratch registers
    orr   r2, r2, r2, lsl #8    @ pattern byte -> halfword
    orr   r2, r2, r2, lsl #16   @ halfword -> full word
    mov   r1, r1, lsr #4        @ byte count -> 16-byte chunk count
.bo_loop:
    ldmia r0,  {r3-r5,r12}      @ read 4 words (no writeback: stored back below)
    orr   r3, r3, r2
    orr   r4, r4, r2
    orr   r5, r5, r2
    orr   r12,r12,r2
    subs  r1, r1, #1            @ chunk done (orr/stm leave flags untouched)
    stmia r0!, {r3-r5,r12}      @ write back and advance
    bne   .bo_loop
    ldmfd sp!, {r4-r5}
    bx    lr
29 | |
30 | |
.global block_andor @ void *src, size_t n, int andpat, int orpat

@ For every byte of a block: b = (b & andpat) | orpat, 16 bytes per pass.
@ In:  r0 = block start (word-aligned), r1 = byte count (multiple of 16,
@      must be >= 16), r2 = AND pattern byte, r3 = OR pattern byte.
@ Clobbers: r0-r3, r12, flags. r4-r6 are saved and restored.
block_andor:
    stmfd sp!, {r4-r6}          @ need three extra scratch registers
    orr   r2, r2, r2, lsl #8    @ andpat byte -> halfword
    orr   r2, r2, r2, lsl #16   @ halfword -> full word
    orr   r3, r3, r3, lsl #8    @ orpat byte -> halfword
    orr   r3, r3, r3, lsl #16   @ halfword -> full word
    mov   r1, r1, lsr #4        @ byte count -> 16-byte chunk count
.bao_loop:
    ldmia r0,  {r4-r6,r12}      @ read 4 words (no writeback: stored back below)
    and   r4, r4, r2            @ mask pass
    and   r5, r5, r2
    and   r6, r6, r2
    and   r12,r12,r2
    orr   r4, r4, r3            @ set pass
    orr   r5, r5, r3
    orr   r6, r6, r3
    orr   r12,r12,r3
    subs  r1, r1, #1            @ chunk done (and/orr/stm leave flags untouched)
    stmia r0!, {r4-r6,r12}      @ write back and advance
    bne   .bao_loop
    ldmfd sp!, {r4-r6}
    bx    lr
55 | |
56 | |
b2b95d2e |
.global spend_cycles @ c

@ Busy-wait delay: burn roughly r0 CPU cycles.
@ In:  r0 = number of cycles to spend. Clobbers: r0, flags.
@ NOTE(review): assumes one subs+taken-bpl iteration costs 4 cycles on the
@ target core -- confirm for the exact CPU; the "sub #2" compensates for
@ call/return and setup overhead (in iterations, i.e. ~8 cycles).
spend_cycles:
    mov r0, r0, lsr #2 @ 4 cycles/iteration -> iteration count
    sub r0, r0, #2 @ entry/exit/init
.sc_loop:
    subs r0, r0, #1
    bpl .sc_loop @ loop runs (count+1) times, exits when r0 goes negative

    bx lr
67 | |
68 | |
989672f4 |
.global soft_scale @ void *dst, unsigned short *pal, int offs, int lines

@ Horizontal software scaler: converts 8bpp palette-index pixels to 16bpp
@ RGB565 while widening each group of 8 source pixels into 10 output
@ pixels (256 -> 320 per line), with linear blending between neighbours.
@ Both pointers walk BACKWARDS (ldr pre-decrement / stmdb) so the 16bpp
@ output can overwrite the 8bpp input in the same buffer without
@ clobbering not-yet-read source bytes.
@ NOTE(review): the 8bpp source appears to live in the dst buffer itself
@ (bytes at offs*320, output halfwords at offs*320*2) -- confirm caller.
@ In:  r0 = buffer base, r1 = palette (256 x u16 RGB565),
@      r2 = offs (starting line), r3 = number of lines.
@ Clobbers: r0-r12, flags. r4-r11 saved/restored; lr reloaded from stack.
soft_scale:
    stmfd sp!,{r4-r11,lr}
    mov lr, #0xff
    mov lr, lr, lsl #1 @ lr = 0x1fe: masks (index<<1) for u16 palette lookup
    mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
    orr r9, r9, #0x00e7 @ r9<<2 = 0xe79c: keeps top bits of each RGB565 field
                        @ so pixels can be blended without cross-field carries

    mov r11,r3 @ r11= line counter
    mov r3, r1 @ r3 = pal base

    mov r12,#320
    mul r2, r12,r2 @ r2 = offs*320 = starting pixel offset
    add r4, r0, r2, lsl #1 @ r4 = dst_start (16bpp: byte offset *2)
    add r5, r0, r2 @ r5 = src_start (8bpp)
    mul r12,r11,r12 @ r12 = lines*320 pixels to process
    add r0, r4, r12,lsl #1 @ r0 = dst_end (stmdb walks it down)
    add r1, r5, r12 @ r1 = src_end (pre-decrement ldr walks it down)

soft_scale_loop:
    sub r1, r1, #64 @ skip borders (only 256 of 320 src pixels per line used)
    mov r2, #256/8 @ 32 groups: 8 src pixels -> 10 dst pixels each

soft_scale_loop_line:
    ldr r12, [r1, #-8]! @ r12 = src pixels 0-3 (little-endian, pix 0 lowest)
    ldr r7, [r1, #4] @ r7 = src pixels 4-7

    @ --- src pixels 0-3 -> dst pixels 0-3 (r4, r5) ---
    and r4, lr, r12,lsl #1 @ palette byte offset of pix_s 0
    ldrh r4, [r3, r4]
    and r5, lr, r12,lsr #7 @ pix_s 1
    ldrh r5, [r3, r5]
    and r4, r4, r9, lsl #2 @ drop field LSBs ahead of blending
    orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0
    and r5, r5, r9, lsl #2
    sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1
    add r4, r4, r6, lsl #16 @ pix_d 0, 1
    and r6, lr, r12,lsr #15 @ pix_s 2
    ldrh r6, [r3, r6]
    and r12,lr, r12,lsr #23 @ pix_s 3
    ldrh r12,[r3, r12]
    and r6, r6, r9, lsl #2
    add r5, r5, r6
    mov r5, r5, lsr #1 @ r5 low = (pix_s 1 + pix_s 2)/2
    sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2
    orr r5, r5, r6, lsl #16

    @ --- src pixels 3-5 -> dst pixels 3-5 (r5 high, r6) ---
    and r6, lr, r7, lsl #1 @ pix_s 4
    ldrh r6, [r3, r6]
    and r12,r12,r9, lsl #2
    add r5, r5, r12,lsl #14 @ pix_d 2, 3
    and r6, r6, r9, lsl #2
    orr r6, r12,r6, lsl #16 @ pix_d 4, 5

    @ --- src pixels 4-7 -> dst pixels 6-9 (r8, r10) ---
    and r12,lr, r7, lsr #7 @ pix_s 5
    ldrh r12,[r3, r12]
    and r10,lr, r7, lsr #15 @ pix_s 6
    ldrh r10,[r3, r10]
    and r12,r12,r9, lsl #2
    sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 5 (pixel 1 of 2nd group)
    add r8, r8, r6, lsr #18 @ += 1/4 pix_s 4 (taken from r6 high half)
    and r7, lr, r7, lsr #23 @ pix_s 7
    ldrh r7, [r3, r7]
    and r10,r10,r9, lsl #2
    orr r8, r8, r10,lsl #15
    add r8, r8, r12,lsl #15 @ pix_d 6, 7
    sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 6 (pixel 2 of 2nd group)
    and r7, r7, r9, lsl #2
    add r10,r10,r7, lsr #2 @ += 1/4 pix_s 7
    orr r10,r10,r7, lsl #16 @ pix_d 8, 9

    subs r2, r2, #1

    stmdb r0!, {r4,r5,r6,r8,r10} @ store 10 dst pixels, walking backwards
    bne soft_scale_loop_line

    subs r11,r11,#1
    bne soft_scale_loop

    ldmfd sp!,{r4-r11,lr}
    bx lr
150 | |
6587f346 |
151 | |
c0bf6f9f |
/* buggy and slow, probably because of function call overhead
153 | @ renderer helper, based on bitbank's method |
154 | .global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal |
155 | |
156 | draw8pix: |
157 | stmfd sp!, {r4,r5} |
158 | |
159 | ldrb r3, [r1] @ get bit 0 pixels |
160 | mov r12,#1 |
161 | orr r12,r12,r12,lsl #8 |
162 | orr r12,r12,r12,lsl #16 |
163 | ldrb r1, [r1, #8] @ get bit 1 pixels |
164 | orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit |
165 | orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes |
166 | and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want |
167 | and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want |
168 | ldr r2, [r2] |
169 | |
170 | orr r1, r1, r1, lsl #9 @ process the bit 1 pixels |
171 | orr r1, r1, r1, lsl #18 |
172 | and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want |
173 | and r1, r12,r1, lsr #3 @ mask off the lower nibble |
174 | orr r4, r4, r3, lsl #1 |
175 | orr r5, r5, r1, lsl #5 |
176 | |
177 | @ can this be avoided? |
178 | mov r4, r4, lsl #3 @ *8 |
179 | mov r3, r2, ror r4 |
180 | strb r3, [r0], #1 |
181 | mov r4, r4, lsr #8 |
182 | mov r3, r2, ror r4 |
183 | strb r3, [r0], #1 |
184 | mov r4, r4, lsr #8 |
185 | mov r3, r2, ror r4 |
186 | strb r3, [r0], #1 |
187 | mov r4, r4, lsr #8 |
188 | mov r3, r2, ror r4 |
189 | strb r3, [r0], #1 |
190 | |
191 | mov r5, r5, lsl #3 @ *8 |
192 | mov r3, r2, ror r5 |
193 | strb r3, [r0], #1 |
194 | mov r5, r5, lsr #8 |
195 | mov r3, r2, ror r5 |
196 | strb r3, [r0], #1 |
197 | mov r5, r5, lsr #8 |
198 | mov r3, r2, ror r5 |
199 | strb r3, [r0], #1 |
200 | mov r5, r5, lsr #8 |
201 | mov r3, r2, ror r5 |
202 | strb r3, [r0], #1 |
203 | |
204 | ldmfd sp!, {r4,r5} |
205 | bx lr |
206 | */ |
207 | |