@ flushcache(beginning_addr, end_addr, flags):
@ NOTE(review): only the export declaration is visible in this chunk; the
@ implementation (presumably a cache-maintenance OS call or CP15 sequence)
@ is elsewhere in the file - confirm against the full source.
4 .global flushcache @ beginning_addr, end_addr, flags
@ block_or(src, n, pat):
@ Presumably ORs the byte pattern 'pat' into a memory block at src
@ (name + signature comment; the load/or loop body is not visible here).
11 .global block_or @ void *src, size_t n, int pat
@ Replicate the low byte of r2 (pat) into all four byte lanes of the word,
@ so whole words can be processed at once:
15 orr r2, r2, r2, lsl #8
16 orr r2, r2, r2, lsl #16
@ ...loop body not visible in this chunk...
@ Store 16 bytes per iteration, post-incrementing the destination pointer.
25 stmia r0!, {r3-r5,r12}
@ block_and(src, n, andpat):
@ Presumably ANDs the byte pattern 'andpat' over a memory block at src
@ (name + signature comment; the load/and loop body is not visible here).
31 .global block_and @ void *src, size_t n, int andpat
@ Replicate the low byte of r2 (andpat) into all four byte lanes of the word:
35 orr r2, r2, r2, lsl #8
36 orr r2, r2, r2, lsl #16
@ ...loop body not visible in this chunk...
@ Store 16 bytes per iteration, post-incrementing the destination pointer.
45 stmia r0!, {r3-r5,r12}
@ block_andor(src, n, andpat, orpat):
@ Presumably applies (word AND andpat) OR orpat across a memory block
@ (name + signature comment; the load/and/or loop body is not visible here).
51 .global block_andor @ void *src, size_t n, int andpat, int orpat
@ Replicate the low bytes of r2 (andpat) and r3 (orpat) across all four
@ byte lanes of their respective words:
55 orr r2, r2, r2, lsl #8
56 orr r2, r2, r2, lsl #16
57 orr r3, r3, r3, lsl #8
58 orr r3, r3, r3, lsl #16
@ ...loop body not visible in this chunk...
@ Store 16 bytes per iteration, post-incrementing the destination pointer.
71 stmia r0!, {r4-r6,r12}
@ spend_cycles(c): busy-wait delay for roughly c CPU cycles.
@ The count is scaled to loop iterations, then adjusted for fixed overhead
@ (the delay loop itself is not visible in this chunk).
77 .global spend_cycles @ c
80 mov r0, r0, lsr #2 @ 4 cycles/iteration
81 sub r0, r0, #2 @ entry/exit/init
@ soft_scale(dst, pal, offs, lines):
@ Software upscaler: each loop iteration blends source pixels through
@ fractional weights (1/4 and 3/4, per the pix_s/pix_d comments) and emits
@ 10 16-bit output pixels (5 words) with a single stmdb.
@ NOTE(review): the prologue, per-line setup and the loads that fill lr/r12/r7
@ with packed source pixels are not visible in this chunk; register roles
@ below are taken from the surviving end-of-line comments - confirm against
@ the full source.
@   r9  = 0x3900 mask constant (used as r9,lsl #2 to mask RGB components)
@   r11 = line counter, r3 = palette base, r0/r1 = dst/src cursors
89 .global soft_scale @ void *dst, unsigned short *pal, int offs, int lines
95 mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
98 mov r11,r3 @ r11= line counter
99 mov r3, r1 @ r3 = pal base
103 add r4, r0, r2, lsl #1 @ r4 = dst_start
104 add r5, r0, r2 @ r5 = src_start
106 add r0, r4, r12,lsl #1 @ r0 = dst_end
107 add r1, r5, r12 @ r1 = src_end
110 sub r1, r1, #64 @ skip borders
@ Inner loop: runs once per group of output pixels; the terminating
@ condition is set by code not visible in this chunk (see bne below).
113 soft_scale_loop_line:
@ Extract and weight source pixels 0/1 from the packed word in r12
@ (lr presumably holds an extraction mask - not visible here).
117 and r4, lr, r12,lsl #1
119 and r5, lr, r12,lsr #7
121 and r4, r4, r9, lsl #2
122 orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0
123 and r5, r5, r9, lsl #2
@ x - x>>2 computes 3/4 of the masked component value:
124 sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1
125 add r4, r4, r6, lsl #16 @ pix_d 0, 1
126 and r6, lr, r12,lsr #15
128 and r12,lr, r12,lsr #23
130 and r6, r6, r9, lsl #2
133 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2
134 orr r5, r5, r6, lsl #16
@ Second packed source word (r7) enters the blend here:
136 and r6, lr, r7, lsl #1
138 and r12,r12,r9, lsl #2
139 add r5, r5, r12,lsl #14 @ pix_d 2, 3
140 and r6, r6, r9, lsl #2
141 orr r6, r12,r6, lsl #16 @ pix_d 4, 5
143 and r12,lr, r7, lsr #7
145 and r10,lr, r7, lsr #15
147 and r12,r12,r9, lsl #2
148 sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1
149 add r8, r8, r6, lsr #18
150 and r7, lr, r7, lsr #23
152 and r10,r10,r9, lsl #2
153 orr r8, r8, r10,lsl #15
154 add r8, r8, r12,lsl #15 @ pix_d 6, 7
155 sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2
156 and r7, r7, r9, lsl #2
157 add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3
158 orr r10,r10,r7, lsl #16 @ pix_d 8, 9
@ Write 5 words (10 output pixels) descending from dst_end.
162 stmdb r0!, {r4,r5,r6,r8,r10}
163 bne soft_scale_loop_line
@ Epilogue: restore callee-saved registers; the return instruction is
@ outside this chunk.
168 ldmfd sp!,{r4-r11,lr}
172 /* buggy and slow, probably because function call overhead
173 @ renderer helper, based on bitbank's method
174 .global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal
179 ldrb r3, [r1] @ get bit 0 pixels
181 orr r12,r12,r12,lsl #8
182 orr r12,r12,r12,lsl #16
183 ldrb r1, [r1, #8] @ get bit 1 pixels
184 orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit
185 orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
186 and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want
187 and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want
190 orr r1, r1, r1, lsl #9 @ process the bit 1 pixels
191 orr r1, r1, r1, lsl #18
192 and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want
193 and r1, r12,r1, lsr #3 @ mask off the lower nibble
194 orr r4, r4, r3, lsl #1
195 orr r5, r5, r1, lsl #5
197 @ can this be avoided?
198 mov r4, r4, lsl #3 @ *8
211 mov r5, r5, lsl #3 @ *8