4 .global flushcache @ beginning_addr, end_addr, flags
11 .global block_or @ void *src, size_t n, int pat -- exported; NOTE(review): presumably ORs pat into n bytes at src, but the load/orr/loop lines are not visible here -- confirm
15 orr r2, r2, r2, lsl #8 @ r2 |= r2 << 8: pat byte duplicated into bits 15:8
16 orr r2, r2, r2, lsl #16 @ r2 |= r2 << 16: pat now fills all 4 bytes (exact replication only if pat fits in 8 bits)
25 stmia r0!, {r3-r5,r12} @ store 4 words (16 bytes) at r0 and advance r0 by 16
31 .global block_andor @ void *src, size_t n, int andpat, int orpat -- exported; NOTE(review): presumably ANDs then ORs the patterns into n bytes at src, but the load/and/orr/loop lines are not visible here -- confirm
35 orr r2, r2, r2, lsl #8 @ r2 (andpat) |= r2 << 8
36 orr r2, r2, r2, lsl #16 @ r2 |= r2 << 16: andpat byte now in all 4 bytes (exact only if andpat fits in 8 bits)
37 orr r3, r3, r3, lsl #8 @ r3 (orpat) |= r3 << 8
38 orr r3, r3, r3, lsl #16 @ r3 |= r3 << 16: orpat byte now in all 4 bytes (exact only if orpat fits in 8 bits)
51 stmia r0!, {r4-r6,r12} @ store 4 words (16 bytes) at r0 and advance r0 by 16
57 .global spend_cycles @ c -- exported; r0 = c on entry; NOTE(review): presumably a busy-wait of about c cycles, the delay loop itself is not visible here
60 mov r0, r0, lsr #2 @ r0 = c / 4: loop count, at 4 cycles/iteration
61 sub r0, r0, #2 @ r0 -= 2: take off iterations to account for entry/exit/init overhead
69 .global soft_scale @ void *dst, unsigned short *pal, int offs, int lines -- exported; visible code blends 2 words of source pixels (r12, r7) into 5 words = 10 16-bit dst pixels per stmdb, walking down from the buffer end; NOTE(review): loads and palette lookups are on lines not shown
75 mov r9, #0x3900 @ component masks: f800 07e0 001f | e000 0780 001c | 3800 01e0 0007; r9 = 0x3900 here, NOTE(review): presumably completed to 0x39e7 on a line not shown
78 mov r11,r3 @ r11= line counter (copy of the lines argument)
79 mov r3, r1 @ r3 = pal base (frees r1 for the src pointer)
83 add r4, r0, r2, lsl #1 @ r4 = dst_start = r0 + offs*2
84 add r5, r0, r2 @ r5 = src_start = r0 + offs (same base buffer as dst)
86 add r0, r4, r12,lsl #1 @ r0 = dst_end = dst_start + r12*2; r12 is set on a line not shown
87 add r1, r5, r12 @ r1 = src_end = src_start + r12
90 sub r1, r1, #64 @ skip borders: step src pointer back 64 bytes
97 and r4, lr, r12,lsl #1 @ r4 = lr & (r12 << 1); NOTE(review): lr presumably a 0x1fe mask set outside this view, giving 2 * byte 0 of r12 as a pal offset
99 and r5, lr, r12,lsr #7 @ r5 = lr & (r12 >> 7): same extraction for byte 1 of r12
101 and r4, r4, r9, lsl #2 @ mask pix_s 0 with r9 << 2 (the e000 0780 001c mask if r9 = 0x39e7), so later >> 2 cannot bleed between colour fields
102 orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0 (value << 14 == (value >> 2) << 16); r4[15:0] keeps pix_s 0
103 and r5, r5, r9, lsl #2 @ same component mask on pix_s 1
104 sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1 (x - x/4)
105 add r4, r4, r6, lsl #16 @ pix_d 0, 1: upper half = 1/4 pix_s 0 + 3/4 pix_s 1
106 and r6, lr, r12,lsr #15 @ r6 = lr & (r12 >> 15): extraction for byte 2 of r12
108 and r12,lr, r12,lsr #23 @ r12 = lr & (r12 >> 23): extraction for byte 3; overwrites the source word
110 and r6, r6, r9, lsl #2 @ component mask on pix_s 2
113 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2
114 orr r5, r5, r6, lsl #16 @ r5 upper half = 3/4 pix_s 2
116 and r6, lr, r7, lsl #1 @ r6 = lr & (r7 << 1): extraction for byte 0 of the second source word r7 (loaded on a line not shown)
118 and r12,r12,r9, lsl #2 @ component mask on pix_s 3
119 add r5, r5, r12,lsl #14 @ pix_d 2, 3: upper half += 1/4 pix_s 3
120 and r6, r6, r9, lsl #2 @ component mask on the first pixel of r7
121 orr r6, r12,r6, lsl #16 @ pix_d 4, 5: low half = pix_s 3, upper half = first pixel of r7
123 and r12,lr, r7, lsr #7 @ r12 = lr & (r7 >> 7): extraction for byte 1 of r7
125 and r10,lr, r7, lsr #15 @ r10 = lr & (r7 >> 15): extraction for byte 2 of r7
127 and r12,r12,r9, lsl #2 @ component mask on pixel 1 of r7
128 sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1 (pixel 1 of the second word r7)
129 add r8, r8, r6, lsr #18 @ r8 += 1/4 of r6's upper half (first pixel of r7): low half is now pix_d 6
130 and r7, lr, r7, lsr #23 @ r7 = lr & (r7 >> 23): extraction for byte 3; overwrites the source word
132 and r10,r10,r9, lsl #2 @ component mask on pixel 2 of r7
133 orr r8, r8, r10,lsl #15 @ r8 upper half = 1/2 pixel 2 of r7 (value << 15 == (value >> 1) << 16)
134 add r8, r8, r12,lsl #15 @ pix_d 6, 7: upper half += 1/2 pixel 1 of r7
135 sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2 (pixel 2 of the second word)
136 and r7, r7, r9, lsl #2 @ component mask on pixel 3 of r7
137 add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3 (pixel 3 of the second word)
138 orr r10,r10,r7, lsl #16 @ pix_d 8, 9: upper half = pixel 3 of r7 unblended
142 stmdb r0!, {r4,r5,r6,r8,r10} @ store the 5 words (10 dst pixels) just below r0 and move r0 down by 20 bytes
143 bne soft_scale_loop_line @ loop while Z is clear; the flag-setting instruction is on a line not shown
148 ldmfd sp!,{r4-r11,lr} @ pop r4-r11 and lr saved on entry (matching push is on a line not shown)
152 /* buggy and slow, probably because of function call overhead
153 @ renderer helper, based on bitbank's method
154 .global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal
159 ldrb r3, [r1] @ get bit 0 pixels
161 orr r12,r12,r12,lsl #8
162 orr r12,r12,r12,lsl #16
163 ldrb r1, [r1, #8] @ get bit 1 pixels
164 orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit
165 orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
166 and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want
167 and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want
170 orr r1, r1, r1, lsl #9 @ process the bit 1 pixels
171 orr r1, r1, r1, lsl #18
172 and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want
173 and r1, r12,r1, lsr #3 @ mask off the lower nibble
174 orr r4, r4, r3, lsl #1
175 orr r5, r5, r1, lsl #5
177 @ can this be avoided?
178 mov r4, r4, lsl #3 @ *8
191 mov r5, r5, lsl #3 @ *8