merged ppu code, added input+zapper, FDS/VS insert in menu
[fceu.git] / drivers / gp2x / asmutils.s
... / ...
CommitLineData
1@ vim:filetype=armasm
2
3@ test
@ void flushcache(void *beginning_addr, void *end_addr, int flags)
@ Flush/invalidate the CPU caches over [beginning_addr, end_addr).
@ In:  r0 = start, r1 = end, r2 = flags (passed straight through to the kernel)
@ Uses the private ARM Linux cacheflush syscall (__ARM_NR_cacheflush,
@ swi 0x9f0002); the kernel's return value is left in r0.
.global flushcache @ beginning_addr, end_addr, flags

flushcache:
	swi #0x9f0002
	bx lr			@ bx, not "mov pc, lr": interworks on ARMv4T and
				@ matches every other routine in this file
9
10
@ void block_or(void *src, size_t n, int pat)
@ OR the byte pattern 'pat' into n bytes starting at src.
@ src must be word-aligned; n is processed in 16-byte chunks (a remainder
@ of n % 16 bytes is left untouched). Only the low 8 bits of pat are used.
@ Clobbers: r0-r3, r12, flags.
.global block_or @ void *src, size_t n, int pat

block_or:
	stmfd sp!, {r4-r5}
	orr r2, r2, r2, lsl #8		@ replicate pat byte into both halfword bytes
	orr r2, r2, r2, lsl #16		@ ... and into the full word
	movs r1, r1, lsr #4		@ r1 = number of 16-byte chunks
	beq block_or_end		@ guard: n < 16 would underflow the counter
block_loop_or:
	ldmia r0, {r3-r5,r12}
	subs r1, r1, #1
	orr r3, r3, r2
	orr r4, r4, r2
	orr r5, r5, r2
	orr r12,r12,r2
	stmia r0!, {r3-r5,r12}
	bne block_loop_or
block_or_end:
	ldmfd sp!, {r4-r5}
	bx lr
29
30
@ void block_and(void *src, size_t n, int andpat)
@ AND the byte pattern 'andpat' into n bytes starting at src.
@ src must be word-aligned; n is processed in 16-byte chunks (a remainder
@ of n % 16 bytes is left untouched). Only the low 8 bits of andpat are used.
@ Clobbers: r0-r3, r12, flags.
.global block_and @ void *src, size_t n, int andpat

block_and:
	stmfd sp!, {r4-r5}
	orr r2, r2, r2, lsl #8		@ replicate andpat byte into both halfword bytes
	orr r2, r2, r2, lsl #16		@ ... and into the full word
	movs r1, r1, lsr #4		@ r1 = number of 16-byte chunks
	beq block_and_end		@ guard: n < 16 would underflow the counter
block_loop_and:
	ldmia r0, {r3-r5,r12}
	subs r1, r1, #1
	and r3, r3, r2
	and r4, r4, r2
	and r5, r5, r2
	and r12,r12,r2
	stmia r0!, {r3-r5,r12}
	bne block_loop_and
block_and_end:
	ldmfd sp!, {r4-r5}
	bx lr
49
50
@ void block_andor(void *src, size_t n, int andpat, int orpat)
@ For each of n bytes at src: byte = (byte & andpat) | orpat.
@ src must be word-aligned; n is processed in 16-byte chunks (a remainder
@ of n % 16 bytes is left untouched). Only the low 8 bits of each pattern
@ are used. Clobbers: r0-r3, r12, flags.
.global block_andor @ void *src, size_t n, int andpat, int orpat

block_andor:
	stmfd sp!, {r4-r6}
	orr r2, r2, r2, lsl #8		@ replicate andpat byte across the word
	orr r2, r2, r2, lsl #16
	orr r3, r3, r3, lsl #8		@ replicate orpat byte across the word
	orr r3, r3, r3, lsl #16
	movs r1, r1, lsr #4		@ r1 = number of 16-byte chunks
	beq block_andor_end		@ guard: n < 16 would underflow the counter
block_loop_andor:
	ldmia r0, {r4-r6,r12}
	subs r1, r1, #1
	and r4, r4, r2
	orr r4, r4, r3
	and r5, r5, r2
	orr r5, r5, r3
	and r6, r6, r2
	orr r6, r6, r3
	and r12,r12,r2
	orr r12,r12,r3
	stmia r0!, {r4-r6,r12}
	bne block_loop_andor
block_andor_end:
	ldmfd sp!, {r4-r6}
	bx lr
75
76
@ void spend_cycles(int c)
@ Busy-wait for roughly c CPU cycles.
@ NOTE(review): the "4 cycles/iteration" figure assumes the subs+bpl pair
@ costs 4 cycles on the target core -- confirm for the actual CPU.
.global spend_cycles @ c

spend_cycles:
	mov r0, r0, lsr #2 @ 4 cycles/iteration
	sub r0, r0, #2 @ entry/exit/init
.sc_loop:
	subs r0, r0, #1
	bpl .sc_loop

	bx lr
87
88
@ void soft_scale(void *dst, unsigned short *pal, int offs, int lines)
@ Converts 'lines' rows of 8bpp palette-indexed pixels to 16bpp, widening
@ the middle 256 pixels of each 320-byte row to 320 output pixels
@ (every 8 source pixels blend into 10 destination pixels).
@ NOTE(review): the 8bpp source appears to live in the same buffer, at
@ dst + 320*offs bytes; since the 16bpp output is twice as large, both
@ pointers walk backwards (pre-decrement loads, stmdb stores) so source
@ data is consumed before it is overwritten -- confirm against the caller.
@ pal: 256-entry RGB565 palette, indexed by source byte * 2.
.global soft_scale @ void *dst, unsigned short *pal, int offs, int lines

soft_scale:
	stmfd sp!,{r4-r11,lr}
	mov lr, #0xff
	mov lr, lr, lsl #1		@ lr = 0x1fe: source byte -> palette halfword offset mask
	mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
	orr r9, r9, #0x00e7		@ r9 lsl #2 = 0xe79c: keeps only the top bits of each
					@ RGB565 field so blends can't carry across fields

	mov r11,r3 @ r11= line counter
	mov r3, r1 @ r3 = pal base

	mov r12,#320
	mul r2, r12,r2			@ r2 = 320*offs = byte offset of first line
	add r4, r0, r2, lsl #1 @ r4 = dst_start
	add r5, r0, r2 @ r5 = src_start
	mul r12,r11,r12			@ r12 = 320*lines
	add r0, r4, r12,lsl #1 @ r0 = dst_end
	add r1, r5, r12 @ r1 = src_end

soft_scale_loop:
	sub r1, r1, #64 @ skip borders
	mov r2, #256/8			@ 32 groups of 8 source pixels per line

soft_scale_loop_line:
	ldr r12, [r1, #-8]!		@ r12 = source pixels 0-3 (one byte each)
	ldr r7, [r1, #4]		@ r7  = source pixels 4-7

	and r4, lr, r12,lsl #1		@ palette lookup, source pixel 0
	ldrh r4, [r3, r4]
	and r5, lr, r12,lsr #7		@ palette lookup, source pixel 1
	ldrh r5, [r3, r5]
	and r4, r4, r9, lsl #2
	orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0
	and r5, r5, r9, lsl #2
	sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1
	add r4, r4, r6, lsl #16 @ pix_d 0, 1
	and r6, lr, r12,lsr #15		@ palette lookup, source pixel 2
	ldrh r6, [r3, r6]
	and r12,lr, r12,lsr #23		@ palette lookup, source pixel 3
	ldrh r12,[r3, r12]
	and r6, r6, r9, lsl #2
	add r5, r5, r6			@ (pix_s 1 + pix_s 2) / 2
	mov r5, r5, lsr #1
	sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2
	orr r5, r5, r6, lsl #16

	and r6, lr, r7, lsl #1		@ palette lookup, source pixel 4
	ldrh r6, [r3, r6]
	and r12,r12,r9, lsl #2
	add r5, r5, r12,lsl #14 @ pix_d 2, 3
	and r6, r6, r9, lsl #2
	orr r6, r12,r6, lsl #16 @ pix_d 4, 5

	and r12,lr, r7, lsr #7		@ palette lookup, source pixel 5
	ldrh r12,[r3, r12]
	and r10,lr, r7, lsr #15		@ palette lookup, source pixel 6
	ldrh r10,[r3, r10]
	and r12,r12,r9, lsl #2
	sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1
	add r8, r8, r6, lsr #18
	and r7, lr, r7, lsr #23		@ palette lookup, source pixel 7
	ldrh r7, [r3, r7]
	and r10,r10,r9, lsl #2
	orr r8, r8, r10,lsl #15
	add r8, r8, r12,lsl #15 @ pix_d 6, 7
	sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2
	and r7, r7, r9, lsl #2
	add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3
	orr r10,r10,r7, lsl #16 @ pix_d 8, 9

	subs r2, r2, #1

	stmdb r0!, {r4,r5,r6,r8,r10}	@ store 10 output pixels (20 bytes), backwards
	bne soft_scale_loop_line

	subs r11,r11,#1
	bne soft_scale_loop

	ldmfd sp!,{r4-r11,lr}
	bx lr
170
171
/* buggy and slow, probably because of function call overhead
173@ renderer helper, based on bitbank's method
174.global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal
175
176draw8pix:
177 stmfd sp!, {r4,r5}
178
179 ldrb r3, [r1] @ get bit 0 pixels
180 mov r12,#1
181 orr r12,r12,r12,lsl #8
182 orr r12,r12,r12,lsl #16
183 ldrb r1, [r1, #8] @ get bit 1 pixels
184 orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit
185 orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
186 and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want
187 and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want
188 ldr r2, [r2]
189
190 orr r1, r1, r1, lsl #9 @ process the bit 1 pixels
191 orr r1, r1, r1, lsl #18
192 and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want
193 and r1, r12,r1, lsr #3 @ mask off the lower nibble
194 orr r4, r4, r3, lsl #1
195 orr r5, r5, r1, lsl #5
196
197 @ can this be avoided?
198 mov r4, r4, lsl #3 @ *8
199 mov r3, r2, ror r4
200 strb r3, [r0], #1
201 mov r4, r4, lsr #8
202 mov r3, r2, ror r4
203 strb r3, [r0], #1
204 mov r4, r4, lsr #8
205 mov r3, r2, ror r4
206 strb r3, [r0], #1
207 mov r4, r4, lsr #8
208 mov r3, r2, ror r4
209 strb r3, [r0], #1
210
211 mov r5, r5, lsl #3 @ *8
212 mov r3, r2, ror r5
213 strb r3, [r0], #1
214 mov r5, r5, lsr #8
215 mov r3, r2, ror r5
216 strb r3, [r0], #1
217 mov r5, r5, lsr #8
218 mov r3, r2, ror r5
219 strb r3, [r0], #1
220 mov r5, r5, lsr #8
221 mov r3, r2, ror r5
222 strb r3, [r0], #1
223
224 ldmfd sp!, {r4,r5}
225 bx lr
226*/
227