more menu work, scalers, sound
[fceu.git] / drivers / gp2x / asmutils.s
... / ...
CommitLineData
1@ vim:filetype=armasm
2
3@ test
.global flushcache @ beginning_addr, end_addr, flags

@ flushcache(beginning_addr, end_addr, flags)
@   In:  r0 = beginning_addr, r1 = end_addr, r2 = flags
@        (passed through to the software interrupt untouched)
@   Issues "swi #0x9f0002" and returns whatever the handler leaves in r0.
@   NOTE(review): 0x9f0002 looks like the Linux OABI cacheflush call number
@   (0x900000 + 0x0f0002) -- confirm against the target kernel headers.
flushcache:
    swi     #0x9f0002
    bx      lr                      @ was "mov pc, lr"; bx is what every other
                                    @ routine in this file uses and it also
                                    @ returns correctly to a Thumb caller
9
10
.global block_or @ void *src, size_t n, int pat

@ void block_or(void *src, size_t n, int pat)
@   ORs a byte pattern into a buffer in place, 16 bytes per loop pass.
@   In:  r0 = src  (accessed with ldmia/stmia, so it must be word aligned)
@        r1 = n    (byte count; only whole 16-byte chunks are processed,
@                   the trailing n % 16 bytes are left untouched)
@        r2 = pat  (replicated into all four byte lanes by shift-and-OR,
@                   so it is expected to fit in the low 8 bits)
@   Clobbers: r0-r3, r12, flags.  r4-r5 are saved and restored.
block_or:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
    bxeq    lr                      @ n < 16: nothing to do. Without this the
                                    @ counter would wrap to 0xffffffff and the
                                    @ loop would run far past the buffer.
    stmfd   sp!, {r4-r5}
    orr     r2, r2, r2, lsl #8      @ pat -> 0x0000pppp
    orr     r2, r2, r2, lsl #16     @     -> 0xpppppppp
block_loop_or:
    ldmia   r0, {r3-r5,r12}         @ fetch 4 words, r0 not advanced yet
    subs    r1, r1, #1              @ flags survive until the bne below
    orr     r3, r3, r2
    orr     r4, r4, r2
    orr     r5, r5, r2
    orr     r12,r12,r2
    stmia   r0!, {r3-r5,r12}        @ write back and step to the next chunk
    bne     block_loop_or
    ldmfd   sp!, {r4-r5}
    bx      lr
29
30
.global block_andor @ void *src, size_t n, int andpat, int orpat

@ void block_andor(void *src, size_t n, int andpat, int orpat)
@   Rewrites a buffer in place as (byte & andpat) | orpat, 16 bytes per pass.
@   In:  r0 = src     (accessed with ldmia/stmia, so it must be word aligned)
@        r1 = n       (byte count; only whole 16-byte chunks are processed,
@                      the trailing n % 16 bytes are left untouched)
@        r2 = andpat  (replicated into all four byte lanes by shift-and-OR,
@        r3 = orpat    so both are expected to fit in the low 8 bits)
@   Clobbers: r0-r3, r12, flags.  r4-r6 are saved and restored.
block_andor:
    movs    r1, r1, lsr #4          @ r1 = number of 16-byte chunks
    bxeq    lr                      @ n < 16: nothing to do. Without this the
                                    @ counter would wrap to 0xffffffff and the
                                    @ loop would run far past the buffer.
    stmfd   sp!, {r4-r6}
    orr     r2, r2, r2, lsl #8      @ andpat -> all four byte lanes
    orr     r2, r2, r2, lsl #16
    orr     r3, r3, r3, lsl #8      @ orpat  -> all four byte lanes
    orr     r3, r3, r3, lsl #16
block_loop_andor:
    ldmia   r0, {r4-r6,r12}         @ fetch 4 words, r0 not advanced yet
    subs    r1, r1, #1              @ flags survive until the bne below
    and     r4, r4, r2
    orr     r4, r4, r3
    and     r5, r5, r2
    orr     r5, r5, r3
    and     r6, r6, r2
    orr     r6, r6, r3
    and     r12,r12,r2
    orr     r12,r12,r3
    stmia   r0!, {r4-r6,r12}        @ write back and step to the next chunk
    bne     block_loop_andor
    ldmfd   sp!, {r4-r6}
    bx      lr
55
56
.global spend_cycles @ c

@ spend_cycles(c)
@   Busy-wait delay loop.
@   In:  r0 = c, the number of cycles to burn (approximate)
@   Clobbers: r0, flags.
@   The loop counts down until the counter goes negative, so a small c
@   (where c/4 - 2 is already below zero) falls out after a single pass.
spend_cycles:
    mov     r0, r0, lsr #2          @ one loop pass is budgeted at 4 cycles
    sub     r0, r0, #2              @ allowance for call, setup and return
1:
    subs    r0, r0, #1
    bpl     1b                      @ keep spinning while the counter is >= 0

    bx      lr
67
68
.global soft_scale @ void *dst, unsigned short *pal, int offs, int lines

@ void soft_scale(void *dst, unsigned short *pal, int offs, int lines)
@   Expands 8-bit palettised lines to 16-bit pixels while stretching each
@   group of 8 source pixels to 10 destination pixels (256 -> 320 per line).
@   Source and destination live in the same buffer: both start addresses are
@   derived from r0 (src = dst + offs*320, out = dst + offs*320*2), and the
@   routine walks from the END of the area towards the start.
@   In:  r0 = dst, r1 = pal (halfword entries, indexed by source byte),
@        r2 = offs (first line), r3 = lines (line count)
@   Returns immediately when lines <= 0; otherwise the line counter would
@   wrap and the loops would run for roughly 2^32 lines.
@   Blend weights are 1/4, 1/2 and 3/4. Each palette colour is first masked
@   with 0xe79c (r9 << 2), which clears the low two bits of every colour
@   field so the ">> 2" / ">> 1" shifts cannot leak into the field below.
@   Per group of 8 source pixels s0..s7, the 10 outputs d0..d9 are:
@     d0 = s0             d1 = 1/4 s0 + 3/4 s1
@     d2 = (s1 + s2) / 2  d3 = 3/4 s2 + 1/4 s3
@     d4 = s3             d5 = s4
@     d6 = 1/4 s4 + 3/4 s5  d7 = (s5 + s6) / 2
@     d8 = 3/4 s6 + 1/4 s7  d9 = s7
@   Clobbers: r0-r3, r12, flags.  r4-r11 and lr are saved and restored.
soft_scale:
    cmp     r3, #0
    bxle    lr                      @ nothing to scale, stack not touched yet

    stmfd   sp!,{r4-r11,lr}
    mov     lr, #0xff
    mov     lr, lr, lsl #1          @ lr = 0x1fe: mask for (palette index * 2)
    mov     r9, #0x3900             @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007
    orr     r9, r9, #0x00e7         @ r9 = 0x39e7, always used as r9 << 2 = 0xe79c

    mov     r11,r3                  @ r11= line counter
    mov     r3, r1                  @ r3 = pal base

    mov     r12,#320
    mul     r2, r12,r2              @ r2 = offs * 320
    add     r4, r0, r2, lsl #1      @ r4 = dst_start
    add     r5, r0, r2              @ r5 = src_start
    mul     r12,r11,r12             @ r12 = lines * 320
    add     r0, r4, r12,lsl #1      @ r0 = dst_end
    add     r1, r5, r12             @ r1 = src_end

soft_scale_loop:
    sub     r1, r1, #64             @ skip borders
    mov     r2, #256/8              @ 32 groups of 8 source pixels per line

soft_scale_loop_line:
    ldr     r12, [r1, #-8]!         @ r12 = s0..s3 (one byte each), r1 -= 8
    ldr     r7, [r1, #4]            @ r7  = s4..s7

    and     r4, lr, r12,lsl #1      @ byte 0 of r12, times 2
    ldrh    r4, [r3, r4]            @ r4 = pal[s0]
    and     r5, lr, r12,lsr #7      @ byte 1 of r12, times 2
    ldrh    r5, [r3, r5]            @ r5 = pal[s1]
    and     r4, r4, r9, lsl #2
    orr     r4, r4, r4, lsl #14     @ r4[31:16] = 1/4 pix_s 0
    and     r5, r5, r9, lsl #2
    sub     r6, r5, r5, lsr #2      @ r6 = 3/4 pix_s 1
    add     r4, r4, r6, lsl #16     @ pix_d 0, 1
    and     r6, lr, r12,lsr #15     @ byte 2 of r12, times 2
    ldrh    r6, [r3, r6]            @ r6 = pal[s2]
    and     r12,lr, r12,lsr #23     @ byte 3 of r12, times 2
    ldrh    r12,[r3, r12]           @ r12 = pal[s3]
    and     r6, r6, r9, lsl #2
    add     r5, r5, r6
    mov     r5, r5, lsr #1          @ r5[15:0] = (s1 + s2) / 2
    sub     r6, r6, r6, lsr #2      @ r6 = 3/4 pix_s 2
    orr     r5, r5, r6, lsl #16

    and     r6, lr, r7, lsl #1      @ byte 0 of r7, times 2
    ldrh    r6, [r3, r6]            @ r6 = pal[s4]
    and     r12,r12,r9, lsl #2
    add     r5, r5, r12,lsl #14     @ pix_d 2, 3  (upper half += 1/4 s3)
    and     r6, r6, r9, lsl #2
    orr     r6, r12,r6, lsl #16     @ pix_d 4, 5

    and     r12,lr, r7, lsr #7      @ byte 1 of r7, times 2
    ldrh    r12,[r3, r12]           @ r12 = pal[s5]
    and     r10,lr, r7, lsr #15     @ byte 2 of r7, times 2
    ldrh    r10,[r3, r10]           @ r10 = pal[s6]
    and     r12,r12,r9, lsl #2
    sub     r8, r12,r12,lsr #2      @ r8 = 3/4 s5 (pixel 1 of the second word)
    add     r8, r8, r6, lsr #18     @ += 1/4 s4 (taken from the top half of r6)
    and     r7, lr, r7, lsr #23     @ byte 3 of r7, times 2
    ldrh    r7, [r3, r7]            @ r7 = pal[s7]
    and     r10,r10,r9, lsl #2
    orr     r8, r8, r10,lsl #15     @ upper half = 1/2 s6
    add     r8, r8, r12,lsl #15     @ pix_d 6, 7  (upper half += 1/2 s5)
    sub     r10,r10,r10,lsr #2      @ r10= 3/4 s6 (pixel 2 of the second word)
    and     r7, r7, r9, lsl #2
    add     r10,r10,r7, lsr #2      @ += 1/4 s7
    orr     r10,r10,r7, lsl #16     @ pix_d 8, 9

    subs    r2, r2, #1              @ flags survive the stmdb below

    stmdb   r0!, {r4,r5,r6,r8,r10}  @ r0 -= 20, store d0..d9 there
    bne     soft_scale_loop_line

    subs    r11,r11,#1
    bne     soft_scale_loop

    ldmfd   sp!,{r4-r11,lr}
    bx      lr
150
151
/* buggy and slow, probably because of function call overhead
153@ renderer helper, based on bitbank's method
154.global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal
155
156draw8pix:
157 stmfd sp!, {r4,r5}
158
159 ldrb r3, [r1] @ get bit 0 pixels
160 mov r12,#1
161 orr r12,r12,r12,lsl #8
162 orr r12,r12,r12,lsl #16
163 ldrb r1, [r1, #8] @ get bit 1 pixels
164 orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit
165 orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes
166 and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want
167 and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want
168 ldr r2, [r2]
169
170 orr r1, r1, r1, lsl #9 @ process the bit 1 pixels
171 orr r1, r1, r1, lsl #18
172 and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want
173 and r1, r12,r1, lsr #3 @ mask off the lower nibble
174 orr r4, r4, r3, lsl #1
175 orr r5, r5, r1, lsl #5
176
177 @ can this be avoided?
178 mov r4, r4, lsl #3 @ *8
179 mov r3, r2, ror r4
180 strb r3, [r0], #1
181 mov r4, r4, lsr #8
182 mov r3, r2, ror r4
183 strb r3, [r0], #1
184 mov r4, r4, lsr #8
185 mov r3, r2, ror r4
186 strb r3, [r0], #1
187 mov r4, r4, lsr #8
188 mov r3, r2, ror r4
189 strb r3, [r0], #1
190
191 mov r5, r5, lsl #3 @ *8
192 mov r3, r2, ror r5
193 strb r3, [r0], #1
194 mov r5, r5, lsr #8
195 mov r3, r2, ror r5
196 strb r3, [r0], #1
197 mov r5, r5, lsr #8
198 mov r3, r2, ror r5
199 strb r3, [r0], #1
200 mov r5, r5, lsr #8
201 mov r3, r2, ror r5
202 strb r3, [r0], #1
203
204 ldmfd sp!, {r4,r5}
205 bx lr
206*/
207