@ 32x: final renderer tweaks; PWM disable kills PWM irqs
@ picodrive.git: pico/32x/draw_arm.s
@ vim:filetype=armasm

@ Syntax: GNU as, ARM (A32) mode.

@ symbols defined outside this file
.extern Pico32x
.extern PicoDraw2FB
.extern HighPal
@ also referenced by the call_scan_* macros in this file
.extern PicoScan32xBegin
.extern PicoScan32xEnd
.extern DrawLineDest

@ bit tested in the word loaded from Pico32x+0x40 by the direct color loop
.equiv P32XV_PRI,  (1<< 7)

.bss
.align 2
.global Pico32xNativePal
@ one word; the packed pixel and run length loops load it and use the
@ value as the base address of the 32x palette (halfword entries)
Pico32xNativePal:
    .word 0

.text
.align 2
17
18
@ call_scan_prep cond
@ When \cond is nonzero: fetch the words stored at PicoScan32xBegin and
@ PicoScan32xEnd (used later as call targets) and the address of
@ DrawLineDest, then push all three. Resulting stack layout:
@   [sp, #0] = *PicoScan32xBegin
@   [sp, #4] = *PicoScan32xEnd
@   [sp, #8] = &DrawLineDest
@ Clobbers r4-r6. Expands to nothing when \cond is zero.
.macro call_scan_prep cond
.if \cond
    ldr     r4, =PicoScan32xBegin
    ldr     r5, =PicoScan32xEnd
    ldr     r6, =DrawLineDest
    ldr     r4, [r4]
    ldr     r5, [r5]
    stmfd   sp!, {r4,r5,r6}
.endif
.endm
29
@ call_scan_fin_ge cond
@ When \cond is nonzero: if the current flags satisfy GE, drop the three
@ words pushed by call_scan_prep. Does not modify the flags, so a following
@ conditional (GE) return still sees the same compare result.
.macro call_scan_fin_ge cond
.if \cond
    addge   sp, sp, #4*3
.endif
.endm
35
@ call_scan_begin cond
@ When \cond is nonzero: call the "begin" target saved by call_scan_prep
@ with r0 = (r2 & 0xff) + r4, then reload r0 from DrawLineDest.
@ r1-r3 are saved around the call; r0 and lr are overwritten.
@ The (3+n)*4 offsets skip the three words pushed here to reach the
@ call_scan_prep slots.
.macro call_scan_begin cond
.if \cond
    stmfd   sp!, {r1-r3}
    and     r0, r2, #0xff
    add     r0, r0, r4
    mov     lr, pc             @ return address = instruction after ldr pc
    ldr     pc, [sp, #(3+0)*4]
    ldr     r0, [sp, #(3+2)*4] @ &DrawLineDest
    ldmfd   sp!, {r1-r3}
    ldr     r0, [r0]
.endif
.endm
48
@ call_scan_end cond
@ When \cond is nonzero: call the "end" target saved by call_scan_prep
@ with r0 = (r2 & 0xff) + r4. r0-r3 are saved and restored around the
@ call; lr is overwritten.
@ The (4+1)*4 offset skips the four words pushed here to reach the second
@ call_scan_prep slot.
.macro call_scan_end cond
.if \cond
    stmfd   sp!, {r0-r3}
    and     r0, r2, #0xff
    add     r0, r0, r4
    mov     lr, pc             @ return address = instruction after ldr pc
    ldr     pc, [sp, #(4+1)*4]
    ldmfd   sp!, {r0-r3}
.endif
.endm
59
@ direct color
@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg
@
@ make_do_loop_dc name call_scan do_md
@   name      - global symbol to emit
@   call_scan - nonzero: invoke the scan begin/end callbacks for each line
@   do_md     - nonzero: write the MD pixel (via HighPal) where it is kept;
@               zero: leave dst untouched at those positions
@ In:  r0 = dst, r1 = dram, r2 = lines_sft_offs (line count in bits 31..16,
@      low byte added to the line number passed to the callbacks), r3 = mdbg
@ Regs: r4 = line, r5 = p32x, r6 = pixels left, r9 = palmd, r10 = inv_bit,
@       r11 = pmd, r7/r8/r12 = temp
.macro make_do_loop_dc name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11,lr}

    ldr     r10,=Pico32x
    ldr     r11,=PicoDraw2FB
    ldr     r10,[r10, #0x40]   @ Pico32x.vdp_regs[0]
    ldr     r11,[r11]
    ldr     r9, =HighPal       @ palmd
    add     r11,r11,#(328*8)   @ r11 = pmd: md data
    tst     r10,#P32XV_PRI
    moveq   r10,#0
    movne   r10,#0x8000        @ r10 = inv_bit
    call_scan_prep \call_scan

    mov     r4, #0             @ line
    b       1f @ loop_outer_entry

0: @ loop_outer:
    call_scan_end \call_scan
    add     r4, r4, #1
    sub     r11,r11,#1         @ adjust for prev read
    cmp     r4, r2, lsr #16
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11,pc}

1: @ loop_outer_entry:
    call_scan_begin \call_scan
    mov     r12,r4, lsl #1
    ldrh    r12,[r1, r12]
    add     r11,r11,#8
    mov     r6, #320
    add     r5, r1, r12, lsl #1 @ p32x = dram + dram[l]

2: @ loop_inner:
    ldrb    r7, [r11], #1      @ MD pixel
    subs    r6, r6, #1
    blt     0b @ loop_outer
    ldrh    r8, [r5], #2       @ 32x pixel
    cmp     r7, r3             @ MD has bg pixel?
    beq     3f @ draw32x
    eor     r12,r8, r10
    ands    r12,r12,#0x8000    @ !((t ^ inv) & 0x8000)
.if \do_md
    mov     r7, r7, lsl #1
    ldreqh  r12,[r9, r7]
    streqh  r12,[r0], #2       @ *dst++ = palmd[*pmd]
.else
    @ MD pixel is kept as-is: still step past it, otherwise every
    @ following 32x pixel of this line is written one position early
    addeq   r0, r0, #2         @ dst++
.endif
    beq     2b @ loop_inner

3: @ draw32x:
    and     r12,r8, #0x03e0
    mov     r8, r8, lsl #11
    orr     r8, r8, r8, lsr #(10+11)
    orr     r8, r8, r12,lsl #1
    bic     r8, r8, #0x0020    @ kill prio bit
    strh    r8, [r0], #2       @ *dst++ = bgr2rgb(*p32x++)
    b       2b @ loop_inner
.endm
122
123
@ packed pixel
@ note: this may read a few bytes over the end of PicoDraw2FB and dram,
@ so those should have a bit more alloc'ed than really needed.
@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg
@
@ make_do_loop_pp name call_scan do_md
@   name      - global symbol to emit
@   call_scan - nonzero: invoke the scan begin/end callbacks for each line
@   do_md     - nonzero: write the MD pixel (via HighPal) where it is kept;
@               zero: leave dst untouched at those positions
@ In:  r0 = dst, r1 = dram, r2 = lines_sft_offs (line count in bits 31..16,
@      bit 8 = shift: start one byte into the line data, low byte added to
@      the line number passed to the callbacks), r3 = mdbg
.macro make_do_loop_pp name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11,lr}

    ldr     r11,=PicoDraw2FB
    ldr     r10,=Pico32xNativePal
    ldr     r11,[r11]
    ldr     r10,[r10]
    ldr     r9, =HighPal       @ palmd
    add     r11,r11,#(328*8)   @ r11 = pmd: md data
    call_scan_prep \call_scan

    mov     r4, #0             @ line
    b       1f @ loop_outer_entry

0: @ loop_outer:
    call_scan_end \call_scan
    add     r4, r4, #1
    cmp     r4, r2, lsr #16
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11,pc}

1: @ loop_outer_entry:
    call_scan_begin \call_scan
    mov     r12,r4, lsl #1
    ldrh    r12,[r1, r12]
    add     r11,r11,#8
    mov     r6, #320/2         @ two pixels are handled per iteration
    add     r5, r1, r12, lsl #1 @ p32x = dram + dram[l]
    and     r12,r2, #0x100     @ shift
    add     r5, r5, r12,lsr #8

2: @ loop_inner:
@ r4,r6 - counters; r5 - 32x data; r9,r10 - md,32x pal; r11 - md data
@ r7,r8,r12,lr - temp
    tst     r5, #1
    ldreqb  r8, [r5], #2
    ldrb    r7, [r5, #-1]
    ldrneb  r8, [r5, #2]!      @ r7,r8 - pixel 0,1 index
    subs    r6, r6, #1
    blt     0b @ loop_outer
    cmp     r7, r8
    beq     5f @ check_fill @ +8

3: @ no_fill:
    mov     r12,r7, lsl #1
    mov     lr, r8, lsl #1
    ldrh    r7, [r10,r12]
    ldrh    r8, [r10,lr]
    add     r11,r11,#2

    eor     r12,r7, #0x20
    tst     r12,#0x20          @ NE when bit 0x20 of the palette entry is clear
    ldrneb  r12,[r11,#-2]      @ MD pixel 0
    eor     lr, r8, #0x20
    cmpne   r12,r3             @ MD has bg pixel?
.if \do_md
    mov     r12,r12,lsl #1
    ldrneh  r7, [r9, r12]      @ t = palmd[pmd[0]]
    tst     lr, #0x20
    ldrneb  lr, [r11,#-1]      @ MD pixel 1
    strh    r7, [r0], #2
    cmpne   lr, r3             @ MD has bg pixel?
    mov     lr, lr, lsl #1
    ldrneh  r8, [r9, lr]       @ t = palmd[pmd[1]]
    strh    r8, [r0], #2
.else
    streqh  r7, [r0]
    tst     lr, #0x20
    ldrneb  lr, [r11,#-1]      @ MD pixel 1
    add     r0, r0, #4
    cmpne   lr, r3             @ MD has bg pixel?
    streqh  r8, [r0, #-2]
.endif
    b       2b @ loop_inner

5: @ check_fill
    @ count pixels, align if needed
    bic     r12,r5, #1
    ldrh    r12,[r12]
    orr     lr, r7, r7, lsl #8
    cmp     r12,lr
    bne     3b @ no_fill

    tst     r5, #1
    sub     lr, r5, #2         @ starting r5 (32x render data start)
    addeq   r5, r5, #2
    addne   r5, r5, #1         @ add for the check above
    add     r6, r6, #1         @ restore from dec
    orr     r7, r7, r7, lsl #8
6:
    sub     r12,r5, lr
    ldrh    r8, [r5], #2
    cmp     r12,r6, lsl #1
    ldrh    r12,[r5], #2
    bge     7f @ count_done
    cmp     r8, r7
    cmpeq   r12,r7
    beq     6b

7: @ count_done
    sub     r5, r5, #4         @ undo readahead

    @ fix alignment and check type
    sub     r8, r5, lr
    tst     r8, #1
    subne   r5, r5, #1
    subne   r8, r8, #1

    and     r7, r7, #0xff
    cmp     r8, r6, lsl #1
    mov     r7, r7, lsl #1
    movgt   r8, r6, lsl #1     @ r8=count
    ldrh    r7, [r10,r7]
    sub     r6, r6, r8, lsr #1 @ adjust counter
    tst     r7, #0x20
    beq     9f @ bg_mode

    add     r11,r11,r8         @ MD data is not consulted for this run
8:
    subs    r8, r8, #2
    strgeh  r7, [r0], #2
    strgeh  r7, [r0], #2
    bgt     8b
    b       2b @ loop_inner

9: @ bg_mode:
    ldrb    r12,[r11],#1       @ MD pixel
    ldrb    lr, [r11],#1
    cmp     r12,r3             @ MD has bg pixel?
.if \do_md
    mov     r12,r12,lsl #1
    ldrneh  r12,[r9, r12]      @ t = palmd[*pmd]
    moveq   r12,r7
    cmp     lr, r3
    mov     lr, lr, lsl #1
    ldrneh  lr, [r9, lr]
    moveq   lr, r7
    strh    r12,[r0], #2
    strh    lr, [r0], #2
.else
    streqh  r7, [r0]
    cmp     lr, r3
    streqh  r7, [r0, #2]
    add     r0, r0, #4
.endif
    subs    r8, r8, #2
    bgt     9b @ bg_mode
    b       2b @ loop_inner
.endm
279
280
@ run length
@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg
@
@ make_do_loop_rl name call_scan do_md
@   name      - global symbol to emit
@   call_scan - nonzero: invoke the scan begin/end callbacks for each line
@   do_md     - nonzero: write the MD pixel (via HighPal) where it is kept;
@               zero: leave dst untouched at those positions
@ In:  r0 = dst, r1 = dram, r2 = lines_sft_offs (line count in bits 31..16,
@      low byte added to the line number passed to the callbacks), r3 = mdbg
@ Regs: r4 = line, r5 = p32x, r6 = pixels left, r8 = control word,
@       r9 = palmd, r10 = 32x pal, r11 = pmd, r7/r12/lr = temp
.macro make_do_loop_rl name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11,lr}

    ldr     r11,=PicoDraw2FB
    ldr     r10,=Pico32xNativePal
    ldr     r11,[r11]
    ldr     r10,[r10]
    ldr     r9, =HighPal       @ palmd
    add     r11,r11,#(328*8)   @ r11 = pmd: md data
    call_scan_prep \call_scan

    mov     r4, #0             @ line
    b       1f @ loop_outer_entry

0: @ loop_outer:
    call_scan_end \call_scan
    add     r4, r4, #1
    sub     r11,r11,#1         @ adjust for prev read
    cmp     r4, r2, lsr #16
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11,pc}

1: @ loop_outer_entry:
    call_scan_begin \call_scan
    mov     r12,r4, lsl #1
    ldrh    r12,[r1, r12]
    add     r11,r11,#8
    mov     r6, #320
    add     r5, r1, r12, lsl #1 @ p32x = dram + dram[l]

2: @ loop_inner:
    ldrh    r8, [r5], #2       @ control word
    and     r12,r8, #0xff      @ low byte = palette index
    mov     r12,r12,lsl #1
    ldrh    lr, [r10,r12]      @ t = 32x pixel
    eor     lr, lr, #0x20

3: @ loop_innermost:
    ldrb    r7, [r11], #1      @ MD pixel
    subs    r6, r6, #1
    blt     0b @ loop_outer
    cmp     r7, r3             @ MD has bg pixel?
    mov     r7, r7, lsl #1
    tstne   lr, #0x20
.if \do_md
    ldrneh  r12,[r9, r7]       @ t = palmd[*pmd]
    streqh  lr, [r0], #2
    strneh  r12,[r0], #2       @ *dst++ = t
.else
    streqh  lr, [r0]
    add     r0, r0, #2
.endif
    subs    r8, r8, #0x100     @ run counter lives in the bits above the index
    bge     3b @ loop_innermost
    b       2b @ loop_inner
.endm
341
342
@ instantiate every renderer: args are <symbol>, <call_scan>, <do_md>
make_do_loop_dc do_loop_dc,         0, 0
make_do_loop_dc do_loop_dc_md,      0, 1
make_do_loop_dc do_loop_dc_scan,    1, 0
make_do_loop_dc do_loop_dc_scan_md, 1, 1

make_do_loop_pp do_loop_pp,         0, 0
make_do_loop_pp do_loop_pp_md,      0, 1
make_do_loop_pp do_loop_pp_scan,    1, 0
make_do_loop_pp do_loop_pp_scan_md, 1, 1

make_do_loop_rl do_loop_rl,         0, 0
make_do_loop_rl do_loop_rl_md,      0, 1
make_do_loop_rl do_loop_rl_scan,    1, 0
make_do_loop_rl do_loop_rl_scan_md, 1, 1
357