32x: still tweaking renderers
[picodrive.git] / pico / 32x / draw_arm.s
CommitLineData
5a681086 1@ vim:filetype=armasm
2
3.extern Pico32x
4.extern PicoDraw2FB
5.extern HighPal
6
7.equiv P32XV_PRI, (1<< 7)
8
@ Pico32xNativePal: one zero-initialised 32-bit word.
@ The pp and rl loops in this file load this word and use it as the base
@ address for halfword lookups indexed by (32X pixel index * 2).
    .bss
    .p2align 2
    .global Pico32xNativePal
Pico32xNativePal:
    .word   0
14
15.text
16.align 2
17
18
@ call_scan_prep cond
@ If cond is non-zero: read the words stored at PicoScan32xBegin and
@ PicoScan32xEnd, take the address of DrawLineDest, and push all three.
@ Stack after the push:
@   [sp, #0] = value of PicoScan32xBegin
@   [sp, #4] = value of PicoScan32xEnd
@   [sp, #8] = &DrawLineDest
@ Clobbers r4-r6. Expands to nothing when cond is zero.
.macro call_scan_prep cond
.if \cond
    ldr     r4, =PicoScan32xBegin
    ldr     r4, [r4]
    ldr     r5, =PicoScan32xEnd
    ldr     r5, [r5]
    ldr     r6, =DrawLineDest
    stmfd   sp!, {r4-r6}
.endif
.endm
29
@ call_scan_fin_ge cond
@ If cond is non-zero: release the three words pushed by call_scan_prep,
@ but only when the current flags satisfy GE. Flags are left untouched,
@ so a following conditional (GE) return still sees the same condition.
.macro call_scan_fin_ge cond
.if \cond
    addge   sp, sp, #12       @ 3 words
.endif
.endm
35
@ call_scan_begin cond
@ If cond is non-zero: call the routine whose address call_scan_prep left
@ in the first pushed slot, with r0 = (r2 & 0xff) + r4.
@ r1-r3 are saved around the call; on exit r0 holds the word read from
@ DrawLineDest (its address is the third slot pushed by call_scan_prep).
@ lr is overwritten. Assumes the three call_scan_prep words are on top of
@ the stack when the macro is expanded.
.macro call_scan_begin cond
.if \cond
    and     r0, r2, #0xff
    stmfd   sp!, {r1-r3}
    add     r0, r0, r4
    mov     lr, pc            @ return address = insn after the ldr pc
    ldr     pc, [sp, #(3+0)*4]
    ldmfd   sp!, {r1-r3}
    ldr     r0, [sp, #2*4]    @ &DrawLineDest
    ldr     r0, [r0]
.endif
.endm
48
@ call_scan_end cond
@ If cond is non-zero: call the routine whose address call_scan_prep left
@ in the second pushed slot, with r0 = (r2 & 0xff) + r4.
@ r0-r3 are saved and restored around the call; lr is overwritten.
@ Assumes the three call_scan_prep words are on top of the stack when the
@ macro is expanded.
.macro call_scan_end cond
.if \cond
    stmfd   sp!, {r0-r3}
    and     r0, r2, #0xff
    add     r0, r4, r0
    mov     lr, pc            @ return address = insn after the ldr pc
    ldr     pc, [sp, #5*4]    @ 4 saved regs + slot 1
    ldmfd   sp!, {r0-r3}
.endif
.endm
59
@ direct color
@ make_do_loop_dc name call_scan do_md
@ Emits a global function \name with the C-side signature
@   (unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg)
@ In:  r0 = dst
@      r1 = dram; halfword [line] holds the halfword offset of that line's data
@      r2 = lines_sft_offs: bits 0-7 = line offset, bits 16+ = line count
@      r3 = mdbg, the MD pixel value treated as background
@ call_scan != 0: wrap each line with call_scan_begin / call_scan_end.
@ do_md != 0:     where the MD pixel wins, write HighPal[md pixel] to dst;
@                 otherwise such pixels are left untouched in dst.
@ Each line reads 320 MD bytes starting 8 bytes into a 328-byte row.
@ Regs: r4 line, r5 32x data, r6 pixel counter, r9 HighPal,
@       r10 inv_bit, r11 md data, r7,r8,r12 temp
.macro make_do_loop_dc name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11,lr}

    ldr     r10,=Pico32x
    ldr     r11,=PicoDraw2FB
    ldr     r10,[r10, #0x40]  @ Pico32x.vdp_regs[0]
    ldr     r11,[r11]
    ldr     r9, =HighPal      @ palmd
    and     r4, r2, #0xff
    mov     r5, #328
    mla     r11,r4,r5,r11     @ r11 = pmd = PicoDraw2FB + offs*328: md data
    tst     r10,#P32XV_PRI
    moveq   r10,#0
    movne   r10,#0x8000       @ r10 = inv_bit
    call_scan_prep \call_scan

    mov     r4, #0            @ line
    b       1f @ loop_outer_entry

0: @ loop_outer:
    call_scan_end \call_scan
    add     r4, r4, #1
    sub     r11,r11,#1        @ adjust for prev read
    cmp     r4, r2, lsr #16   @ all lines done?
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11,pc}

1: @ loop_outer_entry:
    call_scan_begin \call_scan
    mov     r12,r4, lsl #1
    ldrh    r12,[r1, r12]     @ dram[line]
    add     r11,r11,#8        @ skip the first 8 bytes of the md row
    mov     r6, #320
    add     r5, r1, r12, lsl #1 @ p32x = dram + dram[l]

2: @ loop_inner:
    ldrb    r7, [r11], #1     @ MD pixel
    subs    r6, r6, #1
    blt     0b @ loop_outer
    ldrh    r8, [r5], #2      @ 32x pixel
    cmp     r7, r3            @ MD has bg pixel?
    beq     3f @ draw32x
    eor     r12,r8, r10
    ands    r12,r12,#0x8000   @ !((t ^ inv) & 0x8000)
.if \do_md
    mov     r7, r7, lsl #1
    ldreqh  r12,[r9, r7]
    streqh  r12,[r0], #2      @ *dst++ = palmd[*pmd]
.else
    @ MD pixel stays as it is in dst, but dst must still step past it;
    @ otherwise the rest of the line is drawn shifted left
    @ (the pp and rl loops advance dst in the same situation).
    addeq   r0, r0, #2        @ dst++
.endif
    beq     2b @ loop_inner

3: @ draw32x:
    and     r12,r8, #0x03e0   @ bits 5-9
    mov     r8, r8, lsl #11   @ bits 0-4 -> bits 11-15
    orr     r8, r8, r8, lsr #(10+11) @ original bits 10-15 -> bits 0-5
    orr     r8, r8, r12,lsl #1 @ bits 5-9 -> bits 6-10
    bic     r8, r8, #0x0020   @ kill prio bit
    strh    r8, [r0], #2      @ *dst++ = bgr2rgb(*p32x++)
    b       2b @ loop_inner
.endm
124
125
@ packed pixel
@ make_do_loop_pp name call_scan do_md
@ Emits a global function \name with the C-side signature
@   (unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg)
@ In:  r0 = dst
@      r1 = dram; halfword [line] holds the halfword offset of that line's data
@      r2 = lines_sft_offs: bits 0-7 = line offset, bit 8 = shift (adds one
@           byte to the line's start address), bits 16+ = line count
@      r3 = mdbg, the MD pixel value treated as background
@ call_scan != 0: wrap each line with call_scan_begin / call_scan_end.
@ do_md != 0:     where the MD pixel wins, write HighPal[md pixel] to dst;
@                 otherwise such pixels are left untouched in dst.
@ Pixels are handled in pairs (160 pairs per line). A 32X pixel is drawn
@ when its palette entry has bit 0x20 set or the MD pixel equals mdbg.
@ note: this may read a few bytes over the end of PicoDraw2FB and dram,
@ so those should have a bit more alloc'ed than really needed.
.macro make_do_loop_pp name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11,lr}

    ldr     r11,=PicoDraw2FB
    ldr     r11,[r11]
    ldr     r10,=Pico32xNativePal
    ldr     r10,[r10]         @ pal32x
    ldr     r9, =HighPal      @ palmd
    and     r4, r2, #0xff     @ offs
    mov     r5, #328          @ md row size in bytes
    mla     r11,r4,r5,r11     @ pmd = PicoDraw2FB + offs*328
    call_scan_prep \call_scan

    mov     r4, #0            @ line = 0
    b       1f                @ -> loop_outer_entry

0: @ loop_outer: one line finished
    call_scan_end \call_scan
    add     r4, r4, #1
    cmp     r4, r2, lsr #16   @ line >= line count?
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11,pc}

1: @ loop_outer_entry: set up the next line
    call_scan_begin \call_scan
    mov     r12,r4, lsl #1
    ldrh    r12,[r1, r12]     @ dram[line]
    add     r11,r11,#8        @ skip the first 8 bytes of the md row
    mov     r6, #160          @ pixel pairs per line
    add     r5, r1, r12, lsl #1 @ p32x = dram + dram[line]
    and     r12,r2, #0x100    @ shift flag
    add     r5, r5, r12,lsr #8 @ p32x += 1 byte when shift is set

2: @ loop_inner:
@ r4,r6 - counters; r5 - 32x data; r9,r10 - md,32x pal; r11 - md data
@ r7,r8,r12,lr - temp
    tst     r5, #1
    ldreqb  r8, [r5], #2      @ even address: pixel 1 first, step the pair
    ldrb    r7, [r5, #-1]     @ pixel 0
    ldrneb  r8, [r5, #2]!     @ odd address: pixel 1 from the next halfword
    subs    r6, r6, #1
    blt     0b                @ -> loop_outer
    cmp     r7, r8
    beq     5f                @ equal indices -> check_fill

3: @ no_fill: plain two-pixel path
    mov     r12,r7, lsl #1
    mov     lr, r8, lsl #1
    ldrh    r7, [r10,r12]     @ r7 = pal32x[pixel 0]
    ldrh    r8, [r10,lr]      @ r8 = pal32x[pixel 1]
    add     r11,r11,#2

    eor     r12,r7, #0x20
    tst     r12,#0x20         @ NE when bit 0x20 of r7 is clear
    ldrneb  r12,[r11,#-2]     @ MD pixel 0
    eor     lr, r8, #0x20
    cmpne   r12,r3            @ EQ here: 32x pixel 0 is the one to show
.if \do_md
    mov     r12,r12,lsl #1
    ldrneh  r7, [r9, r12]     @ otherwise t = palmd[pmd[0]]
    tst     lr, #0x20
    ldrneb  lr, [r11,#-1]     @ MD pixel 1
    strh    r7, [r0], #2
    cmpne   lr, r3            @ same decision for pixel 1
    mov     lr, lr, lsl #1
    ldrneh  r8, [r9, lr]      @ t = palmd[pmd[1]]
    strh    r8, [r0], #2
.else
    streqh  r7, [r0]
    tst     lr, #0x20
    ldrneb  lr, [r11,#-1]     @ MD pixel 1
    add     r0, r0, #4        @ dst always steps two pixels
    cmpne   lr, r3            @ same decision for pixel 1
    streqh  r8, [r0, #-2]
.endif
    b       2b                @ -> loop_inner

5: @ check_fill: both pixels of the pair use the same index
    @ count pixels, align if needed
    bic     r12,r5, #1
    ldrh    r12,[r12]         @ halfword at the aligned read position
    orr     lr, r7, r7, lsl #8 @ index repeated in both bytes
    cmp     r12,lr
    bne     3b                @ run does not continue -> no_fill

    tst     r5, #1
    sub     lr, r5, #2        @ starting r5 (32x render data start)
    addeq   r5, r5, #2
    addne   r5, r5, #1        @ add for the check above
    add     r6, r6, #1        @ restore from dec
    orr     r7, r7, r7, lsl #8 @ r7 = fill pattern
6: @ scan ahead two halfwords at a time while they match the pattern
    sub     r12,r5, lr        @ bytes covered so far
    ldrh    r8, [r5], #2
    cmp     r12,r6, lsl #1    @ reached what is left of the line?
    ldrh    r12,[r5], #2
    bge     7f                @ -> count_done
    cmp     r8, r7
    cmpeq   r12,r7
    beq     6b

7: @ count_done
    sub     r5, r5, #4        @ undo readahead

    @ fix alignment and check type
    sub     r8, r5, lr
    tst     r8, #1
    subne   r5, r5, #1
    subne   r8, r8, #1

    and     r7, r7, #0xff     @ back to the plain index
    cmp     r8, r6, lsl #1
    mov     r7, r7, lsl #1
    movgt   r8, r6, lsl #1    @ r8=count, clipped to the rest of the line
    ldrh    r7, [r10,r7]      @ r7 = pal32x[index]
    sub     r6, r6, r8, lsr #1 @ adjust counter
    tst     r7, #0x20
    beq     9f                @ bit 0x20 clear -> bg_mode

    add     r11,r11,r8        @ MD pixels are not consulted for this run
8: @ fill dst with r7, two pixels per pass
    subs    r8, r8, #2
    strgeh  r7, [r0], #2
    strgeh  r7, [r0], #2
    bgt     8b
    b       2b                @ -> loop_inner

9: @ bg_mode: r7 is drawn only where the MD pixel equals mdbg
    ldrb    r12,[r11],#1      @ MD pixel
    ldrb    lr, [r11],#1
    cmp     r12,r3            @ MD has bg pixel?
.if \do_md
    mov     r12,r12,lsl #1
    ldrneh  r12,[r9, r12]     @ t = palmd[*pmd]
    moveq   r12,r7
    cmp     lr, r3
    mov     lr, lr, lsl #1
    ldrneh  lr, [r9, lr]
    moveq   lr, r7
    strh    r12,[r0], #2
    strh    lr, [r0], #2
.else
    streqh  r7, [r0]
    cmp     lr, r3
    streqh  r7, [r0, #2]
    add     r0, r0, #4
.endif
    subs    r8, r8, #2
    bgt     9b                @ -> bg_mode
    b       2b                @ -> loop_inner
.endm
283
284
@ run length
@ make_do_loop_rl name call_scan do_md
@ Emits a global function \name with the C-side signature
@   (unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg)
@ In:  r0 = dst
@      r1 = dram; halfword [line] holds the halfword offset of that line's data
@      r2 = lines_sft_offs: bits 0-7 = line offset, bits 16+ = line count
@      r3 = mdbg, the MD pixel value treated as background
@ call_scan != 0: wrap each line with call_scan_begin / call_scan_end.
@ do_md != 0:     where the MD pixel wins, write HighPal[md pixel] to dst;
@                 otherwise such pixels are left untouched in dst.
@ Each control word: low byte = 32X pixel index, upper byte = repeat count
@ (the pixel is emitted until subtracting 0x100 makes the word negative).
@ Regs: r4 line, r5 32x data, r6 pixel counter, r9 HighPal,
@       r10 32x pal, r11 md data, r7,r8,r12,lr temp
.macro make_do_loop_rl name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11,lr}

    ldr     r11,=PicoDraw2FB
    ldr     r11,[r11]
    ldr     r10,=Pico32xNativePal
    ldr     r10,[r10]         @ pal32x
    ldr     r9, =HighPal      @ palmd
    and     r4, r2, #0xff     @ offs
    mov     r5, #328          @ md row size in bytes
    mla     r11,r4,r5,r11     @ pmd = PicoDraw2FB + offs*328
    call_scan_prep \call_scan

    mov     r4, #0            @ line = 0
    b       1f                @ -> loop_outer_entry

0: @ loop_outer: one line finished
    call_scan_end \call_scan
    add     r4, r4, #1
    sub     r11,r11,#1        @ take back the extra MD byte read at line end
    cmp     r4, r2, lsr #16   @ line >= line count?
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11,pc}

1: @ loop_outer_entry: set up the next line
    call_scan_begin \call_scan
    mov     r12,r4, lsl #1
    ldrh    r12,[r1, r12]     @ dram[line]
    add     r11,r11,#8        @ skip the first 8 bytes of the md row
    mov     r6, #320          @ pixels per line
    add     r5, r1, r12, lsl #1 @ p32x = dram + dram[line]

2: @ loop_inner: fetch the next run
    ldrh    r8, [r5], #2      @ control word
    and     r12,r8, #0xff
    mov     r12,r12,lsl #1
    ldrh    lr, [r10,r12]     @ t = pal32x[index]
    eor     lr, lr, #0x20     @ flip bit 0x20 so the tst below yields EQ when it was set

3: @ loop_innermost: one pixel of the run
    ldrb    r7, [r11], #1     @ MD pixel
    subs    r6, r6, #1
    blt     0b                @ -> loop_outer
    cmp     r7, r3            @ MD has bg pixel?
    tstne   lr, #0x20         @ EQ after this pair: draw the 32x value
    mov     r7, r7, lsl #1    @ flags untouched
.if \do_md
    ldrneh  r12,[r9, r7]      @ t = palmd[*pmd]
    streqh  lr, [r0], #2
    strneh  r12,[r0], #2      @ *dst++ = t
.else
    streqh  lr, [r0]
    add     r0, r0, #2        @ dst always steps one pixel
.endif
    subs    r8, r8, #0x100    @ one fewer repeat left
    bge     3b                @ -> loop_innermost
    b       2b                @ -> loop_inner
.endm
347
348
@ Instantiate every renderer variant.
@ Arguments: exported name, call_scan (1 = per-line scan callbacks),
@ do_md (1 = also write MD pixels from HighPal).

@ direct color
make_do_loop_dc do_loop_dc,         0, 0
make_do_loop_dc do_loop_dc_md,      0, 1
make_do_loop_dc do_loop_dc_scan,    1, 0
make_do_loop_dc do_loop_dc_scan_md, 1, 1

@ packed pixel
make_do_loop_pp do_loop_pp,         0, 0
make_do_loop_pp do_loop_pp_md,      0, 1
make_do_loop_pp do_loop_pp_scan,    1, 0
make_do_loop_pp do_loop_pp_scan_md, 1, 1

@ run length
make_do_loop_rl do_loop_rl,         0, 0
make_do_loop_rl do_loop_rl_md,      0, 1
make_do_loop_rl do_loop_rl_scan,    1, 0
make_do_loop_rl do_loop_rl_scan_md, 1, 1
363