sh2: timing fixes
[picodrive.git] / pico / 32x / draw_arm.s
CommitLineData
cff531af 1@*
2@* PicoDrive
3@* (C) notaz, 2010
4@*
5@* This work is licensed under the terms of MAME license.
6@* See COPYING file in the top-level directory.
7@*
5a681086 8
@ Target: ARM (32-bit ARM state), GNU as, pre-UAL ("divided") syntax.

@ Symbols defined elsewhere and referenced by the code in this file.
@ GNU as treats every undefined symbol as external, so these lines are
@ documentation only; all six referenced symbols are listed for completeness.
.extern Pico32x
.extern PicoDraw2FB
.extern HighPal
.extern PicoScan32xBegin
.extern PicoScan32xEnd
.extern DrawLineDest

@ bit tested in Pico32x.vdp_regs[0] by the direct color loop
.equiv P32XV_PRI,  (1 << 7)

.bss
.align 2
@ One word. The packed pixel and run length loops load this word and use
@ its value as the base address for halfword palette lookups.
.global Pico32xNativePal
Pico32xNativePal:
    .word 0

.text
.align 2
23
24
@ call_scan_prep cond
@ Expands to nothing when \cond is 0. Otherwise fetches the two per-line
@ callback pointers plus the address of DrawLineDest and pushes them:
@   [sp, #0] = value of PicoScan32xBegin
@   [sp, #4] = value of PicoScan32xEnd
@   [sp, #8] = &DrawLineDest
@ Overwrites r4, r5 and r6.
.macro call_scan_prep cond
.if \cond
    ldr     r4, =PicoScan32xBegin
    ldr     r4, [r4]                @ r4 = begin callback
    ldr     r5, =PicoScan32xEnd
    ldr     r5, [r5]                @ r5 = end callback
    ldr     r6, =DrawLineDest       @ r6 = address, not the value
    stmfd   sp!, {r4, r5, r6}
.endif
.endm
35
@ call_scan_fin_ge cond
@ Expands to nothing when \cond is 0. Otherwise, if the GE condition holds,
@ drops three words from the stack (the amount call_scan_prep pushes).
@ Flags are left untouched, so a following conditional return still sees
@ the same comparison result.
.macro call_scan_fin_ge cond
.if \cond
    addge   sp, sp, #3*4
.endif
.endm
41
@ call_scan_begin cond
@ Expands to nothing when \cond is 0. Otherwise calls the "begin" callback
@ stored by call_scan_prep with r0 = (r2 & 0xff) + r4, then reloads r0
@ from DrawLineDest.
@ In:    r2 = lines_sft_offs, r4 = current line,
@        stack laid out as left by call_scan_prep
@ Out:   r0 = value read from DrawLineDest after the callback returned
@ Keeps: r1-r3 (saved and restored around the call)
@ Clobb: lr, plus whatever the callback itself does not preserve
.macro call_scan_begin cond
.if \cond
    stmfd   sp!, {r1-r3}            @ 3 words: prep slots now start at sp+12
    and     r0, r2, #0xff
    add     r0, r0, r4              @ r0 = line offset + line
    mov     lr, pc                  @ return address = insn after the ldr pc
    ldr     pc, [sp, #(3 + 0) * 4]  @ jump to begin callback
    ldr     r0, [sp, #(3 + 2) * 4]  @ &DrawLineDest
    ldmfd   sp!, {r1-r3}
    ldr     r0, [r0]                @ dst = DrawLineDest
.endif
.endm
54
@ call_scan_end cond
@ Expands to nothing when \cond is 0. Otherwise calls the "end" callback
@ stored by call_scan_prep with r0 = (r2 & 0xff) + r4.
@ In:    r2 = lines_sft_offs, r4 = current line,
@        stack laid out as left by call_scan_prep
@ Keeps: r0-r3 (saved and restored around the call)
@ Clobb: lr, plus whatever the callback itself does not preserve
.macro call_scan_end cond
.if \cond
    stmfd   sp!, {r0-r3}            @ 4 words: prep slots now start at sp+16
    and     r0, r2, #0xff
    add     r0, r0, r4              @ r0 = line offset + line
    mov     lr, pc                  @ return address = insn after the ldr pc
    ldr     pc, [sp, #(4 + 1) * 4]  @ jump to end callback
    ldmfd   sp!, {r0-r3}
.endif
.endm
65
@ direct color
@ Generates a routine with the C-level signature
@   name(unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg)
@
@ Macro arguments:
@   name      - global symbol to define
@   call_scan - 1: invoke the scan begin/end callbacks around every line
@   do_md     - 1: write MD pixels (through HighPal) where the MD layer wins
@               0: leave dst untouched there, only stepping over the pixel
@
@ Routine arguments:
@   r0 = dst
@   r1 = dram; the first halfwords form a per-line table of offsets (in
@        halfwords) to the line's pixel data
@   r2 = lines_sft_offs: bits 0-7 line offset, bits 16+ line count
@   r3 = mdbg: MD pixel value that is treated as background
@
@ Register roles inside the loops:
@   r4  = line counter          r5  = 32x pixel pointer
@   r6  = pixels left in line   r7, r8, r12 = temporaries
@   r9  = HighPal (palmd)       r10 = inv_bit (0 or 0x8000)
@   r11 = MD pixel pointer
.macro make_do_loop_dc name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11,lr}

    ldr     r10,=Pico32x
    ldr     r11,=PicoDraw2FB
    ldr     r10,[r10, #0x40]     @ Pico32x.vdp_regs[0]
    ldr     r11,[r11]
    ldr     r9, =HighPal         @ palmd
    and     r4, r2, #0xff
    mov     r5, #328
    mla     r11,r4,r5,r11        @ r11 = pmd = PicoDraw2FB + offs*328: md data
    tst     r10,#P32XV_PRI
    moveq   r10,#0
    movne   r10,#0x8000          @ r10 = inv_bit
    call_scan_prep \call_scan

    mov     r4, #0               @ line
    b       1f @ loop_outer_entry

0: @ loop_outer:
    call_scan_end \call_scan
    add     r4, r4, #1
    sub     r11,r11,#1           @ adjust for prev read
    cmp     r4, r2, lsr #16      @ all lines done?
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11,pc}

1: @ loop_outer_entry:
    call_scan_begin \call_scan
    mov     r12,r4, lsl #1
    ldrh    r12,[r1, r12]        @ dram[l]: line offset from the table
    add     r11,r11,#8           @ 8 + 320 = 328 bytes of MD data per line
    mov     r6, #320
    add     r5, r1, r12, lsl #1  @ p32x = dram + dram[l]

2: @ loop_inner:
    ldrb    r7, [r11], #1        @ MD pixel
    subs    r6, r6, #1
    blt     0b @ loop_outer
    ldrh    r8, [r5], #2         @ 32x pixel
    cmp     r7, r3               @ MD has bg pixel?
    beq     3f @ draw32x
    eor     r12,r8, r10
    ands    r12,r12,#0x8000      @ !((t ^ inv) & 0x8000)
.if \do_md
    mov     r7, r7, lsl #1
    ldreqh  r12,[r9, r7]
    streqh  r12,[r0], #2         @ *dst++ = palmd[*pmd]
.else
    @ MD pixel wins but is not drawn here: still step over it, otherwise
    @ all following pixels of the line end up shifted left
    addeq   r0, r0, #2           @ dst++
.endif
    beq     2b @ loop_inner

3: @ draw32x:
    and     r12,r8, #0x03e0      @ bits 5-9 of the source pixel
    mov     r8, r8, lsl #11      @ bits 0-4 -> 11-15
    orr     r8, r8, r8, lsr #(10+11) @ bits 10-14 -> 0-4 (bit 15 lands on bit 5)
    orr     r8, r8, r12,lsl #1   @ bits 5-9 -> 6-10
    bic     r8, r8, #0x0020      @ kill prio bit
    strh    r8, [r0], #2         @ *dst++ = bgr2rgb(*p32x++)
    b       2b @ loop_inner
.endm
130
131
@ packed pixel
@ Generates a routine with the C-level signature
@   name(unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg)
@
@ note: this may read a few bytes over the end of PicoDraw2FB and dram,
@ so those should have a bit more alloc'ed than really needed.
@
@ Macro arguments:
@   name      - global symbol to define
@   call_scan - 1: invoke the scan begin/end callbacks around every line
@   do_md     - 1: write MD pixels (through HighPal) where the MD layer wins
@               0: only store 32x pixels, leave dst untouched elsewhere
@
@ Routine arguments:
@   r0 = dst, r1 = dram,
@   r2 = lines_sft_offs: bits 0-7 line offset, bit 8 shift, bits 16+ line count
@   r3 = mdbg: MD pixel value that is treated as background
.macro make_do_loop_pp name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11, lr}

    ldr     r11, =PicoDraw2FB
    ldr     r11, [r11]
    ldr     r10, =Pico32xNativePal
    ldr     r10, [r10]              @ r10 = 32x palette base
    ldr     r9,  =HighPal           @ r9  = palmd
    and     r4,  r2, #0xff
    mov     r5,  #328
    mla     r11, r4, r5, r11        @ pmd = PicoDraw2FB + offs*328
    call_scan_prep \call_scan

    mov     r4,  #0                 @ line = 0
    b       1f                      @ -> loop_outer_entry

0:  @ loop_outer: one line finished
    call_scan_end \call_scan
    add     r4,  r4, #1
    cmp     r4,  r2, lsr #16        @ line >= line count?
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11, pc}

1:  @ loop_outer_entry: set up one line
    call_scan_begin \call_scan
    mov     r12, r4, lsl #1
    ldrh    r12, [r1, r12]          @ dram[l]
    add     r11, r11, #8
    mov     r6,  #160               @ 320 pixels, two per iteration
    add     r5,  r1, r12, lsl #1    @ p32x = dram + dram[l]
    and     r12, r2, #0x100         @ shift flag
    add     r5,  r5, r12, lsr #8    @ p32x += 1 byte when shift is set

2:  @ loop_inner:
    @ r4, r6 - counters; r5 - 32x data; r9, r10 - md, 32x pal; r11 - md data
    @ r7, r8, r12, lr - scratch
    tst     r5,  #1
    ldreqb  r8,  [r5], #2
    ldrb    r7,  [r5, #-1]
    ldrneb  r8,  [r5, #2]!          @ r7, r8 = index of pixel 0, 1
    subs    r6,  r6, #1
    blt     0b                      @ -> loop_outer
    cmp     r7,  r8
    beq     5f                      @ equal indices: try the fill path

3:  @ no_fill: render a single pair of pixels
    mov     r12, r7, lsl #1
    mov     lr,  r8, lsl #1
    ldrh    r7,  [r10, r12]         @ r7 = 32x colour 0
    ldrh    r8,  [r10, lr]          @ r8 = 32x colour 1
    add     r11, r11, #2

    eor     r12, r7, #0x20
    tst     r12, #0x20              @ NE when bit 0x20 of colour 0 is clear
    ldrneb  r12, [r11, #-2]         @ MD pixel 0
    eor     lr,  r8, #0x20
    cmpne   r12, r3                 @ MD pixel 0 is background?
.if \do_md
    mov     r12, r12, lsl #1
    ldrneh  r7,  [r9, r12]          @ t = palmd[pmd[0]]
    tst     lr,  #0x20
    ldrneb  lr,  [r11, #-1]         @ MD pixel 1
    strh    r7,  [r0], #2
    cmpne   lr,  r3                 @ MD pixel 1 is background?
    mov     lr,  lr, lsl #1
    ldrneh  r8,  [r9, lr]           @ t = palmd[pmd[1]]
    strh    r8,  [r0], #2
.else
    streqh  r7,  [r0]
    tst     lr,  #0x20
    ldrneb  lr,  [r11, #-1]         @ MD pixel 1
    add     r0,  r0, #4             @ dst += 2 pixels, drawn or not
    cmpne   lr,  r3                 @ MD pixel 1 is background?
    streqh  r8,  [r0, #-2]
.endif
    b       2b                      @ -> loop_inner

5:  @ check_fill: is the next halfword the same index twice as well?
    bic     r12, r5, #1
    ldrh    r12, [r12]
    orr     lr,  r7, r7, lsl #8
    cmp     r12, lr
    bne     3b                      @ -> no_fill

    @ count matching pixels, aligning r5 first if needed
    tst     r5,  #1
    sub     lr,  r5, #2             @ lr = r5 at the start of this run
    addeq   r5,  r5, #2
    addne   r5,  r5, #1             @ skip what the check above covered
    add     r6,  r6, #1             @ take back the decrement from loop_inner
    orr     r7,  r7, r7, lsl #8     @ r7 = index in both bytes
6:
    sub     r12, r5, lr             @ bytes scanned so far
    ldrh    r8,  [r5], #2
    cmp     r12, r6, lsl #1         @ reached the rest of the line?
    ldrh    r12, [r5], #2
    bge     7f                      @ -> count_done
    cmp     r8,  r7
    cmpeq   r12, r7
    beq     6b

7:  @ count_done
    sub     r5,  r5, #4             @ undo readahead

    @ make the count even, then pick the fill type
    sub     r8,  r5, lr
    tst     r8,  #1
    subne   r5,  r5, #1
    subne   r8,  r8, #1

    and     r7,  r7, #0xff
    cmp     r8,  r6, lsl #1
    mov     r7,  r7, lsl #1
    movgt   r8,  r6, lsl #1         @ r8 = count, clamped to the line's rest
    ldrh    r7,  [r10, r7]          @ r7 = fill colour
    sub     r6,  r6, r8, lsr #1     @ adjust pair counter
    tst     r7,  #0x20
    beq     9f                      @ -> bg_mode

    @ bit 0x20 set: fill without looking at the MD pixels
    add     r11, r11, r8
8:
    subs    r8,  r8, #2
    strgeh  r7,  [r0], #2
    strgeh  r7,  [r0], #2
    bgt     8b
    b       2b                      @ -> loop_inner

9:  @ bg_mode: fill colour shows only where the MD pixel is background
    ldrb    r12, [r11], #1          @ MD pixel 0
    ldrb    lr,  [r11], #1          @ MD pixel 1
    cmp     r12, r3                 @ MD pixel 0 is background?
.if \do_md
    mov     r12, r12, lsl #1
    ldrneh  r12, [r9, r12]          @ t = palmd[*pmd]
    moveq   r12, r7
    cmp     lr,  r3
    mov     lr,  lr, lsl #1
    ldrneh  lr,  [r9, lr]
    moveq   lr,  r7
    strh    r12, [r0], #2
    strh    lr,  [r0], #2
.else
    streqh  r7,  [r0]
    cmp     lr,  r3
    streqh  r7,  [r0, #2]
    add     r0,  r0, #4             @ dst += 2 pixels, drawn or not
.endif
    subs    r8,  r8, #2
    bgt     9b                      @ -> bg_mode
    b       2b                      @ -> loop_inner
.endm
289
290
@ run length
@ Generates a routine with the C-level signature
@   name(unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg)
@
@ Macro arguments:
@   name      - global symbol to define
@   call_scan - 1: invoke the scan begin/end callbacks around every line
@   do_md     - 1: write MD pixels (through HighPal) where the MD layer wins
@               0: only store 32x pixels, leave dst untouched elsewhere
@
@ Routine arguments:
@   r0 = dst, r1 = dram,
@   r2 = lines_sft_offs: bits 0-7 line offset, bits 16+ line count
@   r3 = mdbg: MD pixel value that is treated as background
@
@ Register roles inside the loops:
@   r4  = line counter          r5  = 32x data pointer
@   r6  = pixels left in line   r7, r12 = temporaries
@   r8  = current control word  lr  = current 32x colour ^ 0x20
@   r9  = HighPal (palmd)       r10 = 32x palette base
@   r11 = MD pixel pointer
.macro make_do_loop_rl name call_scan do_md
.global \name
\name:
    stmfd   sp!, {r4-r11, lr}

    ldr     r11, =PicoDraw2FB
    ldr     r11, [r11]
    ldr     r10, =Pico32xNativePal
    ldr     r10, [r10]              @ r10 = 32x palette base
    ldr     r9,  =HighPal           @ r9  = palmd
    and     r4,  r2, #0xff
    mov     r5,  #328
    mla     r11, r4, r5, r11        @ pmd = PicoDraw2FB + offs*328
    call_scan_prep \call_scan

    mov     r4,  #0                 @ line = 0
    b       1f                      @ -> loop_outer_entry

0:  @ loop_outer: one line finished
    call_scan_end \call_scan
    add     r4,  r4, #1
    sub     r11, r11, #1            @ the last MD read went one byte too far
    cmp     r4,  r2, lsr #16        @ line >= line count?
    call_scan_fin_ge \call_scan
    ldmgefd sp!, {r4-r11, pc}

1:  @ loop_outer_entry: set up one line
    call_scan_begin \call_scan
    mov     r12, r4, lsl #1
    ldrh    r12, [r1, r12]          @ dram[l]
    add     r11, r11, #8
    mov     r6,  #320
    add     r5,  r1, r12, lsl #1    @ p32x = dram + dram[l]

2:  @ loop_inner: fetch the next run
    ldrh    r8,  [r5], #2           @ control word: low byte = palette index,
                                    @ high byte counts down once per pixel
    and     r12, r8, #0xff
    mov     r12, r12, lsl #1
    ldrh    lr,  [r10, r12]         @ t = 32x pixel
    eor     lr,  lr, #0x20          @ invert bit 0x20 for the tstne below
    @ NOTE(review): lr is stored with bit 0x20 still inverted - confirm
    @ that this is intended.

3:  @ loop_innermost: one pixel of the run
    ldrb    r7,  [r11], #1          @ MD pixel
    subs    r6,  r6, #1
    blt     0b                      @ -> loop_outer
    cmp     r7,  r3                 @ MD pixel is background?
    mov     r7,  r7, lsl #1
    tstne   lr,  #0x20              @ EQ from here on: the 32x pixel is drawn
.if \do_md
    ldrneh  r12, [r9, r7]           @ t = palmd[*pmd]
    streqh  lr,  [r0], #2
    strneh  r12, [r0], #2           @ *dst++ = t
.else
    streqh  lr,  [r0]
    add     r0,  r0, #2             @ dst++, drawn or not
.endif
    subs    r8,  r8, #0x100         @ run exhausted once this goes negative
    bge     3b                      @ -> loop_innermost
    b       2b                      @ -> loop_inner
.endm
353
354
@ Instantiate every renderer variant.
@ Arguments of each line: global name, call_scan, do_md.

@ direct color
make_do_loop_dc     do_loop_dc,             0, 0
make_do_loop_dc     do_loop_dc_md,          0, 1
make_do_loop_dc     do_loop_dc_scan,        1, 0
make_do_loop_dc     do_loop_dc_scan_md,     1, 1

@ packed pixel
make_do_loop_pp     do_loop_pp,             0, 0
make_do_loop_pp     do_loop_pp_md,          0, 1
make_do_loop_pp     do_loop_pp_scan,        1, 0
make_do_loop_pp     do_loop_pp_scan_md,     1, 1

@ run length
make_do_loop_rl     do_loop_rl,             0, 0
make_do_loop_rl     do_loop_rl_md,          0, 1
make_do_loop_rl     do_loop_rl_scan,        1, 0
make_do_loop_rl     do_loop_rl_scan_md,     1, 1
369
cff531af 370@ vim:filetype=armasm