add configure, revive pnd build, unify/refactor things
[picodrive.git] / pico / draw_arm.s
CommitLineData
cff531af 1/*\r
2 * assembly optimized versions of most functions from draw.c\r
3 * (C) notaz, 2006-2010\r
4 *\r
5 * This work is licensed under the terms of MAME license.\r
6 * See COPYING file in the top-level directory.\r
7 *\r
8 * this is highly specialized, be careful if changing related C code!\r
9 */\r
cc68a136 10\r
ea8c405f 11.include "port_config.s"\r
cc68a136 12\r
13.extern Pico\r
14.extern PicoOpt\r
15.extern HighCol\r
b6d7ac70 16.extern DrawScanline\r
cc68a136 17.extern HighSprZ\r
18.extern rendstatus\r
283fec1b 19.extern HighPreSpr\r
cc68a136 20.extern DrawLineDest\r
cc68a136 21.extern DrawStripInterlace\r
07abbab1 22.extern HighCacheS_ptr\r
cc68a136 23\r
5a681086 24.equiv OVERRIDE_HIGHCOL, 1\r
25\r
283fec1b 26.equ PDRAW_SPRITES_MOVED, (1<<0)\r
27.equ PDRAW_WND_DIFF_PRIO, (1<<1)\r
28.equ PDRAW_ACC_SPRITES, (1<<2)\r
29.equ PDRAW_DIRTY_SPRITES, (1<<4)\r
30.equ PDRAW_PLANE_HI_PRIO, (1<<6)\r
31.equ PDRAW_SHHI_DONE, (1<<7)\r
cc68a136 32\r
33@ helper\r
@ TilePixel pat, lsrr, offs: isolate one 4-bit pixel of r2 (r2 shifted right by\r
@ lsrr, masked with the register given as pat) in r4. A non-zero pixel is ORed\r
@ with r3 and stored as a byte at [r1,#offs]; a zero pixel stores nothing.\r
@ Clobbers r4 and the flags.\r
34.macro TilePixel pat lsrr offs\r
35.if !\lsrr\r
36 ands r4, \pat, r2\r
37.else\r
38 ands r4, \pat, r2, lsr #\lsrr\r
39.endif\r
40 orrne r4, r3, r4\r
41 strneb r4, [r1,#\offs]\r
42.endm\r
43\r
44@ TileNorm (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
@ Writes the 8 pixels of r2 to [r1+0..7], taking nibbles in the shift order\r
@ 12,8,4,0,28,24,20,16. Zero pixels are skipped (see TilePixel).\r
45.macro TileNorm pat\r
46 TilePixel \pat, 12, 0 @ #0x0000f000\r
47 TilePixel \pat, 8, 1 @ #0x00000f00\r
48 TilePixel \pat, 4, 2 @ #0x000000f0\r
49 TilePixel \pat, 0, 3 @ #0x0000000f\r
50 TilePixel \pat, 28, 4 @ #0xf0000000\r
51 TilePixel \pat, 24, 5 @ #0x0f000000\r
52 TilePixel \pat, 20, 6 @ #0x00f00000\r
53 TilePixel \pat, 16, 7 @ #0x000f0000\r
54.endm\r
55\r
56@ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
@ Same as TileNorm but with the nibble order reversed (shifts\r
@ 16,20,24,28,0,4,8,12 go to offsets 0..7), i.e. the tile line is mirrored.\r
57.macro TileFlip pat\r
58 TilePixel \pat, 16, 0 @ #0x000f0000\r
59 TilePixel \pat, 20, 1 @ #0x00f00000\r
60 TilePixel \pat, 24, 2 @ #0x0f000000\r
61 TilePixel \pat, 28, 3 @ #0xf0000000\r
62 TilePixel \pat, 0, 4 @ #0x0000000f\r
63 TilePixel \pat, 4, 5 @ #0x000000f0\r
64 TilePixel \pat, 8, 6 @ #0x00000f00\r
65 TilePixel \pat, 12, 7 @ #0x0000f000\r
66.endm\r
67\r
68@ shadow/hilight mode\r
69\r
70@ this one is for hi priority layer\r
@ TilePixelShHP lsrr, offs: pixel = (r2 >> lsrr) & r12 (r12 must hold 0xf).\r
@ Non-zero pixel: store r3|pixel at [r1,#offs].\r
@ Zero pixel: reload the destination byte, clear bit 6 (and #0xbf) and store it\r
@ back, so the byte is always rewritten. Clobbers r4 and the flags.\r
71.macro TilePixelShHP lsrr offs\r
72.if !\lsrr\r
73 ands r4, r12, r2\r
74.else\r
75 ands r4, r12, r2, lsr #\lsrr\r
76.endif\r
77 ldreqb r4, [r1,#\offs]\r
78 orrne r4, r3, r4\r
bfa12428 79 andeq r4, r4, #0xbf\r
07abbab1 80 strb r4, [r1,#\offs]\r
cc68a136 81.endm\r
82\r
7a7c6476 83@ TileNormShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf\r
@ r3 is only read by TilePixelShHP as written here. Nibble order is the same\r
@ as in TileNorm (shifts 12,8,4,0,28,24,20,16 to offsets 0..7).\r
cc68a136 84.macro TileNormShHP\r
85 TilePixelShHP 12, 0 @ #0x0000f000\r
86 TilePixelShHP 8, 1 @ #0x00000f00\r
87 TilePixelShHP 4, 2 @ #0x000000f0\r
88 TilePixelShHP 0, 3 @ #0x0000000f\r
89 TilePixelShHP 28, 4 @ #0xf0000000\r
90 TilePixelShHP 24, 5 @ #0x0f000000\r
91 TilePixelShHP 20, 6 @ #0x00f00000\r
92 TilePixelShHP 16, 7 @ #0x000f0000\r
93.endm\r
94\r
7a7c6476 95@ TileFlipShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf\r
@ Mirrored variant of TileNormShHP (shifts 16,20,24,28,0,4,8,12 to offsets 0..7).\r
cc68a136 96.macro TileFlipShHP\r
97 TilePixelShHP 16, 0 @ #0x000f0000\r
98 TilePixelShHP 20, 1 @ #0x00f00000\r
99 TilePixelShHP 24, 2 @ #0x0f000000\r
100 TilePixelShHP 28, 3 @ #0xf0000000\r
101 TilePixelShHP 0, 4 @ #0x0000000f\r
102 TilePixelShHP 4, 5 @ #0x000000f0\r
103 TilePixelShHP 8, 6 @ #0x00000f00\r
104 TilePixelShHP 12, 7 @ #0x0000f000\r
105.endm\r
106\r
107\r
108@ TileSingleSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx; r12: helper pattern 0xf\r
@ ORs 0xc0 into 8 consecutive destination bytes and leaves r1 advanced by 8.\r
@ The flags set by the initial tst stay live through the whole macro (no later\r
@ instruction sets them): odd -> byte, 3 halfwords, byte; even -> 4 halfwords.\r
@ NOTE(review): alignment is tested on r0 (sx), not on r1; this presumably\r
@ relies on the line buffer base being halfword aligned - confirm.\r
109.macro TileSingleSh\r
110 tst r0, #1 @ not aligned?\r
111 mov r7, #0x00c000\r
112 orr r7, r7, #0xc0\r
113 ldrneb r4, [r1]\r
114 ldreqh r4, [r1]\r
115 orr r4, r4, r7\r
116 strneb r4, [r1], #1\r
117 streqh r4, [r1], #2\r
118 ldrh r4, [r1]\r
119 orr r4, r4, r7\r
120 strh r4, [r1], #2\r
121 ldrh r4, [r1]\r
122 orr r4, r4, r7\r
123 strh r4, [r1], #2\r
124 ldrh r4, [r1]\r
125 orr r4, r4, r7\r
126 strh r4, [r1], #2\r
127 ldrneb r4, [r1]\r
128 orr r4, r4, r7\r
129 strneb r4, [r1], #1\r
130.endm\r
131\r
132@ TileSingleHi (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r12: second scratch, reloaded with 0xf on exit\r
@ For 8 consecutive destination bytes: clear bit 6 (bic with 0x8080>>1) and set\r
@ bit 7 (orr with 0x8080). r1 ends up advanced by 8.\r
@ Alignment is tested on r1 itself here (r0 is not read by this macro).\r
@ Loads run one element ahead of the stores, hence the negative store offsets.\r
133.macro TileSingleHi\r
134 tst r1, #1 @ not aligned?\r
135 mov r7, #0x008000\r
136 orr r7, r7, #0x80\r
137 ldrneb r4, [r1], #1\r
138 ldreqh r4, [r1], #2 @ 1ci\r
139 ldrh r12, [r1], #2\r
140 bic r4, r4, r7, lsr #1\r
141 orr r4, r4, r7\r
142 strneb r4, [r1, #-3]\r
143 streqh r4, [r1, #-4]\r
144 ldrh r4, [r1], #2\r
145 bic r12, r12, r7, lsr #1\r
146 orr r12, r12, r7\r
147 strh r12, [r1, #-4]\r
148 ldrh r12, [r1], #2\r
149 bic r4, r4, r7, lsr #1\r
150 orr r4, r4, r7\r
151 strh r4, [r1, #-4]\r
152 ldrneb r4, [r1]\r
153 bic r12, r12, r7, lsr #1\r
154 orr r12, r12, r7\r
155 strh r12, [r1, #-2]\r
156 bicne r4, r4, r7, lsr #1\r
157 orrne r4, r4, r7\r
158 strneb r4, [r1], #1\r
159 mov r12, #0xf\r
160.endm\r
161\r
@ TileDoShGenPixel shift, ofs: pixel = (r2 >> shift) & r12 (r12 must hold 0xf).\r
@   pixel == 0   : nothing is stored (branch to local label 0).\r
@   pixel <  0xe : store r3|pixel at [r1,#ofs].\r
@   pixel >= 0xe : read the destination byte, clear bits 7:6 and OR in\r
@                  pixel<<6; strb keeps only the low 8 bits, so 0xe yields\r
@                  0x80 and 0xf yields 0xc0 in bits 7:6.\r
@ Clobbers r4, r7 and the flags.\r
162.macro TileDoShGenPixel shift ofs\r
163.if \shift\r
164 ands r4, r12, r2, lsr #\shift\r
165.else\r
166 ands r4, r12, r2\r
167.endif\r
07abbab1 168 beq 0f\r
cc68a136 169 cmp r4, #0xe\r
bfa12428 170 ldrgeb r7, [r1,#\ofs]\r
171 orrlt r7, r3, r4 @ normal\r
07abbab1 172\r
bfa12428 173 bicge r7, r7, #0xc0\r
174 orrge r7, r7, r4, lsl #6\r
175 strb r7, [r1,#\ofs]\r
07abbab1 1760:\r
cc68a136 177.endm\r
178\r
179@ TileFlipSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf\r
@ Mirrored tile line through TileDoShGenPixel (shifts 16,20,24,28,0,4,8,12 to\r
@ offsets 0..7). r0 is not read by the macro body itself.\r
180.macro TileFlipSh\r
181 TileDoShGenPixel 16, 0 @ #0x000f0000\r
182 TileDoShGenPixel 20, 1 @ #0x00f00000\r
183 TileDoShGenPixel 24, 2 @ #0x0f000000\r
184 TileDoShGenPixel 28, 3 @ #0xf0000000\r
185 TileDoShGenPixel 0, 4 @ #0x0000000f\r
186 TileDoShGenPixel 4, 5 @ #0x000000f0\r
187 TileDoShGenPixel 8, 6 @ #0x00000f00\r
188 TileDoShGenPixel 12, 7 @ #0x0000f000\r
189.endm\r
190\r
191@ TileNormSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf\r
@ Non-mirrored tile line through TileDoShGenPixel (shifts 12,8,4,0,28,24,20,16\r
@ to offsets 0..7). r0 is not read by the macro body itself.\r
192.macro TileNormSh\r
193 TileDoShGenPixel 12, 0 @ #0x0000f000\r
194 TileDoShGenPixel 8, 1 @ #0x00000f00\r
195 TileDoShGenPixel 4, 2 @ #0x000000f0\r
196 TileDoShGenPixel 0, 3 @ #0x0000000f\r
197 TileDoShGenPixel 28, 4 @ #0xf0000000\r
198 TileDoShGenPixel 24, 5 @ #0x0f000000\r
199 TileDoShGenPixel 20, 6 @ #0x00f00000\r
200 TileDoShGenPixel 16, 7 @ #0x000f0000\r
201.endm\r
202\r
@ TileDoShGenPixel_markop shift, ofs: pixel = (r2 >> shift) & r12 (r12 = 0xf).\r
@   pixel == 0   : nothing is stored.\r
@   pixel <  0xe : store r3|pixel at [r1,#ofs].\r
@   pixel >= 0xe : keep the destination byte and only set its bit 7 (0x80).\r
@ Clobbers r4 and the flags.\r
bfa12428 203.macro TileDoShGenPixel_markop shift ofs\r
07abbab1 204.if \shift\r
bfa12428 205 ands r4, r12, r2, lsr #\shift\r
07abbab1 206.else\r
bfa12428 207 ands r4, r12, r2\r
07abbab1 208.endif\r
bfa12428 209 beq 0f\r
210 cmp r4, #0xe\r
211 ldrgeb r4, [r1,#\ofs]\r
212 orrlt r4, r3, r4\r
e54507e8 213 orrge r4, r4, #0x80\r
bfa12428 214 strb r4, [r1,#\ofs]\r
2150:\r
07abbab1 216.endm\r
217\r
@ Mirrored tile line through TileDoShGenPixel_markop\r
@ (r1=pdest, r2=pixels8, r3=pal, r12=0xf; r4: scratch).\r
e54507e8 218.macro TileFlipSh_markop\r
bfa12428 219 TileDoShGenPixel_markop 16, 0 @ #0x000f0000\r
220 TileDoShGenPixel_markop 20, 1 @ #0x00f00000\r
221 TileDoShGenPixel_markop 24, 2 @ #0x0f000000\r
222 TileDoShGenPixel_markop 28, 3 @ #0xf0000000\r
223 TileDoShGenPixel_markop 0, 4 @ #0x0000000f\r
224 TileDoShGenPixel_markop 4, 5 @ #0x000000f0\r
225 TileDoShGenPixel_markop 8, 6 @ #0x00000f00\r
226 TileDoShGenPixel_markop 12, 7 @ #0x0000f000\r
07abbab1 227.endm\r
228\r
@ Non-mirrored tile line through TileDoShGenPixel_markop\r
@ (r1=pdest, r2=pixels8, r3=pal, r12=0xf; r4: scratch).\r
e54507e8 229.macro TileNormSh_markop\r
bfa12428 230 TileDoShGenPixel_markop 12, 0 @ #0x0000f000\r
231 TileDoShGenPixel_markop 8, 1 @ #0x00000f00\r
232 TileDoShGenPixel_markop 4, 2 @ #0x000000f0\r
233 TileDoShGenPixel_markop 0, 3 @ #0x0000000f\r
234 TileDoShGenPixel_markop 28, 4 @ #0xf0000000\r
235 TileDoShGenPixel_markop 24, 5 @ #0x0f000000\r
236 TileDoShGenPixel_markop 20, 6 @ #0x00f00000\r
237 TileDoShGenPixel_markop 16, 7 @ #0x000f0000\r
07abbab1 238.endm\r
239\r
@ TileDoShGenPixel_onlyop_lp shift, ofs: pixel = (r2 >> shift) & r12 (r12 = 0xf).\r
@ Only pixels 0xe/0xf have any effect, and only where the destination byte\r
@ already has at least one of bits 7:6 set: those two bits are then replaced\r
@ by pixel<<6 (low 8 bits kept by strb: 0xe -> 0x80, 0xf -> 0xc0).\r
@ The destination is loaded only for non-zero pixels; a zero pixel always\r
@ takes the blt, so r4 is never used uninitialised. Clobbers r4, r7, flags.\r
240.macro TileDoShGenPixel_onlyop_lp shift ofs\r
241.if \shift\r
242 ands r7, r12, r2, lsr #\shift\r
243.else\r
244 ands r7, r12, r2\r
245.endif\r
246 ldrneb r4, [r1,#\ofs]\r
07abbab1 247 cmp r7, #0xe\r
bfa12428 248 blt 0f\r
249\r
250 tst r4, #0xc0\r
251 bicne r4, r4, #0xc0\r
252 orrne r4, r4, r7, lsl #6\r
253 strneb r4, [r1,#\ofs]\r
07abbab1 2540:\r
255.endm\r
256\r
@ Mirrored tile line through TileDoShGenPixel_onlyop_lp\r
@ (r1=pdest, r2=pixels8, r12=0xf; r4, r7: scratch).\r
257.macro TileFlipSh_onlyop_lp\r
258 TileDoShGenPixel_onlyop_lp 16, 0 @ #0x000f0000\r
259 TileDoShGenPixel_onlyop_lp 20, 1 @ #0x00f00000\r
260 TileDoShGenPixel_onlyop_lp 24, 2 @ #0x0f000000\r
261 TileDoShGenPixel_onlyop_lp 28, 3 @ #0xf0000000\r
262 TileDoShGenPixel_onlyop_lp 0, 4 @ #0x0000000f\r
263 TileDoShGenPixel_onlyop_lp 4, 5 @ #0x000000f0\r
264 TileDoShGenPixel_onlyop_lp 8, 6 @ #0x00000f00\r
265 TileDoShGenPixel_onlyop_lp 12, 7 @ #0x0000f000\r
266.endm\r
267\r
@ Non-mirrored tile line through TileDoShGenPixel_onlyop_lp\r
@ (r1=pdest, r2=pixels8, r12=0xf; r4, r7: scratch).\r
268.macro TileNormSh_onlyop_lp\r
269 TileDoShGenPixel_onlyop_lp 12, 0 @ #0x0000f000\r
270 TileDoShGenPixel_onlyop_lp 8, 1 @ #0x00000f00\r
271 TileDoShGenPixel_onlyop_lp 4, 2 @ #0x000000f0\r
272 TileDoShGenPixel_onlyop_lp 0, 3 @ #0x0000000f\r
273 TileDoShGenPixel_onlyop_lp 28, 4 @ #0xf0000000\r
274 TileDoShGenPixel_onlyop_lp 24, 5 @ #0x0f000000\r
275 TileDoShGenPixel_onlyop_lp 20, 6 @ #0x00f00000\r
276 TileDoShGenPixel_onlyop_lp 16, 7 @ #0x000f0000\r
277.endm\r
278\r
cc68a136 279\r
280@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
281\r
282@ struct TileStrip\r
283@ {\r
284@ int nametab; // 0x00\r
285@ int line; // 0x04\r
286@ int hscroll; // 0x08\r
287@ int xmask; // 0x0C\r
288@ int *hc; // 0x10 (pointer to cache buffer)\r
289@ int cells; // 0x14\r
290@ };\r
291\r
83c093a4 292@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells);\r
@ In: r0=plane_sh (bit 0 selects the name table register and the hscroll entry,\r
@     bit 1 becomes the sh flag), r1=hcache, r2=cellskip, r3=maxcells.\r
@ Tiles whose code has bit 15 clear are written straight to the line buffer\r
@ (pdest = HighCol base + dx). Tiles with bit 15 set are queued in hcache as\r
@ packed words (code | dx<<16 | ty<<25) for a later pass; the list is ended\r
@ with a zero word. If no tile with bit 15 clear was seen,\r
@ PDRAW_PLANE_HI_PRIO is ORed into rendstatus.\r
@ This first part handles the plain case; it branches to .DrawStrip_vsscroll\r
@ or .DrawStrip_interlace (which share this prologue) for the other modes.\r
cc68a136 293\r
83c093a4 294.global DrawLayer\r
cc68a136 295\r
296DrawLayer:\r
@ 9 words are pushed here (r4-r11 and lr).\r
297 stmfd sp!, {r4-r11,lr}\r
298\r
299 ldr r11, =(Pico+0x22228) @ Pico.video\r
300 mov r8, #1\r
301\r
83c093a4 302 ldrb r7, [r11, #16] @ ??vv??hh\r
cc68a136 303\r
304 mov r6, r1 @ hcache\r
83c093a4 305 orr r9, r3, r0, lsl #30\r
306 orr r9, r9, r2, lsl #8 @ r9=sh[31]|cellskip[15:8]|maxcells[7:0] (tmp)\r
cc68a136 307\r
308 mov r1, r7, lsl #4\r
309 orr r1, r1, #0x00ff\r
310\r
311 and r10, r7, #3\r
312 cmp r10, #1\r
313 biclt r1, r1, #0xfc00\r
314 biceq r1, r1, #0xfe00\r
315 bicgt r1, r1, #0xff00 @ r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels\r
316\r
317 add r10, r10, #5\r
318 cmp r10, #7\r
319 subge r10, r10, #1 @ r10=shift[width] (5,6,6,7)\r
320\r
321 @ calculate xmask:\r
322 mov r5, r8, lsl r10\r
323 sub r5, r5, #1 @ r5=xmask\r
324\r
325 @ Find name table:\r
@ the Z flag from this ands is still live at the moveq/movne pair below\r
@ (only ldr instructions in between)\r
83c093a4 326 ands r0, r0, #1\r
cc68a136 327 ldreqb r12, [r11, #2]\r
328 ldrneb r12, [r11, #4]\r
329\r
b6d7ac70 330 ldr r2, =DrawScanline @ trying to make good use of pipeline here\r
cc68a136 331 ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
332\r
333 moveq r12, r12, lsl #10\r
334 movne r12, r12, lsl #13\r
335 and r12, r12, #(7<<13) @ r12=(ts->nametab<<1) (halfword compliant)\r
336\r
337 ldrh r8, [r11, #12]\r
338 ldrb r7, [r11, #11]\r
339 ldr r2, [r2]\r
340\r
341 mov r4, r8, lsr #8 @ pvid->reg[13]\r
342 mov r4, r4, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)\r
343 tst r7, #2\r
b6d7ac70 344 addne r4, r4, r2, lsl #2 @ htab+=DrawScanline<<1; // Offset by line\r
cc68a136 345 tst r7, #1\r
346 biceq r4, r4, #0x1f @ htab&=~0xf; // Offset by tile\r
347 add r4, r4, r0, lsl #1 @ htab+=plane\r
348 bic r4, r4, #0x00ff0000 @ just in case\r
349 ldrh r3, [lr, r4] @ r3=hscroll\r
350\r
351 tst r7, #4\r
352 bne .DrawStrip_vsscroll\r
353\r
354 @ Get vertical scroll value:\r
355 add r7, lr, #0x012000\r
356 add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)\r
357 ldr r7, [r7]\r
358\r
@ taken only when both bit 1 and bit 2 of r8 are set\r
359 tst r8, #2\r
360 tstne r8, #4\r
361 bne .DrawStrip_interlace\r
362\r
363 tst r0, r0\r
364 movne r7, r7, lsr #16\r
365\r
366 @ Find the line in the name table\r
367 add r2, r2, r7\r
368 and r2, r2, r1\r
369 mov r4, r2, lsr #3\r
370 add r10, r10, #1 @ shift[width]++\r
371 add r12, r12, r4, lsl r10 @ nametab+=(ts.line>>3)<<shift[width];\r
372\r
373 @ ldmia r0, {r1,r2,r3,r5,r6,r9} @ r2=line, r3=ts->hscroll, r5=ts->xmask, r6=ts->hc, r9=ts->cells\r
cc68a136 374\r
375 and r10,r2, #7\r
376 mov r10,r10, lsl #1 @ r10=ty=(ts->line&7)<<1;\r
377 orr r10,r10, r9, lsl #24\r
378\r
379 rsb r8, r3, #0\r
380 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3\r
381\r
382 sub r1, r3, #1\r
383 and r1, r1, #7\r
384 add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1\r
385\r
@ the mov below does not touch the flags, so orrne/movne still see this tst\r
386 tst r9, #1<<31\r
387 mov r3, #0\r
740da8c6 388 orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty)\r
cc68a136 389 movne r3, #0x40 @ default to shadowed pal on sh mode\r
390\r
cc68a136 391 cmp r7, #8\r
392 addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll\r
393\r
83c093a4 394 and r9, r9, #0xff00\r
395 add r8, r8, r9, lsr #8 @ tilex+=cellskip\r
396 add r7, r7, r9, lsr #5 @ dx+=cellskip<<3;\r
397 sub r10,r10,r9, lsl #16 @ cells-=cellskip\r
83c093a4 398\r
cc68a136 399 @ cache some stuff to avoid mem access\r
@ with OVERRIDE_HIGHCOL the symbol HighCol holds a pointer that is\r
@ dereferenced; otherwise the symbol address itself is the buffer base\r
ea8c405f 400.if OVERRIDE_HIGHCOL\r
401 ldr r11,=HighCol\r
402 mov r0, #0xf\r
403 ldr r11,[r11]\r
404.else\r
cc68a136 405 ldr r11,=HighCol\r
406 mov r0, #0xf\r
ea8c405f 407.endif\r
408\r
409 mvn r9, #0 @ r9=prevcode=-1\r
cc68a136 410 add r1, r11, r7 @ r1=pdest\r
411\r
412\r
413 @ r4 & r7 are scratch in this loop\r
@ entered here after a single-colour fill, which already advanced r1 by 8\r
414.dsloop_subr1:\r
415 sub r1, r1, #8\r
416.dsloop: @ 40-41 times\r
417 subs r10,r10, #0x01000000\r
418 bmi .dsloop_exit\r
419\r
420.dsloop_enter:\r
421 and r7, r5, r8\r
422 add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)\r
423 ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend)\r
424\r
425 add r1, r1, #8\r
426 add r8, r8, #1\r
427\r
428 tst r7, #0x8000\r
429 bne .DrawStrip_hiprio\r
430\r
431 cmp r7, r9\r
432 beq .DrawStrip_samecode @ we know stuff about this tile already\r
433\r
434 mov r9, r7 @ remember code\r
740da8c6 435 orr r10, r10, #1<<21 @ seen non hi-prio tile\r
cc68a136 436\r
@ carry out of the lsl #20 is code bit 12; it is consumed by the eorcs below\r
437 movs r2, r9, lsl #20 @ if (code&0x1000)\r
438 mov r2, r2, lsl #1\r
439 add r2, r2, r10, lsl #17\r
440 mov r2, r2, lsr #17\r
441 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
442\r
443 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
444\r
445 bic r7, r3, #0x3f\r
446 and r3, r9, #0x6000\r
447 add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
448\r
449.DrawStrip_samecode:\r
450 tst r2, r2\r
451 beq .dsloop @ tileline blank\r
452\r
@ r2 equals itself rotated by one nibble only if all 8 nibbles are identical\r
453 cmp r2, r2, ror #4\r
454 beq .DrawStrip_SingleColor @ tileline singlecolor \r
455\r
456 tst r9, #0x0800\r
7a7c6476 457 bne .DrawStrip_TileFlip\r
cc68a136 458\r
459 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
cc68a136 460.DrawStrip_TileNorm:\r
461 TileNorm r0\r
462 b .dsloop\r
463\r
7a7c6476 464.DrawStrip_TileFlip:\r
465 TileFlip r0\r
466 b .dsloop\r
467\r
cc68a136 468.DrawStrip_SingleColor:\r
469 and r4, r2, #0xf\r
470 orr r4, r3, r4\r
471 orr r4, r4, r4, lsl #8\r
472 tst r1, #1 @ not aligned?\r
473 strneb r4, [r1], #1\r
474 streqh r4, [r1], #2\r
475 strh r4, [r1], #2\r
476 strh r4, [r1], #2\r
477 strh r4, [r1], #2\r
478 strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
479 b .dsloop_subr1\r
480\r
@ reached from .DrawStrip_hiprio while neither sh (bit 23) nor hi_not_empty\r
@ (bit 22) is set in r10: a blank tile line is dropped instead of being queued\r
cc68a136 481.DrawStrip_hiprio_maybempt:\r
482 cmp r7, r9\r
483 beq .dsloop @ must've been empty, otherwise we wouldn't get here\r
484 movs r2, r7, lsl #20 @ if (code&0x1000)\r
485 mov r2, r2, lsl #1\r
486 add r2, r2, r10, lsl #17\r
487 mov r2, r2, lsr #17\r
488 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
489 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
490 mov r9, r7 @ remember code\r
491 tst r2, r2\r
740da8c6 492 beq .dsloop\r
493 orr r10, r10, #1<<22\r
494\r
495.DrawStrip_hiprio:\r
496 tst r10, #0x00c00000\r
497 beq .DrawStrip_hiprio_maybempt\r
@ r0 is borrowed for dx = pdest - buffer base and reloaded with 0xf below\r
498 sub r0, r1, r11\r
499 orr r7, r7, r0, lsl #16\r
500 orr r7, r7, r10, lsl #25 @ (ty<<25)\r
501 tst r7, #0x1000\r
502 eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26;\r
503 str r7, [r6], #4 @ cache hi priority tile\r
504 mov r0, #0xf\r
cc68a136 505 b .dsloop\r
506\r
507.dsloop_exit:\r
740da8c6 508 tst r10, #1<<21 @ seen non hi-prio tile\r
509 ldreq r1, =rendstatus\r
cc68a136 510 mov r0, #0\r
740da8c6 511 ldreq r2, [r1]\r
cc68a136 512 str r0, [r6] @ terminate the cache list\r
283fec1b 513 orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles\r
740da8c6 514 streq r2, [r1]\r
cc68a136 515\r
516 ldmfd sp!, {r4-r11,lr}\r
517 bx lr\r
518\r
6d7acf9e 519@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
cc68a136 520\r
@ Variant of the DrawLayer tile loop that re-reads a vertical scroll value from\r
@ vsram for every cell. Entered from DrawLayer with: r0=plane (0/1), r1=ymask,\r
@ r3=hscroll, r5=xmask, r6=hcache, r9=packed args, r10=shift[width],\r
@ r12=nametab, lr=Pico+0x10000, and the DrawLayer prologue frame still on the\r
@ stack. Because ty can change from cell to cell, the state is packed\r
@ differently than in the plain loop (see the r5/r8/r10 layout comments).\r
521.DrawStrip_vsscroll:\r
6d7acf9e 522 rsb r8, r3, #0\r
523 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3\r
7b802576 524 bic r8, r8, #0x3fc00000\r
525 orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[21:0])\r
cc68a136 526\r
b6d7ac70 527 ldr r4, =DrawScanline\r
6d7acf9e 528 orr r5, r1, r10, lsl #24\r
529 ldr r4, [r4]\r
530 sub r1, r3, #1\r
531 orr r5, r5, r4, lsl #16 @ r5=(shift_width[31:24]|scanline[23:16]|ymask[15:0])\r
532 and r1, r1, #7\r
533 add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1\r
cc68a136 534\r
6d7acf9e 535 mov r10,r9, lsl #16\r
83c093a4 536 tst r0, #1\r
6d7acf9e 537 orrne r10,r10, #0x8000\r
@ the mov/orr that follow do not touch the flags; orrne/movne use this tst\r
538 tst r9, #1<<31\r
539 mov r3, #0\r
540 orr r10,r10, #0xff000000 @ will be adjusted on entering loop\r
83c093a4 541 orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0])\r
6d7acf9e 542 movne r3, #0x40 @ default to shadowed pal on sh mode\r
cc68a136 543\r
83c093a4 544 cmp r7, #8\r
545 subne r10,r10, #0x01000000 @ have hscroll, start with negative cell\r
546\r
547 and r9, r9, #0xff00\r
548 add r8, r8, r9, lsr #8 @ tilex+=cellskip\r
549 add r7, r7, r9, lsr #5 @ dx+=cellskip<<3;\r
550 add r10,r10,r9, lsl #16 @ cell+=cellskip\r
cc68a136 551\r
6d7acf9e 552 @ cache some stuff to avoid mem access\r
ea8c405f 553.if OVERRIDE_HIGHCOL\r
6d7acf9e 554 ldr r11,=HighCol\r
555 mov r0, #0xf\r
ea8c405f 556 ldr r11,[r11]\r
557.else\r
558 ldr r11,=HighCol\r
559 mov r0, #0xf\r
560.endif\r
6d7acf9e 561\r
ea8c405f 562 mvn r9, #0 @ r9=prevcode=-1\r
563 add r1, r11, r7 @ r1=pdest\r
6d7acf9e 564\r
565 @ r4 & r7 are scratch in this loop\r
566.dsloop_vs_subr1:\r
567 sub r1, r1, #8\r
568.dsloop_vs: @ 40-41 times\r
@ the cell counter counts up here; leave once cells_max <= cell\r
569 add r10,r10, #0x01000000\r
570 and r4, r10, #0x003f0000\r
571 cmp r4, r10, asr #8\r
740da8c6 572 ble .dsloop_vs_exit\r
6d7acf9e 573\r
574 @ calc offset and read tileline code to r7, also calc ty\r
575 add r7, lr, #0x012000\r
576 add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)\r
577 add r7, r7, r10,asr #23 @ vsram + ((cell&~1)<<1)\r
578 bic r7, r7, #3\r
579 tst r10,#0x8000 @ plane1?\r
580 addne r7, r7, #2\r
581 ldrh r7, [r7] @ r7=vscroll\r
582\r
583 bic r10,r10,#0xff @ clear old ty\r
7b802576 584 and r4, r5, #0xff0000 @ scanline\r
585 add r4, r4, r7, lsl #16 @ ... += vscroll\r
586 and r4, r4, r5, lsl #16 @ ... &= ymask\r
6d7acf9e 587 and r7, r4, #0x70000\r
588 orr r10,r10,r7, lsr #15 @ new ty\r
589\r
590 mov r4, r4, lsr #19\r
591 mov r7, r5, lsr #24\r
592 mov r4, r4, lsl r7 @ nametabadd\r
593\r
594 and r7, r8, r8, lsr #25\r
595 add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)\r
596 add r7, r7, r4, lsl #1\r
597 ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend)\r
598\r
599 add r1, r1, #8\r
600 add r8, r8, #1\r
601\r
602 tst r7, #0x8000\r
603 bne .DrawStrip_vs_hiprio\r
604\r
@ NOTE(review): ty was just recomputed for this cell, but the shortcut below\r
@ compares only the tile code. If two consecutive cells carry the same code\r
@ with different ty, r2 still holds the pixels fetched for the previous ty -\r
@ confirm whether that is intended.\r
605 cmp r7, r9\r
606 beq .DrawStrip_vs_samecode @ we know stuff about this tile already\r
607\r
608 mov r9, r7 @ remember code\r
7b802576 609 orr r8, r8, #(1<<24)@ seen non hi-prio tile\r
6d7acf9e 610\r
611 movs r2, r9, lsl #20 @ if (code&0x1000)\r
612 mov r2, r2, lsl #1\r
613 add r2, r2, r10, lsl #17\r
614 mov r2, r2, lsr #17\r
615 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
616\r
617 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
618\r
619 bic r7, r3, #0x3f\r
620 and r3, r9, #0x6000\r
621 add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
622\r
623.DrawStrip_vs_samecode:\r
624 tst r2, r2\r
625 beq .dsloop_vs @ tileline blank\r
626\r
627 cmp r2, r2, ror #4\r
628 beq .DrawStrip_vs_SingleColor @ tileline singlecolor \r
629\r
630 tst r9, #0x0800\r
7a7c6476 631 bne .DrawStrip_vs_TileFlip\r
6d7acf9e 632\r
633 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
6d7acf9e 634.DrawStrip_vs_TileNorm:\r
635 TileNorm r0\r
636 b .dsloop_vs\r
637\r
7a7c6476 638.DrawStrip_vs_TileFlip:\r
639 TileFlip r0\r
640 b .dsloop_vs\r
641\r
6d7acf9e 642.DrawStrip_vs_SingleColor:\r
643 and r4, r2, #0xf\r
644 orr r4, r3, r4\r
645 orr r4, r4, r4, lsl #8\r
646 tst r1, #1 @ not aligned?\r
647 strneb r4, [r1], #1\r
648 streqh r4, [r1], #2\r
649 strh r4, [r1], #2\r
650 strh r4, [r1], #2\r
651 strh r4, [r1], #2\r
652 strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
653 b .dsloop_vs_subr1\r
654\r
655.DrawStrip_vs_hiprio:\r
656 tst r10, #0x00c00000\r
657 beq .DrawStrip_vs_hiprio_maybempt\r
@ r0 is borrowed for dx = pdest - buffer base and reloaded with 0xf below\r
658 sub r0, r1, r11\r
659 orr r7, r7, r0, lsl #16\r
660 orr r7, r7, r10, lsl #25 @ (ty<<25)\r
661 tst r7, #0x1000\r
662 eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26;\r
663 str r7, [r6], #4 @ cache hi priority tile\r
664 mov r0, #0xf\r
665 b .dsloop_vs\r
666\r
667.DrawStrip_vs_hiprio_maybempt:\r
668 cmp r7, r9\r
669 beq .dsloop_vs @ must've been empty, otherwise we wouldn't get here\r
670 movs r2, r7, lsl #20 @ if (code&0x1000)\r
671 mov r2, r2, lsl #1\r
672 add r2, r2, r10, lsl #17\r
673 mov r2, r2, lsr #17\r
674 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
675 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
676 mov r9, r7 @ remember code\r
677 tst r2, r2\r
678 orrne r10, r10, #1<<22\r
679 bne .DrawStrip_vs_hiprio\r
680 b .dsloop_vs\r
681\r
740da8c6 682.dsloop_vs_exit:\r
7b802576 683 tst r8, #(1<<24) @ seen non hi-prio tile\r
740da8c6 684 ldreq r1, =rendstatus\r
685 mov r0, #0\r
686 ldreq r2, [r1]\r
687 str r0, [r6] @ terminate the cache list\r
283fec1b 688 orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles\r
740da8c6 689 streq r2, [r1]\r
690\r
691 ldmfd sp!, {r4-r11,lr}\r
692 bx lr\r
693\r
6d7acf9e 694\r
695@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
cc68a136 696\r
697@ interlace mode 2? Sonic 2?\r
@ Entered from DrawLayer with the DrawLayer prologue frame (r4-r11,lr = 9 words)\r
@ still on the stack. Builds a struct TileStrip on the stack in the order\r
@ {nametab, line, hscroll, xmask, hc, cells} = {r0,r2,r3,r5,r6,r9} and hands\r
@ it to DrawStripInterlace, then returns to DrawLayer's caller.\r
698.DrawStrip_interlace:\r
699 tst r0, r0\r
700 moveq r7, r7, lsl #21\r
701 movne r7, r7, lsl #5\r
702\r
703 @ Find the line in the name table\r
b6d7ac70 704 add r2, r7, r2, lsl #22 @ r2=(vscroll+(DrawScanline<<1))<<21 (11 bits);\r
cc68a136 705 orr r1, r1, #0x80000000\r
706 and r2, r2, r1, ror #10 @ &((ymask<<1)|1)<<21;\r
707 mov r2, r2, lsr #21\r
708 mov r4, r2, lsr #4\r
709 mov r12, r12, lsr #1 @ halfwords\r
710 add r0, r12, r4, lsl r10 @ nametab+=(ts.line>>4)<<shift[width];\r
711 and r9, r9, #0xff\r
712\r
@ Reserve 7 words rather than 6: with the 9 words pushed by the prologue this\r
@ keeps sp 8-byte aligned at the call below, as the ARM procedure call\r
@ standard requires at a public interface. The 7th word is unused padding.\r
713 sub sp, sp, #7*4\r
714 stmia sp, {r0,r2,r3,r5,r6,r9}\r
715\r
716 mov r0, sp\r
717 bl DrawStripInterlace @ struct TileStrip *ts\r
718\r
719 add sp, sp, #7*4\r
720 ldmfd sp!, {r4-r11,lr}\r
721 bx lr\r
722\r
723.pool\r
724\r
725@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
726\r
727\r
728.global BackFill @ int reg7, int sh\r
@ In: r0=reg7 (only the low 6 bits are kept), r1=sh.\r
@ Fills 320 bytes of the line buffer, starting 8 bytes past its base, with the\r
@ byte (reg7 & 0x3f) | (sh << 6). The byte is replicated into eight registers\r
@ and written with ten 32-byte stmia bursts.\r
729\r
730BackFill:\r
731 stmfd sp!, {r4-r9,lr}\r
732\r
@ lsl #26 followed by lsr #26 keeps only bits 5:0 of r0\r
ea8c405f 733.if OVERRIDE_HIGHCOL\r
734 ldr lr, =HighCol\r
735 mov r0, r0, lsl #26\r
736 ldr lr, [lr]\r
737 mov r0, r0, lsr #26\r
738 add lr, lr, #8\r
739.else\r
cc68a136 740 ldr lr, =(HighCol+8)\r
cc68a136 741 mov r0, r0, lsl #26\r
742 mov r0, r0, lsr #26\r
ea8c405f 743.endif\r
744\r
@ replicate the fill byte to all four byte lanes of r0\r
cc68a136 745 orr r0, r0, r1, lsl #6\r
746 orr r0, r0, r0, lsl #8\r
747 orr r0, r0, r0, lsl #16\r
748\r
749 mov r1, r0\r
750 mov r2, r0\r
751 mov r3, r0\r
752 mov r4, r0\r
753 mov r5, r0\r
754 mov r6, r0\r
755 mov r7, r0\r
756\r
757 @ go go go!\r
758 stmia lr!, {r0-r7} @ 10*8*4\r
759 stmia lr!, {r0-r7}\r
760 stmia lr!, {r0-r7}\r
761 stmia lr!, {r0-r7}\r
762 stmia lr!, {r0-r7}\r
763 stmia lr!, {r0-r7}\r
764 stmia lr!, {r0-r7}\r
765 stmia lr!, {r0-r7}\r
766 stmia lr!, {r0-r7}\r
767 stmia lr!, {r0-r7}\r
768\r
@ the saved lr is popped into r12 and used as the return address\r
769 ldmfd sp!, {r4-r9,r12}\r
770 bx r12\r
771\r
772\r
773@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
774\r
775\r
7a7c6476 776.global DrawTilesFromCache @ int *hc, int sh, int rlim\r
@ In: r0=hc, r1=sh (only bit 0 is used), r2=rlim.\r
@ Walks the list of packed words at hc (low 16 bits = tile code, bits 24:16 =\r
@ dx, bits 31:25 = ty) and draws each tile line to the line buffer at base+dx.\r
@ A word whose upper halfword is zero ends the list and returns.\r
@ r8 = sh | rlim<<1; tiles with dx > rlim go through .dtfc_cut_tile, which masks\r
@ off part of the pixel word before drawing.\r
@ With sh set, .dtfc_check_rendflags runs first and may clear bit 6 over 320\r
@ bytes of the line buffer and turn the sh handling off for this call.\r
cc68a136 777\r
778DrawTilesFromCache:\r
779 stmfd sp!, {r4-r8,r11,lr}\r
780\r
cc68a136 781 @ cache some stuff to avoid mem access\r
ea8c405f 782.if OVERRIDE_HIGHCOL\r
cc68a136 783 ldr r11,=HighCol\r
cc68a136 784 mov r12,#0xf\r
ea8c405f 785 ldr r11,[r11]\r
786.else\r
787 ldr r11,=HighCol\r
788 mov r12,#0xf\r
789.endif\r
790 ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
cc68a136 791\r
740da8c6 792 mvn r5, #0 @ r5=prevcode=-1\r
@ the bne below still tests the Z flag of this ands (orr leaves flags alone)\r
7a7c6476 793 ands r8, r1, #1\r
794 orr r8, r8, r2, lsl #1\r
740da8c6 795 bne .dtfc_check_rendflags\r
796\r
cc68a136 797 @ scratch: r4, r7\r
798.dtfc_loop:\r
799 ldr r6, [r0], #4 @ read code\r
800 movs r1, r6, lsr #16 @ r1=dx;\r
801 ldmeqfd sp!, {r4-r8,r11,pc} @ dx is never zero, this must be a terminator, return\r
7a7c6476 802 bic r4, r1, #0xfe00\r
803 add r1, r11, r4 @ r1=pdest\r
cc68a136 804\r
@ NOTE(review): only the low 16 bits (code) are compared; ty lives in bits\r
@ 31:25 of the entry. If two consecutive entries share a code but differ in\r
@ ty, r2 is reused from the earlier ty - confirm whether that can happen.\r
805 mov r7, r6, lsl #16\r
806 cmp r5, r7, lsr #16\r
807 beq .dtfc_samecode @ if (code==prevcode)\r
808\r
809 mov r5, r7, lsr #16\r
810\r
811 mov r2, r5, lsl #21\r
812 mov r2, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4;\r
813 add r2, r2, r6, lsr #25 @ addr+=ty\r
814\r
815 and r3, r5, #0x6000\r
816 mov r3, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
817\r
818 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
819\r
820.dtfc_samecode:\r
@ r4 = rlim - dx; negative means the tile starts past the limit\r
7a7c6476 821 rsbs r4, r4, r8, lsr #1\r
822 bmi .dtfc_cut_tile\r
823\r
824 tst r8, #1\r
cc68a136 825 bne .dtfc_shadow\r
826\r
827 tst r2, r2\r
828 beq .dtfc_loop\r
829\r
830 cmp r2, r2, ror #4\r
831 beq .dtfc_SingleColor @ tileline singlecolor \r
832\r
833 tst r5, #0x0800\r
7a7c6476 834 bne .dtfc_TileFlip\r
cc68a136 835\r
836 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
cc68a136 837.dtfc_TileNorm:\r
838 TileNorm r12\r
839 b .dtfc_loop\r
840\r
7a7c6476 841.dtfc_TileFlip:\r
842 TileFlip r12\r
843 b .dtfc_loop\r
844\r
cc68a136 845.dtfc_SingleColor:\r
846 and r4, r2, #0xf\r
847 orr r4, r3, r4\r
848 orr r4, r4, r4, lsl #8\r
849 tst r1, #1 @ not aligned?\r
850 strneb r4, [r1], #1\r
851 streqh r4, [r1], #2\r
852 strh r4, [r1], #2\r
853 strh r4, [r1], #2\r
854 strh r4, [r1], #2\r
855 strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
856 b .dtfc_loop\r
857\r
858.dtfc_shadow:\r
859 tst r2, r2\r
860 beq .dtfc_shadow_blank\r
861\r
862 cmp r2, r2, ror #4\r
863 beq .dtfc_SingleColor @ tileline singlecolor \r
864\r
865 tst r5, #0x0800\r
7a7c6476 866 bne .dtfc_TileFlipShHP\r
cc68a136 867\r
868 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
cc68a136 869.dtfc_TileNormShHP:\r
870 TileNormShHP\r
871 b .dtfc_loop\r
872\r
7a7c6476 873.dtfc_TileFlipShHP:\r
874 TileFlipShHP\r
875 b .dtfc_loop\r
876\r
@ blank tile line in sh mode: clear bit 6 in the 8 destination bytes\r
@ (odd pdest: byte, 3 halfwords, byte; even pdest: 4 halfwords)\r
cc68a136 877.dtfc_shadow_blank:\r
07abbab1 878 tst r1, #1\r
879 ldrneb r4, [r1]\r
bfa12428 880 mov r6, #0xbf\r
881 and r4, r4, #0xbf\r
07abbab1 882 strneb r4, [r1], #1\r
883 ldrh r4, [r1]\r
884 orr r6, r6, r6, lsl #8\r
885 and r4, r4, r6\r
886 strh r4, [r1], #2\r
887 ldrh r4, [r1]\r
888 and r4, r4, r6\r
889 strh r4, [r1], #2\r
890 ldrh r4, [r1]\r
891 and r4, r4, r6\r
892 strh r4, [r1], #2\r
893 ldrh r4, [r1]\r
894 and r4, r4, r6\r
895 streqh r4, [r1]\r
896 strneb r4, [r1]\r
cc68a136 897 b .dtfc_loop\r
898\r
@ r4 = rlim - dx (negative) on entry; builds a nibble mask in r12 by an\r
@ arithmetic shift of 0xf0000000, inverts it for flipped tiles, applies it to\r
@ r2 rotated by 16, then restores r12 to 0xf\r
7a7c6476 899.dtfc_cut_tile:\r
900 add r4, r4, #7 @ 0-6\r
901 mov r4, r4, lsl #2\r
902 mov r12,#0xf<<28\r
903 mov r12,r12,asr r4\r
904 mov r2, r2, ror #16\r
905 tst r5, #0x0800 @ flipped?\r
906 mvnne r12,r12\r
907 and r2, r2, r12\r
908 mov r2, r2, ror #16\r
909 mov r12,#0xf\r
910 tst r8, #1\r
911 bne .dtfc_shadow\r
912 tst r2, r2\r
913 beq .dtfc_loop\r
914 tst r5, #0x0800\r
915 beq .dtfc_TileNorm\r
916 b .dtfc_TileFlip\r
917\r
740da8c6 918@ check if we have detected layer covered with hi-prio tiles:\r
919.dtfc_check_rendflags:\r
920 ldr r1, =rendstatus\r
921 ldr r2, [r1]\r
283fec1b 922 tst r2, #(PDRAW_PLANE_HI_PRIO|PDRAW_SHHI_DONE)\r
740da8c6 923 beq .dtfc_loop\r
7a7c6476 924 bic r8, r8, #1 @ sh/hi mode off\r
283fec1b 925 tst r2, #PDRAW_SHHI_DONE\r
740da8c6 926 bne .dtfc_loop @ already processed\r
283fec1b 927 orr r2, r2, #PDRAW_SHHI_DONE\r
740da8c6 928 str r2, [r1]\r
929\r
@ clear bit 6 in 320 bytes starting at buffer base + 8:\r
@ 20 iterations of 16 bytes each\r
930 add r1, r11,#8\r
07abbab1 931 mov r3, #320/4/4\r
bfa12428 932 mov r6, #0xbf\r
740da8c6 933 orr r6, r6, r6, lsl #8\r
934 orr r6, r6, r6, lsl #16\r
935.dtfc_loop_shprep:\r
07abbab1 936 ldmia r1, {r2,r4,r5,r7}\r
740da8c6 937 subs r3, r3, #1\r
07abbab1 938 and r2, r2, r6\r
939 and r4, r4, r6\r
940 and r5, r5, r6\r
941 and r7, r7, r6\r
942 stmia r1!,{r2,r4,r5,r7}\r
943 bne .dtfc_loop_shprep\r
944\r
@ r5 was used as scratch above, so the cached previous code is invalidated\r
945 mvn r5, #0 @ r5=prevcode=-1\r
946 b .dtfc_loop\r
740da8c6 947\r
cc68a136 948.pool\r
949\r
950@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
951\r
952\r
fbc65db7 953.global DrawSpritesSHi @ unsigned char *sprited\r
e352c3af 954\r
955DrawSpritesSHi:\r
fbc65db7 956 ldr r3, [r0]\r
957 mov r12,#0xff\r
e352c3af 958 ands r3, r3, #0x7f\r
959 bxeq lr\r
cc68a136 960\r
cc68a136 961 stmfd sp!, {r4-r11,lr}\r
fbc65db7 962 strb r12,[r0,#2] @ set end marker\r
963 add r10,r0, #3 @ r10=HighLnSpr end\r
e352c3af 964 add r10,r10,r3 @ r10=HighLnSpr end\r
965\r
ea8c405f 966.if OVERRIDE_HIGHCOL\r
cc68a136 967 ldr r11,=HighCol\r
ea8c405f 968 mov r12,#0xf\r
969 ldr r11,[r11]\r
970.else\r
971 ldr r11,=HighCol\r
972 mov r12,#0xf\r
973.endif\r
cc68a136 974 ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
cc68a136 975\r
cc68a136 976\r
e352c3af 977DrawSpriteSHi:\r
978 @ draw next sprite\r
979 ldrb r0, [r10,#-1]!\r
980 ldr r1, =HighPreSpr\r
fbc65db7 981@ ldr r8, [sp, #-4]\r
e352c3af 982 cmp r0, #0xff\r
983 ldmeqfd sp!, {r4-r11,pc} @ end of list\r
984 and r0, r0, #0x7f\r
985 add r0, r1, r0, lsl #3\r
986\r
987 ldr r9, [r0, #4] @ sprite[1]\r
988 mov r2, r9, asr #16 @ r2=sx\r
cc68a136 989\r
e352c3af 990 mov r9, r9, lsl #16\r
991 mov r3, r9, lsr #31 @ priority\r
992 mov r9, r9, lsr #16\r
fbc65db7 993@ orr r9, r9, r8, lsl #31 @ r9=code|sh[31] @@ sh is always on here now\r
e352c3af 994 and r4, r9, #0x6000\r
995 orr r9, r9, r4, lsl #16\r
fbc65db7 996 orr r9, r9, #0x90000000 @ r9=scc1 ???? ... <code> (s=shadow/hilight, cc=pal)\r
e352c3af 997 cmp r12,r9, lsr #28 @ sh/hi with pal3?\r
e54507e8 998 cmpne r3, #1 @ if not, is it hi prio?\r
e352c3af 999 bne DrawSpriteSHi @ non-operator low sprite, already drawn\r
cc68a136 1000\r
e352c3af 1001 ldr r3, [r0] @ sprite[0]\r
1002 ldr r7, =DrawScanline\r
1003 mov r6, r3, lsr #28\r
1004 sub r6, r6, #1 @ r6=width-1 (inc later)\r
1005 mov r5, r3, lsr #24\r
1006 and r5, r5, #7 @ r5=height\r
cc68a136 1007\r
e352c3af 1008 mov r0, r3, lsl #16 @ r4=sy<<16 (tmp)\r
cc68a136 1009\r
e352c3af 1010 ldr r7, [r7]\r
1011 sub r7, r7, r0, asr #16 @ r7=row=DrawScanline-sy\r
cc68a136 1012\r
e352c3af 1013 tst r9, #0x1000\r
1014 movne r0, r5, lsl #3\r
1015 subne r0, r0, #1\r
1016 subne r7, r0, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y\r
cc68a136 1017\r
e352c3af 1018 add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down\r
1019 tst r9, #0x0800\r
1020 mlane r8, r5, r6, r8 @ if (code&0x0800) { tile+=delta*(width-1);\r
1021 rsbne r5, r5, #0 @ delta=-delta; } // r5=delta now\r
cc68a136 1022\r
e352c3af 1023 mov r8, r8, lsl #21\r
1024 mov r8, r8, lsr #17\r
1025 and r7, r7, #7\r
1026 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address\r
1027\r
1028 mov r5, r5, lsl #4 @ delta<<=4; // Delta of address\r
e352c3af 1029 mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30);\r
1030\r
1031 add r6, r6, #1 @ inc now\r
1032 adds r0, r2, #0 @ mov sx to r0 and set ZV flags\r
1033 b .dsprShi_loop_enter\r
1034\r
1035.dsprShi_loop:\r
1036 subs r6, r6, #1 @ width--\r
1037 beq DrawSpriteSHi\r
1038 adds r0, r0, #8 @ sx+=8\r
1039 add r8, r8, r5 @ tile+=delta\r
1040\r
1041.dsprShi_loop_enter:\r
1042 ble .dsprShi_loop @ sx <= 0\r
cc68a136 1043 cmp r0, #328\r
e352c3af 1044 bge DrawSpriteSHi\r
cc68a136 1045\r
1046 mov r8, r8, lsl #17\r
e352c3af 1047 mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address\r
cc68a136 1048\r
e352c3af 1049 ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
1050 add r1, r11, r0 @ r1=pdest\r
cc68a136 1051 tst r2, r2\r
e352c3af 1052 beq .dsprShi_loop\r
cc68a136 1053\r
e352c3af 1054 cmp r12, r9, lsr #28\r
1055 beq .dsprShi_shadow\r
cc68a136 1056\r
1057 cmp r2, r2, ror #4\r
e352c3af 1058 beq .dsprShi_SingleColor @ tileline singlecolor \r
cc68a136 1059\r
e352c3af 1060 tst r9, #0x0800\r
1061 bne .dsprShi_TileFlip\r
cc68a136 1062\r
e352c3af 1063 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
1064@ scratch: r4, r7\r
1065.dsprShi_TileNorm:\r
cc68a136 1066 TileNorm r12\r
e352c3af 1067 b .dsprShi_loop\r
cc68a136 1068\r
e352c3af 1069.dsprShi_TileFlip:\r
7a7c6476 1070 TileFlip r12\r
e352c3af 1071 b .dsprShi_loop\r
7a7c6476 1072\r
e352c3af 1073.dsprShi_SingleColor:\r
cc68a136 1074 and r4, r2, #0xf\r
1075 orr r4, r3, r4\r
1076 orr r4, r4, r4, lsl #8\r
e352c3af 1077 tst r0, #1 @ not aligned?\r
cc68a136 1078 strneb r4, [r1], #1\r
1079 streqh r4, [r1], #2\r
1080 strh r4, [r1], #2\r
1081 strh r4, [r1], #2\r
1082 strh r4, [r1], #2\r
1083 strneb r4, [r1], #1\r
e352c3af 1084 b .dsprShi_loop\r
cc68a136 1085\r
e352c3af 1086.dsprShi_shadow:\r
1087 tst r9, #0x8000\r
1088 beq .dsprShi_shadow_lowpri\r
07abbab1 1089\r
cc68a136 1090 cmp r2, r2, ror #4\r
e352c3af 1091 beq .dsprShi_singlec_sh\r
cc68a136 1092\r
e352c3af 1093 tst r9, #0x0800\r
1094 bne .dsprShi_TileFlip_sh\r
cc68a136 1095\r
e54507e8 1096 @ (r1=pdest, r2=pixels8, r3=pal) r4, r7: scratch, r12: helper pattern\r
e352c3af 1097.dsprShi_TileNorm_sh:\r
cc68a136 1098 TileNormSh\r
e352c3af 1099 b .dsprShi_loop\r
cc68a136 1100\r
e352c3af 1101.dsprShi_TileFlip_sh:\r
7a7c6476 1102 TileFlipSh\r
e352c3af 1103 b .dsprShi_loop\r
7a7c6476 1104\r
e352c3af 1105.dsprShi_singlec_sh:\r
cc68a136 1106 cmp r2, #0xe0000000\r
e352c3af 1107 bcc .dsprShi_SingleColor @ normal singlecolor tileline (carry inverted in ARM)\r
cc68a136 1108 tst r2, #0x10000000\r
e352c3af 1109 bne .dsprShi_sh_sh\r
cc68a136 1110 TileSingleHi\r
e352c3af 1111 b .dsprShi_loop\r
cc68a136 1112\r
e352c3af 1113.dsprShi_sh_sh:\r
cc68a136 1114 TileSingleSh\r
e352c3af 1115 b .dsprShi_loop\r
cc68a136 1116\r
e352c3af 1117.dsprShi_shadow_lowpri:\r
1118 tst r9, #0x800\r
1119 bne .dsprShi_TileFlip_sh_lp\r
07abbab1 1120\r
e352c3af 1121.dsprShi_TileNorm_sh_lp:\r
07abbab1 1122 TileNormSh_onlyop_lp\r
e352c3af 1123 b .dsprShi_loop\r
07abbab1 1124\r
e352c3af 1125.dsprShi_TileFlip_sh_lp:\r
07abbab1 1126 TileFlipSh_onlyop_lp\r
e352c3af 1127 b .dsprShi_loop\r
07abbab1 1128\r
cc68a136 1129.pool\r
1130\r
1131@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1132\r
@ void DrawAllSprites(unsigned char *sprited, int prio, int sh)
@
@ Draws every sprite of the current scanline whose priority flag equals prio.
@
@ In:  r0 = sprited  per-line sprite list. The low 7 bits of its first word
@                    are the entry count and entries start at sprited+3.
@                    Each entry is a byte: bits 0-6 index HighPreSpr (8 bytes
@                    per sprite), bit 7 is the priority flag.
@                    sprited[2] is overwritten with 0xff and used as sentinel;
@                    the list is walked from the last entry down to it.
@      r1 = prio
@      r2 = sh       shadow/hilight flag; assumed to be 0 or 1 because it is
@                    packed into bit 0 of (prio<<1)|sh
@ If rendstatus has PDRAW_DIRTY_SPRITES or PDRAW_SPRITES_MOVED set, both bits
@ are cleared and PrepareSprites(rendstatus & PDRAW_DIRTY_SPRITES) is called
@ first.
@
@ Stack: the packed (prio<<1)|sh word is pushed together with the callee-saved
@ registers and read back from [sp]. It must not be kept below sp: the
@ procedure call standard forbids accesses below the stack pointer and an
@ asynchronous signal/interrupt frame may overwrite that memory.
@
@ Register roles inside the DrawSprite loop:
@   r10 = list cursor, r11 = HighCol line buffer, r12 = 0xf helper pattern,
@   lr = Pico.vram, r9 = code with cached s/cc bits on top, r3 = pal,
@   r6 = remaining width in tiles, r5 = tile address delta, r8 = tile address,
@   r0 = sx, r1 = pdest, r2 = 8 packed pixels, r4/r7 = scratch
.global DrawAllSprites @ unsigned char *sprited, int prio, int sh

DrawAllSprites:
    ldr     r3, =rendstatus
    orr     r1, r2, r1, lsl #1      @ r1 = (prio<<1)|sh
    ldr     r12,[r3]
    tst     r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED)
    beq     das_no_prep
    stmfd   sp!, {r0,r1,r2,lr}      @ r2 is saved only to keep sp 8-byte aligned for the call
    and     r0, r12,#PDRAW_DIRTY_SPRITES
    bic     r12,r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED)
    str     r12,[r3]
    bl      PrepareSprites
    ldmfd   sp!, {r0,r1,r2,lr}

das_no_prep:
    ldr     r3, [r0]
    ands    r3, r3, #0x7f           @ r3 = number of list entries
    bxeq    lr                      @ empty list, nothing pushed yet

    @ time to do some real work
    stmfd   sp!, {r1,r4-r11,lr}     @ r1 has the lowest number, so [sp] = (prio<<1)|sh
    mov     r12,#0xff
    strb    r12,[r0,#2]             @ set end marker
    add     r10,r0, #3
    add     r10,r10,r3              @ r10=HighLnSpr end

.if OVERRIDE_HIGHCOL
    ldr     r11,=HighCol
    mov     r12,#0xf
    ldr     r11,[r11]
.else
    ldr     r11,=HighCol
    mov     r12,#0xf
.endif
    ldr     lr, =(Pico+0x10000)     @ lr=Pico.vram

@ + 0  :    hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: horiz. size
@ + 4  :    xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8

DrawSprite:                         @ was: unsigned int *sprite, int sh, int acc_sprites
    @ draw next sprite
    ldrb    r0, [r10,#-1]!
    ldr     r1, =HighPreSpr
    ldr     r8, [sp]                @ r8 = (prio<<1)|sh saved by the frame setup
    mov     r2, r0, lsr #7          @ r2 = priority flag of this entry
    cmp     r0, #0xff
    ldmeqfd sp!, {r1,r4-r11,pc}     @ end of list (r1 is popped only to unwind the frame)
    cmp     r2, r8, lsr #1
    bne     DrawSprite              @ wrong priority
    and     r0, r0, #0x7f
    add     r0, r1, r0, lsl #3      @ r0 = &HighPreSpr[index], 8 bytes per sprite

    ldr     r3, [r0]                @ sprite[0]
    ldr     r7, =DrawScanline
    mov     r6, r3, lsr #28
    sub     r6, r6, #1              @ r6=width-1 (inc later)
    mov     r5, r3, lsr #24
    and     r5, r5, #7              @ r5=height

    mov     r4, r3, lsl #16         @ r4=sy<<16 (tmp)

    ldr     r7, [r7]
    ldr     r9, [r0, #4]
    sub     r7, r7, r4, asr #16     @ r7=row=DrawScanline-sy

    mov     r2, r9, asr #16         @ r2=sx
    mov     r9, r9, lsl #16
    mov     r9, r9, lsr #16
    orr     r9, r9, r8, lsl #31     @ r9=code|sh[31]

    tst     r9, #0x1000
    movne   r4, r5, lsl #3
    subne   r4, r4, #1
    subne   r7, r4, r7              @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y

    add     r8, r9, r7, lsr #3      @ tile+=row>>3; // Tile number increases going down
    tst     r9, #0x0800
    mlane   r8, r5, r6, r8          @ if (code&0x0800) { tile+=delta*(width-1);
    rsbne   r5, r5, #0              @ delta=-delta; } // r5=delta now

    mov     r8, r8, lsl #21
    mov     r8, r8, lsr #17
    and     r7, r7, #7
    add     r8, r8, r7, lsl #1      @ tile+=(row&7)<<1; // Tile address

.dspr_continue:
    @ cache some stuff to avoid mem access
    mov     r5, r5, lsl #4          @ delta<<=4; // Delta of address
    and     r4, r9, #0x6000
    orr     r9, r9, r4, lsl #16
    orrs    r9, r9, #0x10000000     @ r9=scc1 ???? ... <code> (s=shadow/hilight, cc=pal)

    mov     r3, r4, lsr #9          @ r3=pal=((code>>9)&0x30);
    orrmi   r3, r3, #0x40           @ for sh/hi (N still comes from the orrs above)

    add     r6, r6, #1              @ inc now
    adds    r0, r2, #0              @ mov sx to r0 and set ZV flags
    b       .dspr_loop_enter

.dspr_loop:
    subs    r6, r6, #1              @ width--
    beq     DrawSprite
    adds    r0, r0, #8              @ sx+=8
    add     r8, r8, r5              @ tile+=delta

.dspr_loop_enter:
    ble     .dspr_loop              @ sx <= 0
    cmp     r0, #328
    bge     DrawSprite

    mov     r8, r8, lsl #17
    mov     r8, r8, lsr #17         @ tile&=0x7fff; // Clip tile address

    ldr     r2, [lr, r8, lsl #1]    @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
    add     r1, r11, r0             @ r1=pdest
    tst     r2, r2
    beq     .dspr_loop              @ blank tile line

    cmp     r12, r9, lsr #28        @ top nibble 0xf: s set and cc==3
    beq     .dspr_shadow

    cmp     r2, r2, ror #4
    beq     .dspr_SingleColor       @ tileline singlecolor

    tst     r9, #0x0800
    bne     .dspr_TileFlip

    @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern
@ scratch: r4, r7
.dspr_TileNorm:
    TileNorm r12
    b       .dspr_loop

.dspr_TileFlip:
    TileFlip r12
    b       .dspr_loop

.dspr_singlec_sh:
    cmp     r2, #0xe0000000
    bcs     .dspr_TileNorm_sh       @ op. tileline, markop. XXX: maybe add a spec. handler?

.dspr_SingleColor:
    and     r4, r2, #0xf
    orr     r4, r3, r4
    orr     r4, r4, r4, lsl #8      @ same pixel in both bytes of the halfword
    tst     r0, #1                  @ not aligned?
    strneb  r4, [r1], #1            @ odd sx: 1+2+2+2+1 bytes
    streqh  r4, [r1], #2            @ even sx: 2+2+2+2 bytes
    strh    r4, [r1], #2
    strh    r4, [r1], #2
    strh    r4, [r1], #2
    strneb  r4, [r1], #1
    b       .dspr_loop

.dspr_shadow:
    cmp     r2, r2, ror #4
    beq     .dspr_singlec_sh

    tst     r9, #0x0800
    bne     .dspr_TileFlip_sh

    @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern
.dspr_TileNorm_sh:
    TileNormSh_markop
    b       .dspr_loop

.dspr_TileFlip_sh:
    TileFlipSh_markop
    b       .dspr_loop
1308\r
1309\r
cc68a136 1310@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1311\r
@ DrawWindow(int tstart, int tend, int prio, int sh)\r
@ Renders the window plane for the current scanline (DrawScanline) into the\r
@ HighCol line buffer, starting 8 bytes in.\r
@   r0 = tstart, r1 = tend: column range; each unit covers 2 name table codes\r
@        (4 bytes of name table, 16 bytes of destination)\r
@   r2 = prio: priority being drawn, compared with bit 15 of every tile code\r
@   r3 = sh: stored as bit 8 of r6 to select the shadow handling at .dw_shadow\r
@ If PDRAW_WND_DIFF_PRIO is clear in rendstatus and the first code has the\r
@ other priority, the function returns at once. Every code seen with the other\r
@ priority sets bit 1 of r6, which is ORed into rendstatus on exit.\r
@ NOTE(review): the loop tests its counter only after the first cell, so\r
@ tend == tstart would wrap the counter - presumably callers pass tend > tstart.\r
1312.global DrawWindow @ int tstart, int tend, int prio, int sh // int *hcache\r
1313\r
1314DrawWindow:\r
1315 stmfd sp!, {r4-r11,lr}\r
1316\r
1317 ldr r11, =(Pico+0x22228) @ Pico.video\r
b6d7ac70 1318 ldr r10, =DrawScanline\r
cc68a136 1319 ldrb r12, [r11, #3] @ pvid->reg[3]\r
1320\r
1321 ldr r10, [r10]\r
1322 ldr r4, [r11, #12]\r
1323 mov r5, r10, lsr #3\r
1324 and r10, r10, #7\r
1325 mov r10, r10, lsl #1 @ r10=ty\r
1326\r
1327 mov r12, r12, lsl #10\r
1328\r
1329 tst r4, #1 @ 40 cell mode?\r
1330 andne r12, r12, #0xf000 @ 0x3c<<10\r
1331 andeq r12, r12, #0xf800\r
1332 addne r12, r12, r5, lsl #7\r
1333 addeq r12, r12, r5, lsl #6 @ nametab\r
1334 add r12, r12, r0, lsl #2 @ +starttile\r
1335\r
1336 ldr r6, =rendstatus\r
0fc0e241 1337 ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
07abbab1 1338 ldr r6, [r6]\r
cc68a136 1339\r
1340 @ fetch the first code now\r
1341 ldrh r7, [lr, r12]\r
1342\r
@ Z from this ands gates the three conditional instructions below\r
283fec1b 1343 ands r6, r6, #PDRAW_WND_DIFF_PRIO\r
cc68a136 1344 orr r6, r6, r2\r
cc68a136 1345\r
7292c709 1346 eoreq r8, r2, r7, lsr #15 @ do prio bits differ?\r
1347 cmpeq r8, #1\r
1348 ldmeqfd sp!, {r4-r11,pc} @ yes, assume that whole window uses same priority\r
cc68a136 1349\r
cc68a136 1350 orr r6, r6, r3, lsl #8 @ shadow mode\r
1351\r
1352 sub r8, r1, r0\r
cc68a136 1353\r
1354 @ cache some stuff to avoid mem access\r
ea8c405f 1355.if OVERRIDE_HIGHCOL\r
1356 ldr r11,=HighCol\r
1357 mov r8, r8, lsl #1 @ cells\r
1358 ldr r11,[r11]\r
1359 mvn r9, #0 @ r9=prevcode=-1\r
1360 add r11,r11,#8\r
1361.else\r
cc68a136 1362 ldr r11,=(HighCol+8)\r
ea8c405f 1363 mov r8, r8, lsl #1 @ cells\r
1364 mvn r9, #0 @ r9=prevcode=-1\r
1365.endif\r
07abbab1 1366 add r1, r11, r0, lsl #4 @ r1=pdest\r
cc68a136 1367 mov r0, #0xf\r
1368 b .dwloop_enter\r
1369\r
07abbab1 1370 @ r4,r5 are scratch in this loop\r
cc68a136 1371.dwloop:\r
1372 add r1, r1, #8\r
1373.dwloop_nor1:\r
1374 add r12, r12, #2 @ halfwords\r
1375 ldrh r7, [lr, r12] @ r7=code (int, but from unsigned, no sign extend)\r
1376 subs r8, r8, #1\r
1377 beq .dwloop_end @ done\r
1378\r
1379 eor r5, r6, r7, lsr #15\r
1380 tst r5, #1\r
1381 orrne r6, r6, #2 @ wrong pri\r
1382 bne .dwloop\r
1383\r
1384 cmp r7, r9\r
1385 beq .dw_samecode @ we know stuff about this tile already\r
1386\r
1387.dwloop_enter:\r
1388 mov r9, r7 @ remember code\r
1389\r
1390 movs r2, r9, lsl #20 @ if (code&0x1000)\r
1391 mov r2, r2, lsl #1\r
1392 add r2, r10, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4; addr+=ty\r
1393 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
1394\r
1395 and r3, r9, #0x6000\r
1396 mov r3, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
1397\r
1398 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
1399\r
1400.dw_samecode:\r
1401 tst r6, #0x100\r
1402 bne .dw_shadow\r
1403.dw_shadow_done:\r
1404 tst r2, r2\r
1405 beq .dwloop @ tileline blank\r
1406\r
1407 cmp r2, r2, ror #4\r
1408 beq .dw_SingleColor @ tileline singlecolor \r
1409\r
1410 tst r9, #0x0800\r
7a7c6476 1411 bne .dw_TileFlip\r
cc68a136 1412\r
1413 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
cc68a136 1414.dw_TileNorm:\r
1415 TileNorm r0\r
1416 b .dwloop\r
1417\r
7a7c6476 1418.dw_TileFlip:\r
1419 TileFlip r0\r
1420 b .dwloop\r
1421\r
cc68a136 1422.dw_SingleColor:\r
1423 and r4, r0, r2 @ #0x0000000f\r
1424 orr r4, r3, r4\r
1425 orr r4, r4, r4, lsl #8\r
1426 orr r4, r4, r4, lsl #16\r
1427 mov r5, r4\r
1428 stmia r1!, {r4,r5}\r
1429 b .dwloop_nor1 @ we incremented r1 ourselves\r
1430\r
@ shadow mode: low prio only tags the palette with 0x40; hi prio masks the 8\r
@ destination bytes with 0x3f before the tile is drawn on top\r
1431.dw_shadow:\r
1432 tst r6, #1 @ hi pri?\r
1433 orreq r3, r3, #0x40\r
1434 beq .dw_shadow_done\r
1435 ldr r4, [r1]\r
07abbab1 1436 mov r5, #0x3f\r
1437 orr r5, r5, r5, lsl #8\r
1438 orr r5, r5, r5, lsl #16\r
1439 and r4, r4, r5\r
cc68a136 1440 str r4, [r1]\r
1441 ldr r4, [r1,#4]\r
07abbab1 1442 and r4, r4, r5\r
cc68a136 1443 str r4, [r1,#4]\r
1444 b .dw_shadow_done\r
1445\r
1446.dwloop_end:\r
1447 ldr r0, =rendstatus\r
1448 ldr r1, [r0]\r
0fc0e241 1449 and r6, r6, #PDRAW_WND_DIFF_PRIO\r
cc68a136 1450 orr r1, r1, r6\r
1451 str r1, [r0]\r
1452\r
1453 ldmfd sp!, {r4-r11,r12}\r
1454 bx r12\r
1455\r
1456\r
1457@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1458\r
1459\r
1460@ hilights 2 pixels in RGB444/BGR444 format\r
@ \reg holds two 16-bit pixels. Each of the six 4-bit colour fields (bits 0-11\r
@ and 16-27) is rotated into the top nibble, gets 4 added there, and is forced\r
@ to 0xf when the add carries out (saturation). The ror #24 step skips bits\r
@ 28-31, which are not modified. The rotations add up to 160 bits (5 full\r
@ turns), so \reg ends in its original orientation. Clobbers the flags.\r
1461.macro TileDoShHi2Pixels444 reg\r
1462 mov \reg, \reg, ror #12\r
1463 adds \reg, \reg, #0x40000000\r
1464 orrcs \reg, \reg, #0xf0000000\r
1465 mov \reg, \reg, ror #28\r
1466 adds \reg, \reg, #0x40000000\r
1467 orrcs \reg, \reg, #0xf0000000\r
1468 mov \reg, \reg, ror #28\r
1469 adds \reg, \reg, #0x40000000\r
1470 orrcs \reg, \reg, #0xf0000000\r
1471 mov \reg, \reg, ror #24\r
1472 adds \reg, \reg, #0x40000000\r
1473 orrcs \reg, \reg, #0xf0000000\r
1474 mov \reg, \reg, ror #28\r
1475 adds \reg, \reg, #0x40000000\r
1476 orrcs \reg, \reg, #0xf0000000\r
1477 mov \reg, \reg, ror #28\r
1478 adds \reg, \reg, #0x40000000\r
1479 orrcs \reg, \reg, #0xf0000000\r
1480 mov \reg, \reg, ror #12\r
1481.endm\r
1482\r
1483\r
@ FinalizeLineBGR444(int sh)\r
@ Converts the 8-bit indexed line at HighCol+8 into 16-bit pixels at\r
@ DrawLineDest by table lookup, 4 pixels per loop iteration.\r
@   r0 = sh: non-zero selects the shadow/hilight path\r
@ Width: 320 pixels when bit 0 of the byte at Pico.video+12 is set, else 256;\r
@ in the 256 case the destination is advanced by 32*2 bytes unless PicoOpt\r
@ has bit 0x100 set.\r
@ Lookup table: Pico.cram when sh == 0. With sh != 0 it is HighPal, rebuilt\r
@ here when the dirtyPal byte is non-zero: 0x40 copied entries, then shadowed\r
@ entries at 0x40.. (duplicated at 0xc0..), then hilighted entries at 0x80..\r
@ Index mask: 0xff, or 0x3f when sh == 0 and PDRAW_ACC_SPRITES is set.\r
1484.global FinalizeLineBGR444 @ int sh\r
1485\r
1486FinalizeLineBGR444:\r
1487 stmfd sp!, {r4-r6,lr}\r
1488 mov r6, r0\r
1489 ldr lr, =(Pico+0x22228) @ Pico.video\r
1490 ldr r0, =DrawLineDest\r
1491 ldrb r12, [lr, #12]\r
1492 ldr r0, [r0]\r
1493 sub r3, lr, #0x128 @ r3=Pico.cram\r
1494\r
1495 tst r12, #1\r
1496 movne r2, #320/4 @ len\r
1497 bne .fl_no32colBGR444\r
1498 ldr r4, =PicoOpt\r
1499 mov r2, #256/4\r
1500 ldr r4, [r4]\r
1501 tst r4, #0x100\r
1502 addeq r0, r0, #32*2\r
1503\r
1504.fl_no32colBGR444:\r
1505 tst r6, r6\r
1506 beq .fl_noshBGR444\r
1507\r
1508 ldr r4, =HighPal\r
1509\r
1510 ldrb r12, [lr, #-0x1a] @ 0x2220e ~ dirtyPal\r
1511 tst r12, r12\r
1512 moveq r3, r4\r
1513 beq .fl_noshBGR444\r
1514 mov r12, #0\r
1515 strb r12, [lr, #-0x1a]\r
1516\r
@ 8 iterations of 16 bytes = 0x40 halfword entries\r
1517 mov lr, #0x40/8\r
1518 @ copy pal:\r
1519.fl_loopcpBGR444:\r
1520 ldmia r3!, {r1,r5,r6,r12}\r
1521 subs lr, lr, #1\r
1522 stmia r4!, {r1,r5,r6,r12}\r
1523 bne .fl_loopcpBGR444\r
1524\r
1525 @ shadowed pixels:\r
@ r12 = 0x07770777: after the shift right by 1 it keeps each 4-bit channel halved\r
1526 mov r12, #0x0077\r
1527 orr r12,r12,#0x0700\r
1528 orr r12,r12,r12,lsl #16\r
1529 sub r3, r3, #0x40*2\r
1530 add r5, r4, #0x80*2\r
1531 mov lr, #0x40/4\r
1532.fl_loopcpBGR444_sh:\r
1533 ldmia r3!, {r1,r6}\r
1534 subs lr, lr, #1\r
1535 and r1, r12, r1, lsr #1\r
1536 and r6, r12, r6, lsr #1\r
1537 stmia r4!, {r1,r6}\r
1538 stmia r5!, {r1,r6}\r
1539 bne .fl_loopcpBGR444_sh\r
1540\r
1541 @ hilighted pixels:\r
1542 sub r3, r3, #0x40*2\r
1543 mov lr, #0x40/2\r
1544.fl_loopcpBGR444_hi:\r
1545 ldr r1, [r3], #4\r
1546 TileDoShHi2Pixels444 r1\r
1547 str r1, [r4], #4\r
1548 subs lr, lr, #1\r
1549 bne .fl_loopcpBGR444_hi\r
1550\r
@ r4 is now 0xc0 entries past the start of HighPal; step back to its start\r
1551 sub r3, r4, #0x40*3*2\r
e5fa9817 1552 mov r6, #1\r
cc68a136 1553\r
1554\r
1555.fl_noshBGR444:\r
e5fa9817 1556 ldr r12,=rendstatus\r
@ Z is clear only when r6 != 1; the tstne/movne below depend on these flags\r
1557 eors r6, r6, #1 @ sh is 0\r
1558 ldr r12,[r12]\r
1559 mov lr, #0xff\r
283fec1b 1560 tstne r12,#PDRAW_ACC_SPRITES\r
e5fa9817 1561\r
ea8c405f 1562.if OVERRIDE_HIGHCOL\r
1563 ldr r1, =HighCol\r
e5fa9817 1564 movne lr, #0x3f\r
ea8c405f 1565 ldr r1, [r1]\r
1566 mov lr, lr, lsl #1\r
1567 add r1, r1, #8\r
1568.else\r
cc68a136 1569 ldr r1, =(HighCol+8)\r
e5fa9817 1570 movne lr, #0x3f\r
cc68a136 1571 mov lr, lr, lsl #1\r
ea8c405f 1572.endif\r
cc68a136 1573\r
@ lr = index mask << 1, so the masked values are byte offsets of halfword entries\r
1574.fl_loopBGR444:\r
cc68a136 1575 ldr r12, [r1], #4\r
1576 subs r2, r2, #1\r
1577\r
1578 and r4, lr, r12, lsl #1\r
1579 ldrh r4, [r3, r4]\r
1580 and r5, lr, r12, lsr #7\r
1581 ldrh r5, [r3, r5]\r
1582 and r6, lr, r12, lsr #15\r
1583 ldrh r6, [r3, r6]\r
e5fa9817 1584 and r12,lr, r12, lsr #23\r
1585 ldrh r12,[r3, r12] @ 1c.i.\r
cc68a136 1586 orr r4, r4, r5, lsl #16\r
e5fa9817 1587 orr r5, r6, r12,lsl #16\r
cc68a136 1588\r
1589 stmia r0!, {r4,r5}\r
1590 bne .fl_loopBGR444\r
1591\r
1592\r
1593 ldmfd sp!, {r4-r6,lr}\r
1594 bx lr\r
1595\r
1596\r
1597@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1598\r
1599\r
cc68a136 1600@ Convert 0000bbb0 ggg0rrr0\r
1601@ to rrrrrggg gggbbbbb\r
1602\r
a39d8ba5 1603@ r2,r3 - scratch, lr = 0x001c001c, r8 = 0x08610861\r
@ \reg holds two packed 16-bit pixels; both are converted at once because the\r
@ masks in lr and r8 are duplicated in both halfwords.\r
@ The first five instructions place each 3-bit channel at bits 2-4 (b),\r
@ 8-10 (g) and 13-15 (r). The last two copy bits 4, 9, 10 and 15 down to\r
@ bits 0, 5, 6 and 11 to fill part of the low bits of each field.\r
cc68a136 1604.macro convRGB565 reg\r
a39d8ba5 1605 and r2, lr, \reg,lsr #7 @ b\r
1606 and r3, lr, \reg,lsr #3 @ g\r
1607 and \reg, lr, \reg,lsl #1 @ r\r
1608 orr r2, r2, r3, lsl #6\r
1609 orr \reg, r2, \reg,lsl #11\r
1610\r
1611 and r2, r8, \reg,lsr #4\r
1612 orr \reg, \reg, r2\r
cc68a136 1613.endm\r
1614\r
@ Converts r2 pixels from [r1] to [r0] with convRGB565, 8 pixels per iteration.\r
@ in: r0 = dst, r1 = src, r2 = pixel count, r8 = 0x08610861 (read only)\r
@ The loop body runs before the counter is tested, so at least 8 pixels are\r
@ always converted; a count that is not a multiple of 8 is rounded down.\r
a39d8ba5 1615@ trashes: r2-r7,r12,lr,flags; expects r8 = 0x08610861; r0,r1 are advanced\r
b2305d08 1616.macro vidConvCpyRGB565_local\r
2ec14aec 1617 mov r12, r2, lsr #3 @ repeats\r
cc68a136 1618 mov lr, #0x001c0000\r
1619 orr lr, lr, #0x01c @ lr == pattern 0x001c001c\r
cc68a136 1620\r
b2305d08 16210:\r
cc68a136 1622 ldmia r1!, {r4-r7}\r
1623 subs r12, r12, #1\r
1624 convRGB565 r4\r
1625 str r4, [r0], #4\r
1626 convRGB565 r5\r
1627 str r5, [r0], #4\r
1628 convRGB565 r6\r
1629 str r6, [r0], #4\r
1630 convRGB565 r7\r
1631 str r7, [r0], #4\r
1632\r
@ flags still come from the subs above; convRGB565 and str do not set them\r
b2305d08 1633 bgt 0b\r
1634.endm\r
1635\r
1636\r
@ void vidConvCpyRGB565(void *to, void *from, int pixels)
@ Thin wrapper around the vidConvCpyRGB565_local macro: loads the 0x08610861
@ helper constant the macro expects in r8 and preserves r4-r9 for the caller.
@ In: r0 = to, r1 = from, r2 = pixels
.global vidConvCpyRGB565

vidConvCpyRGB565: @ void *to, void *from, int pixels
    stmfd   sp!, {r4-r9,lr}
    mov     r8, #0x0800
    orr     r8, r8, #0x0061         @ r8 = 0x0861
    orr     r8, r8, r8, lsl #16     @ r8 = 0x08610861
    vidConvCpyRGB565_local
    ldmfd   sp!, {r4-r9,lr}
    bx      lr
1647\r
1648\r
@ PicoDoHighPal555(int sh)\r
@ Rebuilds HighPal from Pico.cram: clears the dirtyPal byte and converts 0x40\r
@ entries with vidConvCpyRGB565_local. When sh is set it also writes shadowed\r
@ entries at 0x40.. (duplicated at 0xc0..) and hilighted entries at 0x80..\r
@ Two entry points share this body:\r
@   PicoDoHighPal555        - pushes r4-r9,lr itself, sets r1 = 0 and returns\r
@                             to its caller\r
@   PicoDoHighPal555_nopush - entered from FinalizeLine555 with r1 = 1,\r
@                             r8 = Pico.video and the same registers already\r
@                             pushed; ends by branching to\r
@                             FinalizeLineRGB555_pal_done instead of returning\r
@ r9 keeps both flags: bit 0 = entered from FinalizeLine555, bit 31 = sh.\r
b2305d08 1649.global PicoDoHighPal555 @ int sh\r
cc68a136 1650\r
b2305d08 1651PicoDoHighPal555:\r
1652 stmfd sp!, {r4-r9,lr}\r
1653 mov r1, #0\r
3d48f143 1654 ldr r8, =(Pico+0x22228) @ Pico.video\r
cc68a136 1655\r
b2305d08 1656PicoDoHighPal555_nopush:\r
a39d8ba5 1657 orr r9, r1, r0, lsl #31 @ 0:called from FinalizeLine555, 31: s/h\r
b2305d08 1658\r
b2305d08 1659 ldr r0, =HighPal\r
1660\r
cc68a136 1661 mov r1, #0\r
b2305d08 1662 strb r1, [r8, #-0x1a] @ 0x2220e ~ dirtyPal\r
1663\r
3d48f143 1664 sub r1, r8, #0x128 @ r1=Pico.cram\r
cc68a136 1665 mov r2, #0x40\r
a39d8ba5 1666 mov r8, #0x0061\r
1667 orr r8, r8, #0x0800\r
f4750ee0 1668 orr r8, r8, r8, lsl #16\r
a39d8ba5 1669\r
b2305d08 1670 vidConvCpyRGB565_local\r
cc68a136 1671\r
a39d8ba5 1672 tst r9, #(1<<31)\r
@ on this branch r0 still holds the advanced HighPal pointer, not sh\r
b2305d08 1673 beq PicoDoHighPal555_end\r
1674\r
1675 ldr r3, =HighPal\r
cc68a136 1676\r
1677 @ shadowed pixels:\r
@ r12 = 0x738e738e, applied after a shift right by 1\r
1678 mov r12, #0x008e\r
cc68a136 1679 add r4, r3, #0x40*2\r
b2305d08 1680 orr r12,r12,#0x7300\r
cc68a136 1681 add r5, r3, #0xc0*2\r
b2305d08 1682 orr r12,r12,r12,lsl #16\r
cc68a136 1683 mov lr, #0x40/4\r
1684.fl_loopcpRGB555_sh:\r
1685 ldmia r3!, {r1,r6}\r
1686 subs lr, lr, #1\r
1687 and r1, r12, r1, lsr #1\r
1688 and r6, r12, r6, lsr #1\r
1689 stmia r4!, {r1,r6}\r
1690 stmia r5!, {r1,r6}\r
1691 bne .fl_loopcpRGB555_sh\r
1692\r
1693 @ hilighted pixels:\r
a39d8ba5 1694 @ t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e;\r
1695 @ t |= (t >> 4) & 0x08610861;\r
1696 @ r8=0x08610861\r
cc68a136 1697 sub r3, r3, #0x40*2\r
a39d8ba5 1698 mov lr, #0x40/4\r
cc68a136 1699.fl_loopcpRGB555_hi:\r
a39d8ba5 1700 ldmia r3!, {r1,r6}\r
1701 and r1, r12, r1, lsr #1\r
1702 and r6, r12, r6, lsr #1\r
1703 add r1, r12, r1\r
1704 add r6, r12, r6\r
1705 and r5, r8, r1, lsr #4\r
1706 and r7, r8, r6, lsr #4\r
1707 orr r1, r1, r5\r
1708 orr r6, r6, r7\r
1709 stmia r4!, {r1,r6}\r
cc68a136 1710 subs lr, lr, #1\r
1711 bne .fl_loopcpRGB555_hi\r
@ r0 = 1 (sh) for the eors test at FinalizeLineRGB555_pal_done\r
b2305d08 1712 mov r0, #1\r
cc68a136 1713\r
b2305d08 1714PicoDoHighPal555_end:\r
a39d8ba5 1715 tst r9, #1\r
b2305d08 1716 ldmeqfd sp!, {r4-r9,pc}\r
1717\r
@ r8 was reused for the 0x08610861 constant, so reload it before continuing\r
1718 ldr r8, =(Pico+0x22228) @ Pico.video\r
1719 b FinalizeLineRGB555_pal_done\r
1720\r
1721\r
@ FinalizeLine555(int sh)\r
@ Converts the 8-bit indexed line at HighCol+8 into 16-bit pixels at\r
@ DrawLineDest through the HighPal lookup table.\r
@   r0 = sh: shadow/hilight flag\r
@ If the dirtyPal byte is non-zero the palette is rebuilt first by jumping to\r
@ PicoDoHighPal555_nopush with r1 = 1; that code comes back to\r
@ FinalizeLineRGB555_pal_done.\r
@ Index mask: 0xff, or 0x3f when the eors test on r0 leaves Z clear and\r
@ PDRAW_ACC_SPRITES is set in rendstatus.\r
@ Output modes, chosen from bit 0 of the byte at Pico.video+12 and PicoOpt:\r
@   bit set                  - 320 pixels, 8 per iteration (.fl_loopRGB555)\r
@   clear, PicoOpt & 0x4000  - 256 source pixels scaled to 320: every 8 source\r
@                              pixels produce 10 output pixels by blending\r
@                              neighbours (.fl_loop32scale_RGB555)\r
@   clear, otherwise         - 256 pixels; destination advanced by 32*2 bytes\r
@                              unless PicoOpt & 0x100\r
@ With UNALIGNED_DRAWLINEDEST, a destination with bit 1 set uses the *u loops,\r
@ which begin writing 2 bytes before the destination.\r
5a681086 1722.global FinalizeLine555 @ int sh\r
b2305d08 1723\r
5a681086 1724FinalizeLine555:\r
b2305d08 1725 stmfd sp!, {r4-r9,lr}\r
1726 ldr r8, =(Pico+0x22228) @ Pico.video\r
1727\r
1728 ldrb r2, [r8, #-0x1a] @ 0x2220e ~ dirtyPal\r
1729 mov r1, #1\r
1730 tst r2, r2\r
1731 bne PicoDoHighPal555_nopush\r
1732\r
1733FinalizeLineRGB555_pal_done:\r
1734 ldr r3, =HighPal\r
cc68a136 1735\r
e5fa9817 1736 ldr r12,=rendstatus\r
b2305d08 1737 eors r0, r0, #1 @ sh is 0\r
e5fa9817 1738 ldr r12,[r12]\r
1739 mov lr, #0xff\r
283fec1b 1740 tstne r12,#PDRAW_ACC_SPRITES\r
e5fa9817 1741 movne lr, #0x3f\r
1742\r
ea8c405f 1743.if OVERRIDE_HIGHCOL\r
1744 ldr r1, =HighCol\r
1745 ldr r0, =DrawLineDest\r
1746 ldr r1, [r1]\r
1747 ldr r0, [r0]\r
1748 add r1, r1, #8\r
1749.else\r
3d48f143 1750 ldr r0, =DrawLineDest\r
cc68a136 1751 ldr r1, =(HighCol+8)\r
3d48f143 1752 ldr r0, [r0]\r
ea8c405f 1753.endif\r
3d48f143 1754\r
1755 ldrb r12, [r8, #12]\r
@ lr = index mask << 1, so the masked values are byte offsets of halfword entries\r
cc68a136 1756 mov lr, lr, lsl #1\r
1757\r
3d48f143 1758 tst r12, #1\r
1759 movne r2, #320/8 @ len\r
1760 bne .fl_no32colRGB555\r
1761 ldr r4, =PicoOpt\r
1762 mov r2, #256/8\r
1763 ldr r4, [r4]\r
1764 tst r4, #0x4000\r
1765 bne .fl_32scale_RGB555\r
1766 tst r4, #0x0100\r
1767 addeq r0, r0, #32*2\r
1768\r
1769.fl_no32colRGB555:\r
cc68a136 1770\r
499a0be3 1771.if UNALIGNED_DRAWLINEDEST\r
1772 @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer\r
1773 tst r0, #2\r
1774 bne .fl_RGB555u\r
1775.endif\r
1776\r
@ r8 (Pico.video) is no longer needed from here on and is reused as scratch\r
1777.fl_loopRGB555:\r
cc68a136 1778 ldr r12, [r1], #4\r
1779 ldr r7, [r1], #4\r
1780\r
1781 and r4, lr, r12, lsl #1\r
1782 ldrh r4, [r3, r4]\r
1783 and r5, lr, r12, lsr #7\r
1784 ldrh r5, [r3, r5]\r
1785 and r6, lr, r12, lsr #15\r
1786 ldrh r6, [r3, r6]\r
1787 orr r4, r4, r5, lsl #16\r
1788\r
1789 and r5, lr, r12, lsr #23\r
1790 ldrh r5, [r3, r5]\r
1791 and r8, lr, r7, lsl #1\r
1792 ldrh r8, [r3, r8]\r
1793 orr r5, r6, r5, lsl #16\r
1794\r
1795 and r6, lr, r7, lsr #7\r
1796 ldrh r6, [r3, r6]\r
1797 and r12,lr, r7, lsr #15\r
1798 ldrh r12,[r3, r12]\r
499a0be3 1799 and r7, lr, r7, lsr #23\r
1800 ldrh r7, [r3, r7]\r
cc68a136 1801 orr r8, r8, r6, lsl #16\r
1802\r
cc68a136 1803 subs r2, r2, #1\r
499a0be3 1804 orr r12,r12, r7, lsl #16\r
cc68a136 1805\r
1806 stmia r0!, {r4,r5,r8,r12}\r
1807 bne .fl_loopRGB555\r
1808\r
b2305d08 1809 ldmfd sp!, {r4-r9,lr}\r
3d48f143 1810 bx lr\r
1811\r
1812\r
@ 256 -> 320 scaling path; r10 is needed as an extra register, so it is saved\r
@ separately. r9 = 0x39e7 and is always used as r9, lsl #2 = 0xe79c.\r
1813.fl_32scale_RGB555:\r
b2305d08 1814 stmfd sp!, {r10}\r
3d48f143 1815 mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
1816 orr r9, r9, #0x00e7\r
1817\r
499a0be3 1818.if UNALIGNED_DRAWLINEDEST\r
1819 tst r0, #2\r
1820 bne .fl_32scale_RGB555u\r
1821.endif\r
1822\r
3d48f143 1823.fl_loop32scale_RGB555:\r
1824 ldr r12, [r1], #4\r
1825 ldr r7, [r1], #4\r
cc68a136 1826\r
3d48f143 1827 and r4, lr, r12,lsl #1\r
1828 ldrh r4, [r3, r4]\r
1829 and r5, lr, r12,lsr #7\r
1830 ldrh r5, [r3, r5]\r
1831 and r4, r4, r9, lsl #2\r
1832 orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0\r
1833 and r5, r5, r9, lsl #2\r
1834 sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1\r
1835 add r4, r4, r6, lsl #16 @ pix_d 0, 1\r
1836 and r6, lr, r12,lsr #15\r
1837 ldrh r6, [r3, r6]\r
1838 and r12,lr, r12,lsr #23\r
1839 ldrh r12,[r3, r12]\r
1840 and r6, r6, r9, lsl #2\r
1841 add r5, r5, r6\r
1842 mov r5, r5, lsr #1\r
1843 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2\r
1844 orr r5, r5, r6, lsl #16\r
1845\r
1846 and r6, lr, r7, lsl #1\r
1847 ldrh r6, [r3, r6]\r
1848 and r12,r12,r9, lsl #2\r
1849 add r5, r5, r12,lsl #14 @ pix_d 2, 3\r
1850 and r6, r6, r9, lsl #2\r
1851 orr r6, r12,r6, lsl #16 @ pix_d 4, 5\r
1852\r
1853 and r12,lr, r7, lsr #7\r
1854 ldrh r12,[r3, r12]\r
1855 and r10,lr, r7, lsr #15\r
1856 ldrh r10,[r3, r10]\r
1857 and r12,r12,r9, lsl #2\r
1858 sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1\r
1859 add r8, r8, r6, lsr #18\r
1860 and r7, lr, r7, lsr #23\r
1861 ldrh r7, [r3, r7]\r
1862 and r10,r10,r9, lsl #2\r
1863 orr r8, r8, r10,lsl #15\r
1864 add r8, r8, r12,lsl #15 @ pix_d 6, 7\r
1865 sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2\r
1866 and r7, r7, r9, lsl #2\r
1867 add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3\r
1868 orr r10,r10,r7, lsl #16 @ pix_d 8, 9\r
1869\r
1870 subs r2, r2, #1\r
1871\r
1872 stmia r0!, {r4,r5,r6,r8,r10}\r
1873 bne .fl_loop32scale_RGB555\r
1874\r
b2305d08 1875 ldmfd sp!, {r10}\r
1876 ldmfd sp!, {r4-r9,lr}\r
3d48f143 1877 bx lr\r
1878\r
499a0be3 1879.if UNALIGNED_DRAWLINEDEST\r
1880 @ unaligned versions of loops\r
9839d126 1881 @ warning: starts drawing 2bytes before dst\r
499a0be3 1882\r
@ The destination is moved back by 2 so that word stores are aligned. The first\r
@ stored word has 0 in its low half; r8 carries the last looked-up pixel of\r
@ each iteration into the next one and is stored as a final halfword.\r
1883.fl_RGB555u:\r
9839d126 1884 sub r0, r0, #2 @ initial adjustment\r
1885 mov r8, #0\r
499a0be3 1886\r
1887.fl_loopRGB555u:\r
1888 ldr r12, [r1], #4\r
1889 ldr r7, [r1], #4\r
1890\r
9839d126 1891 and r6, lr, r12,lsl #1\r
1892 ldrh r6, [r3, r6]\r
1893 and r5, lr, r12,lsr #7\r
499a0be3 1894 ldrh r5, [r3, r5]\r
9839d126 1895 orr r4, r8, r6, lsl #16\r
499a0be3 1896\r
9839d126 1897 and r6, lr, r12,lsr #15\r
499a0be3 1898 ldrh r6, [r3, r6]\r
9839d126 1899 and r8, lr, r12,lsr #23\r
499a0be3 1900 ldrh r8, [r3, r8]\r
9839d126 1901 orr r5, r5, r6, lsl #16\r
499a0be3 1902\r
9839d126 1903 and r6, lr, r7, lsl #1\r
499a0be3 1904 ldrh r6, [r3, r6]\r
9839d126 1905 and r12,lr, r7, lsr #7\r
499a0be3 1906 ldrh r12,[r3, r12]\r
9839d126 1907 orr r6, r8, r6, lsl #16\r
1908\r
1909 and r8, lr, r7, lsr #15\r
499a0be3 1910 ldrh r8, [r3, r8]\r
9839d126 1911 and r7, lr, r7, lsr #23\r
499a0be3 1912\r
1913 subs r2, r2, #1\r
9839d126 1914 orr r12,r12,r8, lsl #16\r
1915 ldrh r8, [r3, r7]\r
1916\r
1917 stmia r0!, {r4,r5,r6,r12}\r
499a0be3 1918 bne .fl_loopRGB555u\r
1919\r
1920 strh r8, [r0], #2\r
1921\r
b2305d08 1922 ldmfd sp!, {r4-r9,lr}\r
499a0be3 1923 bx lr\r
1924\r
1925\r
@ Unaligned variant of the scaling loop; r4 carries the pending pixel between\r
@ iterations (0 for the first word) and is stored as a final halfword.\r
1926.fl_32scale_RGB555u:\r
9839d126 1927 sub r0, r0, #2 @ initial adjustment\r
1928 mov r4, #0\r
499a0be3 1929\r
1930 @ r9 f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
1931.fl_loop32scale_RGB555u:\r
1932 ldr r12, [r1], #4\r
1933 ldr r7, [r1], #4\r
1934\r
9839d126 1935 and r6, lr, r12,lsl #1\r
1936 ldrh r6, [r3, r6]\r
499a0be3 1937 and r5, lr, r12,lsr #7\r
1938 ldrh r5, [r3, r5]\r
9839d126 1939 and r6, r6, r9, lsl #2\r
1940 orr r4, r4, r6, lsl #16 @ r4 = pix_d -1, 0\r
499a0be3 1941\r
1942 and r5, r5, r9, lsl #2\r
9839d126 1943 sub r8, r5, r5, lsr #2 @ r8 = 3/4 pix_s 1\r
1944 add r6, r8, r6, lsr #2 @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1)\r
1945 orr r5, r6, r5, lsl #15\r
499a0be3 1946\r
1947 and r6, lr, r12,lsr #15\r
1948 ldrh r6, [r3, r6]\r
1949 and r12,lr, r12,lsr #23\r
1950 ldrh r12,[r3, r12]\r
1951 and r6, r6, r9, lsl #2\r
9839d126 1952 add r5, r5, r6, lsl #15 @ r5 = pix_d 1, 2\r
499a0be3 1953\r
9839d126 1954 and r8, lr, r7, lsl #1\r
1955 ldrh r8, [r3, r8]\r
1956 and r10,lr, r7, lsr #7\r
1957 ldrh r10,[r3, r10]\r
499a0be3 1958 and r12,r12,r9, lsl #2\r
9839d126 1959 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2\r
1960 add r6, r6, r12,lsr #2\r
1961 orr r6, r6, r12,lsl #16 @ r6 = pix_d 3, 4\r
499a0be3 1962\r
9839d126 1963 and r8, r8, r9, lsl #2\r
1964 and r10,r10,r9, lsl #2\r
1965 sub r12,r10,r10,lsr #2 @ r12 = 3/4 pix_s 5\r
1966 orr r8, r8, r8, lsl #14\r
1967 add r8, r8, r12,lsl #16 @ r8 = pix_d 5, 6\r
1968 and r12,lr, r7, lsr #15\r
499a0be3 1969 ldrh r12,[r3, r12]\r
499a0be3 1970 and r7, lr, r7, lsr #23\r
1971 ldrh r7, [r3, r7]\r
9839d126 1972 and r12,r12,r9, lsl #2\r
1973 add r10,r10,r12\r
1974 mov r10,r10, lsr #1\r
1975 sub r12,r12,r12,lsr #2 @ r12 = 3/4 pix_s 6\r
1976 orr r10,r10,r12,lsl #16\r
499a0be3 1977 and r7, r7, r9, lsl #2\r
9839d126 1978 add r10,r10,r7, lsl #14 @ r10 = pix_d 7, 8\r
499a0be3 1979\r
1980 subs r2, r2, #1\r
1981\r
1982 stmia r0!, {r4,r5,r6,r8,r10}\r
@ mov does not change the flags, so the bne below still tests the subs result\r
9839d126 1983 mov r4, r7\r
499a0be3 1984 bne .fl_loop32scale_RGB555u\r
1985\r
9839d126 1986 strh r4, [r0], #2\r
1987\r
b2305d08 1988 ldmfd sp!, {r10}\r
1989 ldmfd sp!, {r4-r9,lr}\r
499a0be3 1990 bx lr\r
1991\r
1992.endif @ UNALIGNED_DRAWLINEDEST\r
1993\r
cc68a136 1994\r
1995@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1996\r
1997@ utility\r
@ void blockcpy(void *dst, void *src, size_t n)
@ Copies n bytes from src to dst in 16-byte chunks; n is rounded down to a
@ multiple of 16. Returns without touching memory when n < 16: the copy loop
@ tests its counter only after the first chunk, so a zero chunk count would
@ wrap around and overrun both buffers.
@ In:       r0 = dst, r1 = src, r2 = n
@ Clobbers: r0-r3, r12, flags
.global blockcpy @ void *dst, void *src, size_t n

blockcpy:
    movs    r2, r2, lsr #4          @ r2 = number of 16-byte chunks
    bxeq    lr                      @ nothing to copy
    stmfd   sp!, {r4,r5}
blockcpy_loop:
    ldmia   r1!, {r3-r5,r12}
    subs    r2, r2, #1
    stmia   r0!, {r3-r5,r12}
    bne     blockcpy_loop
    ldmfd   sp!, {r4,r5}
    bx      lr
2010\r
2011\r
@ void blockcpy_or(void *dst, void *src, size_t n, int pat)
@ Like blockcpy, but every copied byte is ORed with the low byte of pat.
@ Copies in 16-byte chunks; n is rounded down to a multiple of 16. Returns
@ without touching memory when n < 16: the copy loop tests its counter only
@ after the first chunk, so a zero chunk count would wrap around and overrun
@ both buffers.
@ In:       r0 = dst, r1 = src, r2 = n, r3 = pat (assumed to fit in 8 bits,
@           since it is replicated by shifting and ORing)
@ Clobbers: r0-r3, r12, flags
.global blockcpy_or @ void *dst, void *src, size_t n, int pat

blockcpy_or:
    movs    r2, r2, lsr #4          @ r2 = number of 16-byte chunks
    bxeq    lr                      @ nothing to copy
    stmfd   sp!, {r4-r6}
    orr     r3, r3, r3, lsl #8
    orr     r3, r3, r3, lsl #16     @ r3 = pat replicated into all four bytes
blockcpy_loop_or:
    ldmia   r1!, {r4-r6,r12}
    subs    r2, r2, #1
    orr     r4, r4, r3
    orr     r5, r5, r3
    orr     r6, r6, r3
    orr     r12,r12,r3
    stmia   r0!, {r4-r6,r12}
    bne     blockcpy_loop_or
    ldmfd   sp!, {r4-r6}
    bx      lr
2030\r
cff531af 2031@ vim:filetype=armasm\r