2 * assembly optimized versions of most functions from draw2.c
\r
3 * (C) notaz, 2006-2008
\r
5 * This work is licensed under the terms of MAME license.
\r
6 * See COPYING file in the top-level directory.
\r
8 * this is highly specialized, be careful if changing related C code!
\r
14 @ define these constants in your include file:
\r
15 @ .equiv START_ROW, 1
\r
16 @ .equiv END_ROW, 27
\r
17 @ one row means 8 pixels. If the above example were used, (27-1)*8=208 lines would be rendered.
\r
28 .global BackFillFull @ int reg7
\r
31 stmfd sp!, {r4-r9,lr}
\r
33 ldr lr, =PicoDraw2FB @ lr=PicoDraw2FB
\r
39 orr r0, r0, r0, lsl #8
\r
40 orr r0, r0, r0, lsl #16
\r
42 mov r1, r0 @ 25 opcodes wasted?
\r
52 mov r12, #(END_ROW-START_ROW)*8
\r
59 stmia lr!, {r0-r9} @ 10*4*8
\r
70 ldmfd sp!, {r4-r9,r12}
\r
75 @ -------- some macros --------
\r
79 @ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old
\r
80 .macro TileLineSinglecol notsinglecol=0
\r
81 and r2, r2, #0xf @ #0x0000000f
\r
83 cmp r2, r0, lsr #28 @ if these don't match,
\r
84 bicne r9, r9, #2 @ it is a sign that the whole tile is not singlecolor (only its lines may be)
\r
87 orr r4, r4, r4, lsl #8
\r
89 tst r1, #1 @ not aligned?
\r
95 strneb r4, [r1], #1 @ have a remaining unaligned pixel?
\r
99 orr r0, r0, r2, lsl #28 @ we will need the old palindex later
\r
103 @ TileLineNorm (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
\r
104 .macro TileLineNorm
\r
105 ands r4, r0, r2, lsr #12 @ #0x0000f000
\r
108 ands r4, r0, r2, lsr #8 @ #0x00000f00
\r
111 ands r4, r0, r2, lsr #4 @ #0x000000f0
\r
114 ands r4, r0, r2 @ #0x0000000f
\r
117 ands r4, r0, r2, lsr #28 @ #0xf0000000
\r
120 ands r4, r0, r2, lsr #24 @ #0x0f000000
\r
123 ands r4, r0, r2, lsr #20 @ #0x00f00000
\r
126 ands r4, r0, r2, lsr #16 @ #0x000f0000
\r
131 @ TileLineFlip (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
\r
132 .macro TileLineFlip
\r
133 ands r4, r0, r2, lsr #16 @ #0x000f0000
\r
136 ands r4, r0, r2, lsr #20 @ #0x00f00000
\r
139 ands r4, r0, r2, lsr #24 @ #0x0f000000
\r
142 ands r4, r0, r2, lsr #28 @ #0xf0000000
\r
145 ands r4, r0, r2 @ #0x0000000f
\r
148 ands r4, r0, r2, lsr #4 @ #0x000000f0
\r
151 ands r4, r0, r2, lsr #8 @ #0x00000f00
\r
154 ands r4, r0, r2, lsr #12 @ #0x0000f000
\r
159 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
160 .macro Tile hflip vflip
\r
161 mov r7, r9, lsl #13 @ r9=code<<8; addr=(code&0x7ff)<<4;
\r
162 add r7, r10, r7, lsr #16
\r
163 orr r9, r9, #3 @ emptytile=singlecolor=1, r9 must be <code_16> 00000xxx
\r
165 @ we read tilecodes in reverse order if we have vflip
\r
168 @ loop through 8 lines
\r
169 orr r9, r9, #(7<<24)
\r
172 0: @ singlecol_loop
\r
173 subs r9, r9, #(1<<24)
\r
174 add r1, r1, #328 @ set pointer to next line
\r
175 bmi 8f @ loop_exit with r0 restore
\r
178 ldr r2, [r7, #-4]! @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
\r
183 beq 2f @ empty line
\r
186 bne 3f @ not singlecolor
\r
193 subs r9, r9, #(1<<24)
\r
194 add r1, r1, #328 @ set pointer to next line
\r
195 bmi 8f @ loop_exit with r0 restore
\r
197 ldr r2, [r7, #-4]! @ next pack
\r
201 mov r0, #0xf @ singlecol_loop might have messed r0
\r
205 bic r9, r9, #3 @ if we are here, it means we have empty and not empty line
\r
208 3: @ not empty, not singlecol
\r
213 4: @ not empty, not singlecol loop
\r
214 subs r9, r9, #(1<<24)
\r
215 add r1, r1, #328 @ set pointer to next line
\r
218 ldr r2, [r7, #-4]! @ next pack
\r
223 beq 4b @ empty line
\r
226 beq 7f @ singlecolor line
\r
235 TileLineSinglecol 1
\r
241 add r9, r9, #(1<<24) @ fix r9
\r
242 sub r1, r1, #328*8 @ restore pdest pointer
\r
246 @ TileLineSinglecolAl (r1=pdest, r4,r7=color)
\r
247 .macro TileLineSinglecolAl0
\r
252 .macro TileLineSinglecolAl1
\r
256 strb r4, [r1], #1+320
\r
260 .macro TileLineSinglecolAl2
\r
267 .macro TileLineSinglecolAl3
\r
271 strb r4, [r1], #1+320
\r
275 @ TileSinglecol (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=0xf
\r
276 @ kaligned==1, if dest is always aligned
\r
277 .macro TileSinglecol kaligned=0
\r
278 and r4, r2, #0xf @ we assume we have good r2 from previous time
\r
280 orr r4, r4, r4, lsl #8
\r
281 orr r4, r4, r4, lsl #16
\r
285 tst r1, #2 @ not aligned?
\r
291 TileLineSinglecolAl0
\r
292 TileLineSinglecolAl0
\r
293 TileLineSinglecolAl0
\r
294 TileLineSinglecolAl0
\r
295 TileLineSinglecolAl0
\r
296 TileLineSinglecolAl0
\r
297 TileLineSinglecolAl0
\r
298 TileLineSinglecolAl0
\r
303 TileLineSinglecolAl1
\r
304 TileLineSinglecolAl1
\r
305 TileLineSinglecolAl1
\r
306 TileLineSinglecolAl1
\r
307 TileLineSinglecolAl1
\r
308 TileLineSinglecolAl1
\r
309 TileLineSinglecolAl1
\r
310 TileLineSinglecolAl1
\r
317 TileLineSinglecolAl2
\r
318 TileLineSinglecolAl2
\r
319 TileLineSinglecolAl2
\r
320 TileLineSinglecolAl2
\r
321 TileLineSinglecolAl2
\r
322 TileLineSinglecolAl2
\r
323 TileLineSinglecolAl2
\r
324 TileLineSinglecolAl2
\r
328 TileLineSinglecolAl3
\r
329 TileLineSinglecolAl3
\r
330 TileLineSinglecolAl3
\r
331 TileLineSinglecolAl3
\r
332 TileLineSinglecolAl3
\r
333 TileLineSinglecolAl3
\r
334 TileLineSinglecolAl3
\r
335 TileLineSinglecolAl3
\r
339 sub r1, r1, #328*8 @ restore pdest pointer
\r
344 @ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll])
\r
346 @static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend)
\r
348 .global DrawLayerFull
\r
351 stmfd sp!, {r4-r11,lr}
\r
353 mov r6, r1 @ hcache
\r
355 ldr r11, =(Pico+0x22228) @ Pico.video
\r
356 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
357 ldrb r5, [r11, #13] @ pvid->reg[13]
\r
358 ldrb r7, [r11, #11]
\r
361 and lr, lr, #0x00ff0000 @ lr=cells
\r
363 mov r5, r5, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)
\r
364 add r5, r5, r0, lsl #1 @ htab+=plane
\r
365 bic r5, r5, #0x00ff0000 @ just in case
\r
367 tst r7, #3 @ full screen scroll? (if ==0)
\r
368 ldrb r7, [r11, #16] @ ??hh??ww
\r
369 ldreqh r5, [r10, r5]
\r
370 biceq r5, r5, #0x0000fc00 @ r5=hscroll (0-0x3ff)
\r
371 movne r5, r5, lsr #1
\r
372 orrne r5, r5, #0x8000 @ this marks that we have htab pointer, not hscroll here
\r
376 orr r5, r5, r7, lsl #1+24
\r
377 orr r5, r5, #0x1f000000
\r
379 biclt r5, r5, #0x80000000
\r
380 biceq r5, r5, #0xc0000000
\r
381 bicgt r5, r5, #0xe0000000
\r
383 mov r9, r2, lsl #24
\r
384 orr r5, r5, r9, lsr #8 @ r5=(ymask<<24)|(trow<<16)|[htab||hscroll]
\r
388 subge r4, r4, #1 @ r4=shift[width] (5,6,6,7)
\r
391 orr lr, lr, r3, lsl #24 @ lr=(planeend<<24)|(cells<<16)|shift[width]
\r
394 mov r8, r8, lsl #24+5
\r
395 orr r8, r8, #0x1f000000
\r
399 ldreqb r4, [r11, #2]
\r
400 moveq r4, r4, lsr #3
\r
401 ldrneb r4, [r11, #4]
\r
403 orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13
\r
405 ldr r11, =PicoDraw2FB @ r11=PicoDraw2FB
\r
406 sub r4, r9, #(START_ROW<<24)
\r
408 mov r4, r4, asr #24
\r
410 mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW);
\r
412 @ Get vertical scroll value:
\r
413 add r7, r10, #0x012000
\r
414 add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)
\r
417 moveq r7, r7, lsl #22
\r
418 movne r7, r7, lsl #6
\r
419 mov r7, r7, lsr #22 @ r7=vscroll (10 bits)
\r
421 orr lr, lr, r7, lsl #3
\r
422 mov lr, lr, ror #24 @ packed: cccccccc nnnvvvvv vvvvvsss pppppppp: cells, nametab, vscroll, shift[width], planeend
\r
425 addne lr, lr, #1 @ we have vertically clipped tiles due to vscroll, so we need 1 more row
\r
428 str r7, [r6], #4 @ push y-offset to tilecache
\r
430 mla r11, r4, r7, r11 @ scrpos+=(8-(vscroll&7))*328;
\r
432 mov r9, #0xff000000 @ r9=(prevcode<<8)|flags: 1~tile empty, 2~tile singlecolor
\r
435 mov r4, lr, lsl #11
\r
436 mov r4, r4, lsr #25 @ r4=vscroll>>3 (7 bits)
\r
437 add r4, r4, r5, lsr #16 @ +trow
\r
438 and r4, r4, r5, lsr #24 @ &=ymask
\r
440 and r7, r7, #7 @ shift[width]
\r
442 and r0, r0, #0x7000 @ nametab
\r
443 add r12,r0, r4, lsl r7 @ nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<<shift[width]);
\r
445 mov r4, lr, lsr #24
\r
446 orr r12,r12,r4, lsl #23
\r
447 mov r12,r12,lsl #1 @ (nametab_row|(cells<<24)) (halfword compliant)
\r
451 moveq r7, r5, lsl #22 @ hscroll (0-3FFh)
\r
452 moveq r7, r7, lsr #22
\r
453 beq .rtr_hscroll_done
\r
455 @ get hscroll from htab
\r
456 mov r7, r5, lsl #17
\r
457 ands r4, r5, #0x00ff0000
\r
458 add r7, r7, r4, lsl #5 @ +=trow<<4
\r
459 andne r4, lr, #0x3800
\r
460 subne r7, r7, r4, lsl #7 @ if(trow) htaddr-=(vscroll&7)<<1;
\r
461 mov r7, r7, lsr #16 @ halfwords
\r
465 and r8, r8, #0xff000000
\r
466 rsb r4, r7, #0 @ r4=tilex=(-ts->hscroll)>>3
\r
469 orr r8, r8, r4 @ r8=(xmask<<24)|tilex
\r
473 add r7, r7, #1 @ r7=dx=((ts->hscroll-1)&7)+1
\r
476 subeq r12,r12, #0x01000000 @ we will loop cells+1 times, so loop less when there is no hscroll
\r
478 add r1, r11, r7 @ r1=pdest
\r
482 @ r4 & r7 are scratch in this loop
\r
483 .rtrloop: @ 40-41 times
\r
485 subs r12,r12, #0x01000000
\r
490 and r7, r8, r8, lsr #24
\r
491 add r7, r10, r7, lsl #1
\r
492 bic r4, r12, #0xff000000 @ Pico.vram[nametab_row+(tilex&xmask)];
\r
493 ldrh r7, [r7, r4] @ r7=code (int, but from unsigned, no sign extend)
\r
499 bne .rtr_notsamecode
\r
500 @ we know stuff about this tile already
\r
502 bne .rtrloop @ empty tile
\r
504 bne .rtr_singlecolor @ singlecolor tile
\r
508 and r4, r9, #0x600000
\r
509 mov r9, r7, lsl #8 @ remember new code
\r
512 and r7, r7, #0x6000
\r
513 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
516 tst r9, #0x100000 @ vflip?
\r
519 tst r9, #0x080000 @ hflip?
\r
522 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
531 tst r9, #0x080000 @ hflip?
\r
532 bne .rtr_vflip_hflip
\r
546 @ *(*hcache)++ = code|(dx<<16)|(trow<<27);
\r
548 orr r7, r7, r4, lsl #16
\r
549 and r4, r5, #0x00ff0000
\r
550 orr r7, r7, r4, lsl #11 @ (trow<<27)
\r
551 str r7, [r6], #4 @ cache hi priority tile
\r
555 add r5, r5, #0x00010000
\r
557 cmp r4, lr, lsl #24
\r
558 bge .rtrloop_outer_exit
\r
559 add r11, r11, #328*8
\r
562 .rtrloop_outer_exit:
\r
564 @ terminate cache list
\r
566 str r0, [r6] @ save cache pointer
\r
568 ldmfd sp!, {r4-r11,lr}
\r
575 .global DrawTilesFromCacheF @ int *hc
\r
577 DrawTilesFromCacheF:
\r
578 stmfd sp!, {r4-r10,lr}
\r
580 mov r9, #0xff000000 @ r9=prevcode=-1
\r
581 mvn r6, #0 @ r6=prevy=-1
\r
583 ldr r4, =PicoDraw2FB @ r4=PicoDraw2FB
\r
584 ldr r1, [r0], #4 @ read y offset
\r
588 sub r12, r1, #(328*8*START_ROW) @ r12=scrpos
\r
590 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
595 @ *hcache++ = code|(dx<<16)|(trow<<27); // cache it
\r
598 ldr r7, [r8], #4 @ read code
\r
599 movs r1, r7, lsr #16 @ r1=dx;
\r
600 ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return
\r
603 cmp r6, r7, lsr #27
\r
604 movne r6, r7, lsr #27
\r
606 mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8
\r
608 bic r1, r1, #0xf800
\r
609 add r1, r5, r1 @ r1=pdest (halfwords)
\r
611 mov r7, r7, lsl #16
\r
612 mov r7, r7, lsr #16
\r
615 bne .dtfcf_notsamecode
\r
616 @ we know stuff about this tile already
\r
618 bne .dtfcf_loop @ empty tile
\r
620 bne .dtfcf_singlecolor @ singlecolor tile
\r
623 .dtfcf_notsamecode:
\r
624 and r4, r9, #0x600000
\r
625 mov r9, r7, lsl #8 @ remember new code
\r
628 and r7, r7, #0x6000
\r
629 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
634 tst r9, #0x100000 @ vflip?
\r
637 tst r9, #0x080000 @ hflip?
\r
640 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
649 tst r9, #0x080000 @ hflip?
\r
650 bne .dtfcf_vflip_hflip
\r
655 .dtfcf_vflip_hflip:
\r
659 .dtfcf_singlecolor:
\r
668 @ (tile_start<<16)|row_start
\r
669 .global DrawWindowFull @ int tstart, int tend, int prio
\r
672 stmfd sp!, {r4-r11,lr}
\r
674 ldr r11, =(Pico+0x22228) @ Pico.video
\r
675 ldrb r12, [r11, #3] @ pvid->reg[3]
\r
676 mov r12, r12, lsl #10
\r
679 mov r5, #1 @ nametab_step
\r
680 tst r4, #1 @ 40 cell mode?
\r
681 andne r12, r12, #0xf000 @ 0x3c<<10
\r
682 andeq r12, r12, #0xf800
\r
683 movne r5, r5, lsl #7
\r
684 moveq r5, r5, lsl #6 @ nametab_step
\r
687 mla r12, r5, r4, r12 @ nametab += nametab_step*start;
\r
689 mov r4, r0, lsr #16 @ r4=start_cell_h
\r
690 add r7, r12, r4, lsl #1
\r
692 @ fetch the first code now
\r
693 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
695 cmp r2, r7, lsr #15
\r
696 ldmnefd sp!, {r4-r11,pc} @ hack: simply assume that whole window uses same priority
\r
698 rsb r8, r4, r1, lsr #16 @ cells (h)
\r
699 orr r8, r8, r4, lsl #8
\r
700 mov r4, r1, lsl #24
\r
701 sub r4, r4, r0, lsl #24
\r
702 orr r8, r8, r4, lsr #8 @ r8=cells_h|(start_cell_h<<8)|(cells_v<<16)
\r
703 sub r8, r8, #0x010000 @ adjust for algo
\r
705 mov r9, #0xff000000 @ r9=prevcode=-1
\r
707 ldr r11, =PicoDraw2FB @ r11=scrpos
\r
710 sub r4, r4, #START_ROW
\r
711 add r11, r11, #328*8
\r
715 mla r11, r7, r4, r11 @ scrpos+=8*328*(start-START_ROW);
\r
719 and r6, r8, #0xff00 @ r6=tilex
\r
720 add r1, r11, r6, lsr #5 @ r1=pdest
\r
721 add r6, r12, r6, lsr #7
\r
722 add r6, r10, r6 @ r6=Pico.vram+nametab+tilex
\r
723 orr r8, r8, r8, lsl #24
\r
724 sub r8, r8, #0x01000000 @ cell loop counter
\r
727 @ r4 & r7 are scratch in this loop
\r
730 subs r8, r8, #0x01000000
\r
734 ldrh r7, [r6], #2 @ r7=code
\r
737 bne .dwf_notsamecode
\r
738 @ we know stuff about this tile already
\r
740 bne .dwfloop @ empty tile
\r
742 bne .dwf_singlecolor @ singlecolor tile
\r
746 and r4, r9, #0x600000
\r
747 mov r9, r7, lsl #8 @ remember new code
\r
750 and r7, r7, #0x6000
\r
751 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
755 tst r9, #0x100000 @ vflip?
\r
758 tst r9, #0x080000 @ hflip?
\r
761 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
770 tst r9, #0x080000 @ hflip?
\r
771 bne .dwf_vflip_hflip
\r
785 bic r8, r8, #0xff000000 @ fix r8
\r
786 subs r8, r8, #0x010000
\r
787 ldmmifd sp!, {r4-r11,pc}
\r
788 add r11, r11, #328*8
\r
789 add r12, r12, r5 @ nametab+=nametab_step
\r
795 @ ---------------- sprites ---------------
\r
797 .macro SpriteLoop hflip vflip
\r
799 mov r1, r5, lsr #24 @ height
\r
801 mla r11, r1, r0, r11 @ scrpos+=height*328*8;
\r
802 add r12, r12, r1, lsl #3 @ sy+=height*8
\r
807 add r8, r8, r1, lsl #3 @ sx+=width*8
\r
811 add r9, r9, r5, lsr #16
\r
812 sub r5, r5, #1 @ sub width
\r
816 cmp r8, #0 @ skip tiles hidden on the left of screen
\r
819 add r9, r9, r5, lsr #16
\r
828 add r8, r8, #8 @ sx+=8
\r
830 bic r5, r5, #0xff000000 @ fix height
\r
831 orr r5, r5, r5, lsl #16
\r
834 sub r5, r5, #1 @ width--
\r
835 movs r1, r5, lsl #24
\r
836 ldmmifd sp!, {r4-r11,pc} @ end of tile
\r
838 subs r8, r8, #8 @ sx-=8
\r
839 ldmlefd sp!, {r4-r11,pc} @ tile offscreen
\r
842 ldmgefd sp!, {r4-r11,pc} @ tile offscreen
\r
844 mov r6, r12 @ r6=sy
\r
845 add r1, r11, r8 @ pdest=scrpos+sx
\r
849 add r9, r9, #1<<8 @ tile++
\r
851 add r6, r6, #8 @ sy+=8
\r
857 subs r5, r5, #0x01000000
\r
860 sub r6, r6, #8 @ sy-=8
\r
865 cmp r6, #(START_ROW*8)
\r
868 cmp r6, #(END_ROW*8+8)
\r
871 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
872 Tile \hflip, \vflip
\r
877 .global DrawSpriteFull @ unsigned int *sprite
\r
880 stmfd sp!, {r4-r11,lr}
\r
882 ldr r3, [r0] @ sprite[0]
\r
884 mov r6, r5, lsr #30
\r
885 add r6, r6, #1 @ r6=width
\r
887 mov r5, r5, lsr #30
\r
888 add r5, r5, #1 @ r5=height
\r
890 mov r12, r3, lsl #23
\r
891 mov r12, r12, lsr #23
\r
893 ldr lr, [r0, #4] @ lr=code
\r
894 sub r12, r12, #0x78 @ r12=sy
\r
896 mov r8, r8, lsr #23
\r
897 sub r8, r8, #0x78 @ r8=sx
\r
899 mov r9, lr, lsl #21
\r
900 mov r9, r9, lsr #13 @ r9=tile<<8
\r
902 and r3, lr, #0x6000
\r
903 mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30);
\r
905 ldr r11, =PicoDraw2FB @ r11=scrpos
\r
906 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
908 sub r1, r12, #(START_ROW*8)
\r
910 mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328;
\r
912 orr r5, r5, r5, lsl #16 @
\r
913 orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24)
\r
915 tst lr, #0x1000 @ vflip?
\r
918 tst lr, #0x0800 @ hflip?
\r
927 tst lr, #0x0800 @ hflip?
\r
928 bne .dsf_vflip_hflip
\r
937 @ vim:filetype=armasm
\r