2 * assembly optimized versions of most functions from draw2.c
\r
3 * (C) notaz, 2006-2008
\r
5 * This work is licensed under the terms of MAME license.
\r
6 * See COPYING file in the top-level directory.
\r
8 * this is highly specialized, be careful if changing related C code!
\r
14 @ define these constants in your include file:
\r
15 @ .equiv START_ROW, 1
\r
16 @ .equiv END_ROW, 27
\r
17 @ one row means 8 pixels. If the above example were used, (27-1)*8=208 lines would be rendered.
\r
18 .include "port_config.s"
\r
21 .global BackFillFull @ int reg7
\r
24 stmfd sp!, {r4-r9,lr}
\r
26 ldr lr, =PicoDraw2FB @ lr=PicoDraw2FB
\r
32 orr r0, r0, r0, lsl #8
\r
33 orr r0, r0, r0, lsl #16
\r
35 mov r1, r0 @ 25 opcodes wasted?
\r
45 mov r12, #(END_ROW-START_ROW)*8
\r
52 stmia lr!, {r0-r9} @ 10*4*8
\r
63 ldmfd sp!, {r4-r9,r12}
\r
68 @ -------- some macros --------
\r
72 @ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old
\r
73 .macro TileLineSinglecol notsinglecol=0
\r
74 and r2, r2, #0xf @ #0x0000000f
\r
76 cmp r2, r0, lsr #28 @ if these don't match,
\r
77 bicne r9, r9, #2 @ it is a sign that whole tile is not singlecolor (only its lines may be)
\r
80 orr r4, r4, r4, lsl #8
\r
82 tst r1, #1 @ not aligned?
\r
88 strneb r4, [r1], #1 @ have a remaining unaligned pixel?
\r
92 orr r0, r0, r2, lsl #28 @ we will need the old palindex later
\r
96 @ TileNorm (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
\r
98 ands r4, r0, r2, lsr #12 @ #0x0000f000
\r
101 ands r4, r0, r2, lsr #8 @ #0x00000f00
\r
104 ands r4, r0, r2, lsr #4 @ #0x000000f0
\r
107 ands r4, r0, r2 @ #0x0000000f
\r
110 ands r4, r0, r2, lsr #28 @ #0xf0000000
\r
113 ands r4, r0, r2, lsr #24 @ #0x0f000000
\r
116 ands r4, r0, r2, lsr #20 @ #0x00f00000
\r
119 ands r4, r0, r2, lsr #16 @ #0x000f0000
\r
124 @ TileLineFlip (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
\r
125 .macro TileLineFlip
\r
126 ands r4, r0, r2, lsr #16 @ #0x000f0000
\r
129 ands r4, r0, r2, lsr #20 @ #0x00f00000
\r
132 ands r4, r0, r2, lsr #24 @ #0x0f000000
\r
135 ands r4, r0, r2, lsr #28 @ #0xf0000000
\r
138 ands r4, r0, r2 @ #0x0000000f
\r
141 ands r4, r0, r2, lsr #4 @ #0x000000f0
\r
144 ands r4, r0, r2, lsr #8 @ #0x00000f00
\r
147 ands r4, r0, r2, lsr #12 @ #0x0000f000
\r
152 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
153 .macro Tile hflip vflip
\r
154 mov r7, r9, lsl #13 @ r9=code<<8; addr=(code&0x7ff)<<4;
\r
155 add r7, r10, r7, lsr #16
\r
156 orr r9, r9, #3 @ emptytile=singlecolor=1, r9 must be <code_16> 00000xxx
\r
158 @ we read tilecodes in reverse order if we have vflip
\r
161 @ loop through 8 lines
\r
162 orr r9, r9, #(7<<24)
\r
165 0: @ singlecol_loop
\r
166 subs r9, r9, #(1<<24)
\r
167 add r1, r1, #328 @ set pointer to next line
\r
168 bmi 8f @ loop_exit with r0 restore
\r
171 ldr r2, [r7, #-4]! @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
\r
176 beq 2f @ empty line
\r
179 bne 3f @ not singlecolor
\r
186 subs r9, r9, #(1<<24)
\r
187 add r1, r1, #328 @ set pointer to next line
\r
188 bmi 8f @ loop_exit with r0 restore
\r
190 ldr r2, [r7, #-4]! @ next pack
\r
194 mov r0, #0xf @ singlecol_loop might have messed r0
\r
198 bic r9, r9, #3 @ if we are here, it means we have empty and not empty line
\r
201 3: @ not empty, not singlecol
\r
206 4: @ not empty, not singlecol loop
\r
207 subs r9, r9, #(1<<24)
\r
208 add r1, r1, #328 @ set pointer to next line
\r
211 ldr r2, [r7, #-4]! @ next pack
\r
216 beq 4b @ empty line
\r
219 beq 7f @ singlecolor line
\r
228 TileLineSinglecol 1
\r
234 add r9, r9, #(1<<24) @ fix r9
\r
235 sub r1, r1, #328*8 @ restore pdest pointer
\r
239 @ TileLineSinglecolAl (r1=pdest, r4,r7=color)
\r
240 .macro TileLineSinglecolAl0
\r
245 .macro TileLineSinglecolAl1
\r
249 strb r4, [r1], #1+320
\r
253 .macro TileLineSinglecolAl2
\r
260 .macro TileLineSinglecolAl3
\r
264 strb r4, [r1], #1+320
\r
268 @ TileSinglecol (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=0xf
\r
269 @ kaligned==1, if dest is always aligned
\r
270 .macro TileSinglecol kaligned=0
\r
271 and r4, r2, #0xf @ we assume we have good r2 from previous time
\r
273 orr r4, r4, r4, lsl #8
\r
274 orr r4, r4, r4, lsl #16
\r
278 tst r1, #2 @ not aligned?
\r
284 TileLineSinglecolAl0
\r
285 TileLineSinglecolAl0
\r
286 TileLineSinglecolAl0
\r
287 TileLineSinglecolAl0
\r
288 TileLineSinglecolAl0
\r
289 TileLineSinglecolAl0
\r
290 TileLineSinglecolAl0
\r
291 TileLineSinglecolAl0
\r
296 TileLineSinglecolAl1
\r
297 TileLineSinglecolAl1
\r
298 TileLineSinglecolAl1
\r
299 TileLineSinglecolAl1
\r
300 TileLineSinglecolAl1
\r
301 TileLineSinglecolAl1
\r
302 TileLineSinglecolAl1
\r
303 TileLineSinglecolAl1
\r
310 TileLineSinglecolAl2
\r
311 TileLineSinglecolAl2
\r
312 TileLineSinglecolAl2
\r
313 TileLineSinglecolAl2
\r
314 TileLineSinglecolAl2
\r
315 TileLineSinglecolAl2
\r
316 TileLineSinglecolAl2
\r
317 TileLineSinglecolAl2
\r
321 TileLineSinglecolAl3
\r
322 TileLineSinglecolAl3
\r
323 TileLineSinglecolAl3
\r
324 TileLineSinglecolAl3
\r
325 TileLineSinglecolAl3
\r
326 TileLineSinglecolAl3
\r
327 TileLineSinglecolAl3
\r
328 TileLineSinglecolAl3
\r
332 sub r1, r1, #328*8 @ restore pdest pointer
\r
337 @ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll]
\r
339 @static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend)
\r
341 .global DrawLayerFull
\r
344 stmfd sp!, {r4-r11,lr}
\r
346 mov r6, r1 @ hcache
\r
348 ldr r11, =(Pico+0x22228) @ Pico.video
\r
349 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
350 ldrb r5, [r11, #13] @ pvid->reg[13]
\r
351 ldrb r7, [r11, #11]
\r
354 and lr, lr, #0x00ff0000 @ lr=cells
\r
356 mov r5, r5, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)
\r
357 add r5, r5, r0, lsl #1 @ htab+=plane
\r
358 bic r5, r5, #0x00ff0000 @ just in case
\r
360 tst r7, #3 @ full screen scroll? (if ==0)
\r
361 ldrb r7, [r11, #16] @ ??hh??ww
\r
362 ldreqh r5, [r10, r5]
\r
363 biceq r5, r5, #0x0000fc00 @ r5=hscroll (0-0x3ff)
\r
364 movne r5, r5, lsr #1
\r
365 orrne r5, r5, #0x8000 @ this marks that we have htab pointer, not hscroll here
\r
369 orr r5, r5, r7, lsl #1+24
\r
370 orr r5, r5, #0x1f000000
\r
372 biclt r5, r5, #0x80000000
\r
373 biceq r5, r5, #0xc0000000
\r
374 bicgt r5, r5, #0xe0000000
\r
376 mov r9, r2, lsl #24
\r
377 orr r5, r5, r9, lsr #8 @ r5=(ymask<<24)|(trow<<16)|[htab||hscroll]
\r
381 subge r4, r4, #1 @ r4=shift[width] (5,6,6,7)
\r
384 orr lr, lr, r3, lsl #24 @ lr=(planeend<<24)|(cells<<16)|shift[width]
\r
387 mov r8, r8, lsl #24+5
\r
388 orr r8, r8, #0x1f000000
\r
392 ldreqb r4, [r11, #2]
\r
393 moveq r4, r4, lsr #3
\r
394 ldrneb r4, [r11, #4]
\r
396 orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13
\r
398 ldr r11, =PicoDraw2FB @ r11=PicoDraw2FB
\r
399 sub r4, r9, #(START_ROW<<24)
\r
401 mov r4, r4, asr #24
\r
403 mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW);
\r
405 @ Get vertical scroll value:
\r
406 add r7, r10, #0x012000
\r
407 add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)
\r
410 moveq r7, r7, lsl #22
\r
411 movne r7, r7, lsl #6
\r
412 mov r7, r7, lsr #22 @ r7=vscroll (10 bits)
\r
414 orr lr, lr, r7, lsl #3
\r
415 mov lr, lr, ror #24 @ packed: cccccccc nnnvvvvv vvvvvsss pppppppp: cells, nametab, vscroll, shift[width], planeend
\r
418 addne lr, lr, #1 @ we have vertically clipped tiles due to vscroll, so we need 1 more row
\r
421 str r7, [r6], #4 @ push y-offset to tilecache
\r
423 mla r11, r4, r7, r11 @ scrpos+=(8-(vscroll&7))*328;
\r
425 mov r9, #0xff000000 @ r9=(prevcode<<8)|flags: 1~tile empty, 2~tile singlecolor
\r
428 mov r4, lr, lsl #11
\r
429 mov r4, r4, lsr #25 @ r4=vscroll>>3 (7 bits)
\r
430 add r4, r4, r5, lsr #16 @ +trow
\r
431 and r4, r4, r5, lsr #24 @ &=ymask
\r
433 and r7, r7, #7 @ shift[width]
\r
435 and r0, r0, #0x7000 @ nametab
\r
436 add r12,r0, r4, lsl r7 @ nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<<shift[width]);
\r
438 mov r4, lr, lsr #24
\r
439 orr r12,r12,r4, lsl #23
\r
440 mov r12,r12,lsl #1 @ (nametab_row|(cells<<24)) (halfword compliant)
\r
444 moveq r7, r5, lsl #22 @ hscroll (0-3FFh)
\r
445 moveq r7, r7, lsr #22
\r
446 beq .rtr_hscroll_done
\r
448 @ get hscroll from htab
\r
449 mov r7, r5, lsl #17
\r
450 ands r4, r5, #0x00ff0000
\r
451 add r7, r7, r4, lsl #5 @ +=trow<<4
\r
452 andne r4, lr, #0x3800
\r
453 subne r7, r7, r4, lsl #7 @ if(trow) htaddr-=(vscroll&7)<<1;
\r
454 mov r7, r7, lsr #16 @ halfwords
\r
458 and r8, r8, #0xff000000
\r
459 rsb r4, r7, #0 @ r4=tilex=(-ts->hscroll)>>3
\r
462 orr r8, r8, r4 @ r8=(xmask<<24)|tilex
\r
466 add r7, r7, #1 @ r7=dx=((ts->hscroll-1)&7)+1
\r
469 subeq r12,r12, #0x01000000 @ we will loop cells+1 times, so loop less when there is no hscroll
\r
471 add r1, r11, r7 @ r1=pdest
\r
475 @ r4 & r7 are scratch in this loop
\r
476 .rtrloop: @ 40-41 times
\r
478 subs r12,r12, #0x01000000
\r
483 and r7, r8, r8, lsr #24
\r
484 add r7, r10, r7, lsl #1
\r
485 bic r4, r12, #0xff000000 @ Pico.vram[nametab_row+(tilex&xmask)];
\r
486 ldrh r7, [r7, r4] @ r7=code (int, but from unsigned, no sign extend)
\r
492 bne .rtr_notsamecode
\r
493 @ we know stuff about this tile already
\r
495 bne .rtrloop @ empty tile
\r
497 bne .rtr_singlecolor @ singlecolor tile
\r
501 and r4, r9, #0x600000
\r
502 mov r9, r7, lsl #8 @ remember new code
\r
505 and r7, r7, #0x6000
\r
506 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
509 tst r9, #0x100000 @ vflip?
\r
512 tst r9, #0x080000 @ hflip?
\r
515 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
524 tst r9, #0x080000 @ hflip?
\r
525 bne .rtr_vflip_hflip
\r
539 @ *(*hcache)++ = code|(dx<<16)|(trow<<27);
\r
541 orr r7, r7, r4, lsl #16
\r
542 and r4, r5, #0x00ff0000
\r
543 orr r7, r7, r4, lsl #11 @ (trow<<27)
\r
544 str r7, [r6], #4 @ cache hi priority tile
\r
548 add r5, r5, #0x00010000
\r
550 cmp r4, lr, lsl #24
\r
551 bge .rtrloop_outer_exit
\r
552 add r11, r11, #328*8
\r
555 .rtrloop_outer_exit:
\r
557 @ terminate cache list
\r
559 str r0, [r6] @ save cache pointer
\r
561 ldmfd sp!, {r4-r11,lr}
\r
568 .global DrawTilesFromCacheF @ int *hc
\r
570 DrawTilesFromCacheF:
\r
571 stmfd sp!, {r4-r10,lr}
\r
573 mov r9, #0xff000000 @ r9=prevcode=-1
\r
574 mvn r6, #0 @ r6=prevy=-1
\r
576 ldr r4, =PicoDraw2FB @ r4=PicoDraw2FB
\r
577 ldr r1, [r0], #4 @ read y offset
\r
581 sub r12, r1, #(328*8*START_ROW) @ r12=scrpos
\r
583 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
588 @ *hcache++ = code|(dx<<16)|(trow<<27); // cache it
\r
591 ldr r7, [r8], #4 @ read code
\r
592 movs r1, r7, lsr #16 @ r1=dx;
\r
593 ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return
\r
596 cmp r6, r7, lsr #27
\r
597 movne r6, r7, lsr #27
\r
599 mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8
\r
601 bic r1, r1, #0xf800
\r
602 add r1, r5, r1 @ r1=pdest (halfwords)
\r
604 mov r7, r7, lsl #16
\r
605 mov r7, r7, lsr #16
\r
608 bne .dtfcf_notsamecode
\r
609 @ we know stuff about this tile already
\r
611 bne .dtfcf_loop @ empty tile
\r
613 bne .dtfcf_singlecolor @ singlecolor tile
\r
616 .dtfcf_notsamecode:
\r
617 and r4, r9, #0x600000
\r
618 mov r9, r7, lsl #8 @ remember new code
\r
621 and r7, r7, #0x6000
\r
622 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
627 tst r9, #0x100000 @ vflip?
\r
630 tst r9, #0x080000 @ hflip?
\r
633 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
642 tst r9, #0x080000 @ hflip?
\r
643 bne .dtfcf_vflip_hflip
\r
648 .dtfcf_vflip_hflip:
\r
652 .dtfcf_singlecolor:
\r
661 @ (tile_start<<16)|row_start
\r
662 .global DrawWindowFull @ int tstart, int tend, int prio
\r
665 stmfd sp!, {r4-r11,lr}
\r
667 ldr r11, =(Pico+0x22228) @ Pico.video
\r
668 ldrb r12, [r11, #3] @ pvid->reg[3]
\r
669 mov r12, r12, lsl #10
\r
672 mov r5, #1 @ nametab_step
\r
673 tst r4, #1 @ 40 cell mode?
\r
674 andne r12, r12, #0xf000 @ 0x3c<<10
\r
675 andeq r12, r12, #0xf800
\r
676 movne r5, r5, lsl #7
\r
677 moveq r5, r5, lsl #6 @ nametab_step
\r
680 mla r12, r5, r4, r12 @ nametab += nametab_step*start;
\r
682 mov r4, r0, lsr #16 @ r4=start_cell_h
\r
683 add r7, r12, r4, lsl #1
\r
685 @ fetch the first code now
\r
686 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
688 cmp r2, r7, lsr #15
\r
689 ldmnefd sp!, {r4-r11,pc} @ hack: simply assume that whole window uses same priority
\r
691 rsb r8, r4, r1, lsr #16 @ cells (h)
\r
692 orr r8, r8, r4, lsl #8
\r
693 mov r4, r1, lsl #24
\r
694 sub r4, r4, r0, lsl #24
\r
695 orr r8, r8, r4, lsr #8 @ r8=cells_h|(start_cell_h<<8)|(cells_v<<16)
\r
696 sub r8, r8, #0x010000 @ adjust for algo
\r
698 mov r9, #0xff000000 @ r9=prevcode=-1
\r
700 ldr r11, =PicoDraw2FB @ r11=scrpos
\r
703 sub r4, r4, #START_ROW
\r
704 add r11, r11, #328*8
\r
708 mla r11, r7, r4, r11 @ scrpos+=8*328*(start-START_ROW);
\r
712 and r6, r8, #0xff00 @ r6=tilex
\r
713 add r1, r11, r6, lsr #5 @ r1=pdest
\r
714 add r6, r12, r6, lsr #7
\r
715 add r6, r10, r6 @ r6=Pico.vram+nametab+tilex
\r
716 orr r8, r8, r8, lsl #24
\r
717 sub r8, r8, #0x01000000 @ cell loop counter
\r
720 @ r4 & r7 are scratch in this loop
\r
723 subs r8, r8, #0x01000000
\r
727 ldrh r7, [r6], #2 @ r7=code
\r
730 bne .dwf_notsamecode
\r
731 @ we know stuff about this tile already
\r
733 bne .dwfloop @ empty tile
\r
735 bne .dwf_singlecolor @ singlecolor tile
\r
739 and r4, r9, #0x600000
\r
740 mov r9, r7, lsl #8 @ remember new code
\r
743 and r7, r7, #0x6000
\r
744 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
748 tst r9, #0x100000 @ vflip?
\r
751 tst r9, #0x080000 @ hflip?
\r
754 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
763 tst r9, #0x080000 @ hflip?
\r
764 bne .dwf_vflip_hflip
\r
778 bic r8, r8, #0xff000000 @ fix r8
\r
779 subs r8, r8, #0x010000
\r
780 ldmmifd sp!, {r4-r11,pc}
\r
781 add r11, r11, #328*8
\r
782 add r12, r12, r5 @ nametab+=nametab_step
\r
788 @ ---------------- sprites ---------------
\r
790 .macro SpriteLoop hflip vflip
\r
792 mov r1, r5, lsr #24 @ height
\r
794 mla r11, r1, r0, r11 @ scrpos+=height*328*8;
\r
795 add r12, r12, r1, lsl #3 @ sy+=height*8
\r
800 add r8, r8, r1, lsl #3 @ sx+=width*8
\r
804 add r9, r9, r5, lsr #16
\r
805 sub r5, r5, #1 @ sub width
\r
809 cmp r8, #0 @ skip tiles hidden on the left of screen
\r
812 add r9, r9, r5, lsr #16
\r
821 add r8, r8, #8 @ sx+=8
\r
823 bic r5, r5, #0xff000000 @ fix height
\r
824 orr r5, r5, r5, lsl #16
\r
827 sub r5, r5, #1 @ width--
\r
828 movs r1, r5, lsl #24
\r
829 ldmmifd sp!, {r4-r11,pc} @ end of tile
\r
831 subs r8, r8, #8 @ sx-=8
\r
832 ldmlefd sp!, {r4-r11,pc} @ tile offscreen
\r
835 ldmgefd sp!, {r4-r11,pc} @ tile offscreen
\r
837 mov r6, r12 @ r6=sy
\r
838 add r1, r11, r8 @ pdest=scrpos+sx
\r
842 add r9, r9, #1<<8 @ tile++
\r
844 add r6, r6, #8 @ sy+=8
\r
850 subs r5, r5, #0x01000000
\r
853 sub r6, r6, #8 @ sy-=8
\r
858 cmp r6, #(START_ROW*8)
\r
861 cmp r6, #(END_ROW*8+8)
\r
864 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
865 Tile \hflip, \vflip
\r
870 .global DrawSpriteFull @ unsigned int *sprite
\r
873 stmfd sp!, {r4-r11,lr}
\r
875 ldr r3, [r0] @ sprite[0]
\r
877 mov r6, r5, lsr #30
\r
878 add r6, r6, #1 @ r6=width
\r
880 mov r5, r5, lsr #30
\r
881 add r5, r5, #1 @ r5=height
\r
883 mov r12, r3, lsl #23
\r
884 mov r12, r12, lsr #23
\r
886 ldr lr, [r0, #4] @ lr=code
\r
887 sub r12, r12, #0x78 @ r12=sy
\r
889 mov r8, r8, lsr #23
\r
890 sub r8, r8, #0x78 @ r8=sx
\r
892 mov r9, lr, lsl #21
\r
893 mov r9, r9, lsr #13 @ r9=tile<<8
\r
895 and r3, lr, #0x6000
\r
896 mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30);
\r
898 ldr r11, =PicoDraw2FB @ r11=scrpos
\r
899 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
901 sub r1, r12, #(START_ROW*8)
\r
903 mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328;
\r
905 orr r5, r5, r5, lsl #16 @
\r
906 orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24)
\r
908 tst lr, #0x1000 @ vflip?
\r
911 tst lr, #0x0800 @ hflip?
\r
920 tst lr, #0x0800 @ hflip?
\r
921 bne .dsf_vflip_hflip
\r
930 @ vim:filetype=armasm
\r