1 @ vim:filetype=armasm
\r
3 @ assembly optimized versions of most functions from draw2.c
\r
4 @ this is highly specialized, be careful if changing related C code!
\r
6 @ (c) Copyright 2007, Grazvydas "notaz" Ignotas
\r
7 @ All Rights Reserved
\r
13 @ define these constants in your include file:
\r
14 @ .equiv START_ROW, 1
\r
15 @ .equiv END_ROW, 27
\r
16 @ one row means 8 pixels. With the example above, (27-1)*8=208 lines are rendered.
\r
17 .include "port_config.s"
\r
20 .global BackFillFull @ int reg7
\r
23 stmfd sp!, {r4-r9,lr}
\r
25 ldr lr, =PicoDraw2FB @ lr=PicoDraw2FB
\r
31 orr r0, r0, r0, lsl #8
\r
32 orr r0, r0, r0, lsl #16
\r
34 mov r1, r0 @ 25 opcodes wasted?
\r
44 mov r12, #(END_ROW-START_ROW)*8
\r
51 stmia lr!, {r0-r9} @ 10*4*8
\r
62 ldmfd sp!, {r4-r9,r12}
\r
67 @ -------- some macros --------
\r
71 @ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old
\r
72 .macro TileLineSinglecol notsinglecol=0
\r
73 and r2, r2, #0xf @ #0x0000000f
\r
75 cmp r2, r0, lsr #28 @ if these don't match,
\r
76 bicne r9, r9, #2 @ it is a sign that the whole tile is not singlecolor (only its lines may be)
\r
79 orr r4, r4, r4, lsl #8
\r
81 tst r1, #1 @ not aligned?
\r
87 strneb r4, [r1], #1 @ have a remaining unaligned pixel?
\r
91 orr r0, r0, r2, lsl #28 @ we will need the old palindex later
\r
95 @ TileNorm (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
\r
97 ands r4, r0, r2, lsr #12 @ #0x0000f000
\r
100 ands r4, r0, r2, lsr #8 @ #0x00000f00
\r
103 ands r4, r0, r2, lsr #4 @ #0x000000f0
\r
106 ands r4, r0, r2 @ #0x0000000f
\r
109 ands r4, r0, r2, lsr #28 @ #0xf0000000
\r
112 ands r4, r0, r2, lsr #24 @ #0x0f000000
\r
115 ands r4, r0, r2, lsr #20 @ #0x00f00000
\r
118 ands r4, r0, r2, lsr #16 @ #0x000f0000
\r
123 @ TileLineFlip (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
\r
124 .macro TileLineFlip
\r
125 ands r4, r0, r2, lsr #16 @ #0x000f0000
\r
128 ands r4, r0, r2, lsr #20 @ #0x00f00000
\r
131 ands r4, r0, r2, lsr #24 @ #0x0f000000
\r
134 ands r4, r0, r2, lsr #28 @ #0xf0000000
\r
137 ands r4, r0, r2 @ #0x0000000f
\r
140 ands r4, r0, r2, lsr #4 @ #0x000000f0
\r
143 ands r4, r0, r2, lsr #8 @ #0x00000f00
\r
146 ands r4, r0, r2, lsr #12 @ #0x0000f000
\r
151 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
152 .macro Tile hflip vflip
\r
153 mov r7, r9, lsl #13 @ r9=code<<8; addr=(code&0x7ff)<<4;
\r
154 add r7, r10, r7, lsr #16
\r
155 orr r9, r9, #3 @ emptytile=singlecolor=1, r9 must be <code_16> 00000xxx
\r
157 @ we read tilecodes in reverse order if we have vflip
\r
160 @ loop through 8 lines
\r
161 orr r9, r9, #(7<<24)
\r
164 0: @ singlecol_loop
\r
165 subs r9, r9, #(1<<24)
\r
166 add r1, r1, #328 @ set pointer to next line
\r
167 bmi 8f @ loop_exit with r0 restore
\r
170 ldr r2, [r7, #-4]! @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
\r
175 beq 2f @ empty line
\r
178 bne 3f @ not singlecolor
\r
185 subs r9, r9, #(1<<24)
\r
186 add r1, r1, #328 @ set pointer to next line
\r
187 bmi 8f @ loop_exit with r0 restore
\r
189 ldr r2, [r7, #-4]! @ next pack
\r
193 mov r0, #0xf @ singlecol_loop might have messed r0
\r
197 bic r9, r9, #3 @ if we are here, it means we have empty and not empty line
\r
200 3: @ not empty, not singlecol
\r
205 4: @ not empty, not singlecol loop
\r
206 subs r9, r9, #(1<<24)
\r
207 add r1, r1, #328 @ set pointer to next line
\r
210 ldr r2, [r7, #-4]! @ next pack
\r
215 beq 4b @ empty line
\r
218 beq 7f @ singlecolor line
\r
227 TileLineSinglecol 1
\r
233 add r9, r9, #(1<<24) @ fix r9
\r
234 sub r1, r1, #328*8 @ restore pdest pointer
\r
238 @ TileLineSinglecolAl (r1=pdest, r4,r7=color)
\r
239 .macro TileLineSinglecolAl0
\r
244 .macro TileLineSinglecolAl1
\r
248 strb r4, [r1], #1+320
\r
252 .macro TileLineSinglecolAl2
\r
259 .macro TileLineSinglecolAl3
\r
263 strb r4, [r1], #1+320
\r
267 @ TileSinglecol (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=0xf
\r
268 @ kaligned==1, if dest is always aligned
\r
269 .macro TileSinglecol kaligned=0
\r
270 and r4, r2, #0xf @ we assume we have good r2 from previous time
\r
272 orr r4, r4, r4, lsl #8
\r
273 orr r4, r4, r4, lsl #16
\r
277 tst r1, #2 @ not aligned?
\r
283 TileLineSinglecolAl0
\r
284 TileLineSinglecolAl0
\r
285 TileLineSinglecolAl0
\r
286 TileLineSinglecolAl0
\r
287 TileLineSinglecolAl0
\r
288 TileLineSinglecolAl0
\r
289 TileLineSinglecolAl0
\r
290 TileLineSinglecolAl0
\r
295 TileLineSinglecolAl1
\r
296 TileLineSinglecolAl1
\r
297 TileLineSinglecolAl1
\r
298 TileLineSinglecolAl1
\r
299 TileLineSinglecolAl1
\r
300 TileLineSinglecolAl1
\r
301 TileLineSinglecolAl1
\r
302 TileLineSinglecolAl1
\r
309 TileLineSinglecolAl2
\r
310 TileLineSinglecolAl2
\r
311 TileLineSinglecolAl2
\r
312 TileLineSinglecolAl2
\r
313 TileLineSinglecolAl2
\r
314 TileLineSinglecolAl2
\r
315 TileLineSinglecolAl2
\r
316 TileLineSinglecolAl2
\r
320 TileLineSinglecolAl3
\r
321 TileLineSinglecolAl3
\r
322 TileLineSinglecolAl3
\r
323 TileLineSinglecolAl3
\r
324 TileLineSinglecolAl3
\r
325 TileLineSinglecolAl3
\r
326 TileLineSinglecolAl3
\r
327 TileLineSinglecolAl3
\r
331 sub r1, r1, #328*8 @ restore pdest pointer
\r
336 @ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll]
\r
338 @static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend)
\r
340 .global DrawLayerFull
\r
343 stmfd sp!, {r4-r11,lr}
\r
345 mov r6, r1 @ hcache
\r
347 ldr r11, =(Pico+0x22228) @ Pico.video
\r
348 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
349 ldrb r5, [r11, #13] @ pvid->reg[13]
\r
350 ldrb r7, [r11, #11]
\r
353 and lr, lr, #0x00ff0000 @ lr=cells
\r
355 mov r5, r5, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)
\r
356 add r5, r5, r0, lsl #1 @ htab+=plane
\r
357 bic r5, r5, #0x00ff0000 @ just in case
\r
359 tst r7, #3 @ full screen scroll? (if ==0)
\r
360 ldrb r7, [r11, #16] @ ??hh??ww
\r
361 ldreqh r5, [r10, r5]
\r
362 biceq r5, r5, #0x0000fc00 @ r5=hscroll (0-0x3ff)
\r
363 movne r5, r5, lsr #1
\r
364 orrne r5, r5, #0x8000 @ this marks that we have htab pointer, not hscroll here
\r
368 orr r5, r5, r7, lsl #1+24
\r
369 orr r5, r5, #0x1f000000
\r
371 biclt r5, r5, #0x80000000
\r
372 biceq r5, r5, #0xc0000000
\r
373 bicgt r5, r5, #0xe0000000
\r
375 mov r9, r2, lsl #24
\r
376 orr r5, r5, r9, lsr #8 @ r5=(ymask<<24)|(trow<<16)|[htab||hscroll]
\r
380 subge r4, r4, #1 @ r4=shift[width] (5,6,6,7)
\r
383 orr lr, lr, r3, lsl #24 @ lr=(planeend<<24)|(cells<<16)|shift[width]
\r
386 mov r8, r8, lsl #24+5
\r
387 orr r8, r8, #0x1f000000
\r
391 ldreqb r4, [r11, #2]
\r
392 moveq r4, r4, lsr #3
\r
393 ldrneb r4, [r11, #4]
\r
395 orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13
\r
397 ldr r11, =PicoDraw2FB @ r11=PicoDraw2FB
\r
398 sub r4, r9, #(START_ROW<<24)
\r
400 mov r4, r4, asr #24
\r
402 mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW);
\r
404 @ Get vertical scroll value:
\r
405 add r7, r10, #0x012000
\r
406 add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)
\r
409 moveq r7, r7, lsl #22
\r
410 movne r7, r7, lsl #6
\r
411 mov r7, r7, lsr #22 @ r7=vscroll (10 bits)
\r
413 orr lr, lr, r7, lsl #3
\r
414 mov lr, lr, ror #24 @ packed: cccccccc nnnvvvvv vvvvvsss pppppppp: cells, nametab, vscroll, shift[width], planeend
\r
417 addne lr, lr, #1 @ we have vertically clipped tiles due to vscroll, so we need 1 more row
\r
420 str r7, [r6], #4 @ push y-offset to tilecache
\r
422 mla r11, r4, r7, r11 @ scrpos+=(8-(vscroll&7))*328;
\r
424 mov r9, #0xff000000 @ r9=(prevcode<<8)|flags: 1~tile empty, 2~tile singlecolor
\r
427 mov r4, lr, lsl #11
\r
428 mov r4, r4, lsr #25 @ r4=vscroll>>3 (7 bits)
\r
429 add r4, r4, r5, lsr #16 @ +trow
\r
430 and r4, r4, r5, lsr #24 @ &=ymask
\r
432 and r7, r7, #7 @ shift[width]
\r
434 and r0, r0, #0x7000 @ nametab
\r
435 add r12,r0, r4, lsl r7 @ nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<<shift[width]);
\r
437 mov r4, lr, lsr #24
\r
438 orr r12,r12,r4, lsl #23
\r
439 mov r12,r12,lsl #1 @ (nametab_row|(cells<<24)) (halfword compliant)
\r
443 moveq r7, r5, lsl #22 @ hscroll (0-3FFh)
\r
444 moveq r7, r7, lsr #22
\r
445 beq .rtr_hscroll_done
\r
447 @ get hscroll from htab
\r
448 mov r7, r5, lsl #17
\r
449 ands r4, r5, #0x00ff0000
\r
450 add r7, r7, r4, lsl #5 @ +=trow<<4
\r
451 andne r4, lr, #0x3800
\r
452 subne r7, r7, r4, lsl #7 @ if(trow) htaddr-=(vscroll&7)<<1;
\r
453 mov r7, r7, lsr #16 @ halfwords
\r
457 and r8, r8, #0xff000000
\r
458 rsb r4, r7, #0 @ r4=tilex=(-ts->hscroll)>>3
\r
461 orr r8, r8, r4 @ r8=(xmask<<24)|tilex
\r
465 add r7, r7, #1 @ r7=dx=((ts->hscroll-1)&7)+1
\r
468 subeq r12,r12, #0x01000000 @ we will loop cells+1 times, so loop less when there is no hscroll
\r
470 add r1, r11, r7 @ r1=pdest
\r
474 @ r4 & r7 are scratch in this loop
\r
475 .rtrloop: @ 40-41 times
\r
477 subs r12,r12, #0x01000000
\r
482 and r7, r8, r8, lsr #24
\r
483 add r7, r10, r7, lsl #1
\r
484 bic r4, r12, #0xff000000 @ Pico.vram[nametab_row+(tilex&xmask)];
\r
485 ldrh r7, [r7, r4] @ r7=code (int, but from unsigned, no sign extend)
\r
491 bne .rtr_notsamecode
\r
492 @ we know stuff about this tile already
\r
494 bne .rtrloop @ empty tile
\r
496 bne .rtr_singlecolor @ singlecolor tile
\r
500 and r4, r9, #0x600000
\r
501 mov r9, r7, lsl #8 @ remember new code
\r
504 and r7, r7, #0x6000
\r
505 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
508 tst r9, #0x100000 @ vflip?
\r
511 tst r9, #0x080000 @ hflip?
\r
514 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
523 tst r9, #0x080000 @ hflip?
\r
524 bne .rtr_vflip_hflip
\r
538 @ *(*hcache)++ = code|(dx<<16)|(trow<<27);
\r
540 orr r7, r7, r4, lsl #16
\r
541 and r4, r5, #0x00ff0000
\r
542 orr r7, r7, r4, lsl #11 @ (trow<<27)
\r
543 str r7, [r6], #4 @ cache hi priority tile
\r
547 add r5, r5, #0x00010000
\r
549 cmp r4, lr, lsl #24
\r
550 bge .rtrloop_outer_exit
\r
551 add r11, r11, #328*8
\r
554 .rtrloop_outer_exit:
\r
556 @ terminate cache list
\r
558 str r0, [r6] @ save cache pointer
\r
560 ldmfd sp!, {r4-r11,lr}
\r
567 .global DrawTilesFromCacheF @ int *hc
\r
569 DrawTilesFromCacheF:
\r
570 stmfd sp!, {r4-r10,lr}
\r
572 mov r9, #0xff000000 @ r9=prevcode=-1
\r
573 mvn r6, #0 @ r6=prevy=-1
\r
575 ldr r4, =PicoDraw2FB @ r4=PicoDraw2FB
\r
576 ldr r1, [r0], #4 @ read y offset
\r
580 sub r12, r1, #(328*8*START_ROW) @ r12=scrpos
\r
582 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
587 @ *hcache++ = code|(dx<<16)|(trow<<27); // cache it
\r
590 ldr r7, [r8], #4 @ read code
\r
591 movs r1, r7, lsr #16 @ r1=dx;
\r
592 ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return
\r
595 cmp r6, r7, lsr #27
\r
596 movne r6, r7, lsr #27
\r
598 mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8
\r
600 bic r1, r1, #0xf800
\r
601 add r1, r5, r1 @ r1=pdest (halfwords)
\r
603 mov r7, r7, lsl #16
\r
604 mov r7, r7, lsr #16
\r
607 bne .dtfcf_notsamecode
\r
608 @ we know stuff about this tile already
\r
610 bne .dtfcf_loop @ empty tile
\r
612 bne .dtfcf_singlecolor @ singlecolor tile
\r
615 .dtfcf_notsamecode:
\r
616 and r4, r9, #0x600000
\r
617 mov r9, r7, lsl #8 @ remember new code
\r
620 and r7, r7, #0x6000
\r
621 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
626 tst r9, #0x100000 @ vflip?
\r
629 tst r9, #0x080000 @ hflip?
\r
632 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
641 tst r9, #0x080000 @ hflip?
\r
642 bne .dtfcf_vflip_hflip
\r
647 .dtfcf_vflip_hflip:
\r
651 .dtfcf_singlecolor:
\r
660 @ (tile_start<<16)|row_start
\r
661 .global DrawWindowFull @ int tstart, int tend, int prio
\r
664 stmfd sp!, {r4-r11,lr}
\r
666 ldr r11, =(Pico+0x22228) @ Pico.video
\r
667 ldrb r12, [r11, #3] @ pvid->reg[3]
\r
668 mov r12, r12, lsl #10
\r
671 mov r5, #1 @ nametab_step
\r
672 tst r4, #1 @ 40 cell mode?
\r
673 andne r12, r12, #0xf000 @ 0x3c<<10
\r
674 andeq r12, r12, #0xf800
\r
675 movne r5, r5, lsl #7
\r
676 moveq r5, r5, lsl #6 @ nametab_step
\r
679 mla r12, r5, r4, r12 @ nametab += nametab_step*start;
\r
681 mov r4, r0, lsr #16 @ r4=start_cell_h
\r
682 add r7, r12, r4, lsl #1
\r
684 @ fetch the first code now
\r
685 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
687 cmp r2, r7, lsr #15
\r
688 ldmnefd sp!, {r4-r11,pc} @ hack: simply assume that whole window uses same priority
\r
690 rsb r8, r4, r1, lsr #16 @ cells (h)
\r
691 orr r8, r8, r4, lsl #8
\r
692 mov r4, r1, lsl #24
\r
693 sub r4, r4, r0, lsl #24
\r
694 orr r8, r8, r4, lsr #8 @ r8=cells_h|(start_cell_h<<8)|(cells_v<<16)
\r
695 sub r8, r8, #0x010000 @ adjust for algo
\r
697 mov r9, #0xff000000 @ r9=prevcode=-1
\r
699 ldr r11, =PicoDraw2FB @ r11=scrpos
\r
702 sub r4, r4, #START_ROW
\r
703 add r11, r11, #328*8
\r
707 mla r11, r7, r4, r11 @ scrpos+=8*328*(start-START_ROW);
\r
711 and r6, r8, #0xff00 @ r6=tilex
\r
712 add r1, r11, r6, lsr #5 @ r1=pdest
\r
713 add r6, r12, r6, lsr #7
\r
714 add r6, r10, r6 @ r6=Pico.vram+nametab+tilex
\r
715 orr r8, r8, r8, lsl #24
\r
716 sub r8, r8, #0x01000000 @ cell loop counter
\r
719 @ r4 & r7 are scratch in this loop
\r
722 subs r8, r8, #0x01000000
\r
726 ldrh r7, [r6], #2 @ r7=code
\r
729 bne .dwf_notsamecode
\r
730 @ we know stuff about this tile already
\r
732 bne .dwfloop @ empty tile
\r
734 bne .dwf_singlecolor @ singlecolor tile
\r
738 and r4, r9, #0x600000
\r
739 mov r9, r7, lsl #8 @ remember new code
\r
742 and r7, r7, #0x6000
\r
743 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
747 tst r9, #0x100000 @ vflip?
\r
750 tst r9, #0x080000 @ hflip?
\r
753 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
762 tst r9, #0x080000 @ hflip?
\r
763 bne .dwf_vflip_hflip
\r
777 bic r8, r8, #0xff000000 @ fix r8
\r
778 subs r8, r8, #0x010000
\r
779 ldmmifd sp!, {r4-r11,pc}
\r
780 add r11, r11, #328*8
\r
781 add r12, r12, r5 @ nametab+=nametab_step
\r
787 @ ---------------- sprites ---------------
\r
789 .macro SpriteLoop hflip vflip
\r
791 mov r1, r5, lsr #24 @ height
\r
793 mla r11, r1, r0, r11 @ scrpos+=height*328*8;
\r
794 add r12, r12, r1, lsl #3 @ sy+=height*8
\r
799 add r8, r8, r1, lsl #3 @ sx+=width*8
\r
803 add r9, r9, r5, lsr #16
\r
804 sub r5, r5, #1 @ sub width
\r
808 cmp r8, #0 @ skip tiles hidden on the left of screen
\r
811 add r9, r9, r5, lsr #16
\r
820 add r8, r8, #8 @ sx+=8
\r
822 bic r5, r5, #0xff000000 @ fix height
\r
823 orr r5, r5, r5, lsl #16
\r
826 sub r5, r5, #1 @ width--
\r
827 movs r1, r5, lsl #24
\r
828 ldmmifd sp!, {r4-r11,pc} @ end of tile
\r
830 subs r8, r8, #8 @ sx-=8
\r
831 ldmlefd sp!, {r4-r11,pc} @ tile offscreen
\r
834 ldmgefd sp!, {r4-r11,pc} @ tile offscreen
\r
836 mov r6, r12 @ r6=sy
\r
837 add r1, r11, r8 @ pdest=scrpos+sx
\r
841 add r9, r9, #1<<8 @ tile++
\r
843 add r6, r6, #8 @ sy+=8
\r
849 subs r5, r5, #0x01000000
\r
852 sub r6, r6, #8 @ sy-=8
\r
857 cmp r6, #(START_ROW*8)
\r
860 cmp r6, #(END_ROW*8+8)
\r
863 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
864 Tile \hflip, \vflip
\r
869 .global DrawSpriteFull @ unsigned int *sprite
\r
872 stmfd sp!, {r4-r11,lr}
\r
874 ldr r3, [r0] @ sprite[0]
\r
876 mov r6, r5, lsr #30
\r
877 add r6, r6, #1 @ r6=width
\r
879 mov r5, r5, lsr #30
\r
880 add r5, r5, #1 @ r5=height
\r
882 mov r12, r3, lsl #23
\r
883 mov r12, r12, lsr #23
\r
885 ldr lr, [r0, #4] @ lr=code
\r
886 sub r12, r12, #0x78 @ r12=sy
\r
888 mov r8, r8, lsr #23
\r
889 sub r8, r8, #0x78 @ r8=sx
\r
891 mov r9, lr, lsl #21
\r
892 mov r9, r9, lsr #13 @ r9=tile<<8
\r
894 and r3, lr, #0x6000
\r
895 mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30);
\r
897 ldr r11, =PicoDraw2FB @ r11=scrpos
\r
898 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
900 sub r1, r12, #(START_ROW*8)
\r
902 mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328;
\r
904 orr r5, r5, r5, lsl #16 @
\r
905 orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24)
\r
907 tst lr, #0x1000 @ vflip?
\r
910 tst lr, #0x0800 @ hflip?
\r
919 tst lr, #0x0800 @ hflip?
\r
920 bne .dsf_vflip_hflip
\r