1 @ assembly optimized versions of most functions from draw2.c
\r
2 @ this is highly specialized, be careful if changing related C code!
\r
4 @ (c) Copyright 2006, notaz
\r
5 @ All Rights Reserved
\r
11 @ define these constants in your include file:
\r
12 @ .equiv START_ROW, 1
\r
13 @ .equiv END_ROW, 27
\r
14 @ one row means 8 lines of pixels. With the example values above, (27-1)*8=208 lines would be rendered.
\r
15 .include "port_config.s"
\r
18 .global BackFillFull @ int reg7
\r
21 stmfd sp!, {r4-r9,lr}
\r
23 ldr lr, =framebuff @ lr=framebuff
\r
29 orr r0, r0, r0, lsl #8
\r
30 orr r0, r0, r0, lsl #16
\r
32 mov r1, r0 @ 25 opcodes wasted?
\r
42 mov r12, #(END_ROW-START_ROW)*8
\r
49 stmia lr!, {r0-r9} @ 10*4*8
\r
60 ldmfd sp!, {r4-r9,r12}
\r
65 @ -------- some macros --------
\r
69 @ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old
\r
70 .macro TileLineSinglecol notsinglecol=0
\r
71 and r2, r2, #0xf @ #0x0000000f
\r
73 cmp r2, r0, lsr #28 @ if these don't match,
\r
74 bicne r9, r9, #2 @ it is a sign that the whole tile is not singlecolor (only its lines may be)
\r
77 orr r4, r4, r4, lsl #8
\r
79 tst r1, #1 @ not aligned?
\r
85 strneb r4, [r1], #1 @ have a remaining unaligned pixel?
\r
89 orr r0, r0, r2, lsl #28 @ we will need the old palindex later
\r
93 @ TileNorm (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
\r
95 ands r4, r0, r2, lsr #12 @ #0x0000f000
\r
98 ands r4, r0, r2, lsr #8 @ #0x00000f00
\r
101 ands r4, r0, r2, lsr #4 @ #0x000000f0
\r
104 ands r4, r0, r2 @ #0x0000000f
\r
107 ands r4, r0, r2, lsr #28 @ #0xf0000000
\r
110 ands r4, r0, r2, lsr #24 @ #0x0f000000
\r
113 ands r4, r0, r2, lsr #20 @ #0x00f00000
\r
116 ands r4, r0, r2, lsr #16 @ #0x000f0000
\r
121 @ TileLineFlip (r1=pdest, r2=pixels8, r3=pal) r0,r4: scratch
\r
122 .macro TileLineFlip
\r
123 ands r4, r0, r2, lsr #16 @ #0x000f0000
\r
126 ands r4, r0, r2, lsr #20 @ #0x00f00000
\r
129 ands r4, r0, r2, lsr #24 @ #0x0f000000
\r
132 ands r4, r0, r2, lsr #28 @ #0xf0000000
\r
135 ands r4, r0, r2 @ #0x0000000f
\r
138 ands r4, r0, r2, lsr #4 @ #0x000000f0
\r
141 ands r4, r0, r2, lsr #8 @ #0x00000f00
\r
144 ands r4, r0, r2, lsr #12 @ #0x0000f000
\r
149 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
150 .macro Tile hflip vflip
\r
151 mov r7, r9, lsl #13 @ r9=code<<8; addr=(code&0x7ff)<<4;
\r
152 add r7, r10, r7, lsr #16
\r
153 orr r9, r9, #3 @ emptytile=singlecolor=1, r9 must be <code_16> 00000xxx
\r
155 @ we read tilecodes in reverse order if we have vflip
\r
158 @ loop through 8 lines
\r
159 orr r9, r9, #(7<<24)
\r
162 0: @ singlecol_loop
\r
163 subs r9, r9, #(1<<24)
\r
164 add r1, r1, #328 @ set pointer to next line
\r
165 bmi 8f @ loop_exit with r0 restore
\r
168 ldr r2, [r7, #-4]! @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
\r
173 beq 2f @ empty line
\r
176 bne 3f @ not singlecolor
\r
183 subs r9, r9, #(1<<24)
\r
184 add r1, r1, #328 @ set pointer to next line
\r
185 bmi 8f @ loop_exit with r0 restore
\r
187 ldr r2, [r7, #-4]! @ next pack
\r
191 mov r0, #0xf @ singlecol_loop might have messed r0
\r
195 bic r9, r9, #3 @ if we are here, it means we have empty and not empty line
\r
198 3: @ not empty, not singlecol
\r
203 4: @ not empty, not singlecol loop
\r
204 subs r9, r9, #(1<<24)
\r
205 add r1, r1, #328 @ set pointer to next line
\r
208 ldr r2, [r7, #-4]! @ next pack
\r
213 beq 4b @ empty line
\r
216 beq 7f @ singlecolor line
\r
225 TileLineSinglecol 1
\r
231 add r9, r9, #(1<<24) @ fix r9
\r
232 sub r1, r1, #328*8 @ restore pdest pointer
\r
236 @ TileLineSinglecolAl (r1=pdest, r4,r7=color)
\r
237 .macro TileLineSinglecolAl0
\r
242 .macro TileLineSinglecolAl1
\r
246 strb r4, [r1], #1+320
\r
250 .macro TileLineSinglecolAl2
\r
257 .macro TileLineSinglecolAl3
\r
261 strb r4, [r1], #1+320
\r
265 @ TileSinglecol (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=0xf
\r
266 @ kaligned==1, if dest is always aligned
\r
267 .macro TileSinglecol kaligned=0
\r
268 and r4, r2, #0xf @ we assume we have good r2 from previous time
\r
270 orr r4, r4, r4, lsl #8
\r
271 orr r4, r4, r4, lsl #16
\r
275 tst r1, #2 @ not aligned?
\r
281 TileLineSinglecolAl0
\r
282 TileLineSinglecolAl0
\r
283 TileLineSinglecolAl0
\r
284 TileLineSinglecolAl0
\r
285 TileLineSinglecolAl0
\r
286 TileLineSinglecolAl0
\r
287 TileLineSinglecolAl0
\r
288 TileLineSinglecolAl0
\r
293 TileLineSinglecolAl1
\r
294 TileLineSinglecolAl1
\r
295 TileLineSinglecolAl1
\r
296 TileLineSinglecolAl1
\r
297 TileLineSinglecolAl1
\r
298 TileLineSinglecolAl1
\r
299 TileLineSinglecolAl1
\r
300 TileLineSinglecolAl1
\r
307 TileLineSinglecolAl2
\r
308 TileLineSinglecolAl2
\r
309 TileLineSinglecolAl2
\r
310 TileLineSinglecolAl2
\r
311 TileLineSinglecolAl2
\r
312 TileLineSinglecolAl2
\r
313 TileLineSinglecolAl2
\r
314 TileLineSinglecolAl2
\r
318 TileLineSinglecolAl3
\r
319 TileLineSinglecolAl3
\r
320 TileLineSinglecolAl3
\r
321 TileLineSinglecolAl3
\r
322 TileLineSinglecolAl3
\r
323 TileLineSinglecolAl3
\r
324 TileLineSinglecolAl3
\r
325 TileLineSinglecolAl3
\r
329 sub r1, r1, #328*8 @ restore pdest pointer
\r
334 @ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll]
\r
336 @static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend)
\r
338 .global DrawLayerFull
\r
341 stmfd sp!, {r4-r11,lr}
\r
343 mov r6, r1 @ hcache
\r
346 and lr, lr, #0x00ff0000 @ lr=cells
\r
348 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
350 ldr r11, =(Pico+0x22228) @ Pico.video
\r
351 ldrb r5, [r11, #13] @ pvid->reg[13]
\r
352 mov r5, r5, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)
\r
353 add r5, r5, r0, lsl #1 @ htab+=plane
\r
354 bic r5, r5, #0x00ff0000 @ just in case
\r
356 ldrb r7, [r11, #11]
\r
357 tst r7, #3 @ full screen scroll? (if ==0)
\r
358 ldreqh r5, [r10, r5]
\r
359 biceq r5, r5, #0x0000fc00 @ r5=hscroll (0-0x3ff)
\r
360 movne r5, r5, lsr #1
\r
361 orrne r5, r5, #0x8000 @ this marks that we have htab pointer, not hscroll here
\r
363 ldrb r7, [r11, #16] @ ??hh??ww
\r
366 orr r5, r5, r7, lsl #1+24
\r
367 orr r5, r5, #0x1f000000
\r
369 biclt r5, r5, #0x80000000
\r
370 biceq r5, r5, #0xc0000000
\r
371 bicgt r5, r5, #0xe0000000
\r
373 mov r9, r2, lsl #24
\r
374 orr r5, r5, r9, lsr #8 @ r5=(ymask<<24)|(trow<<16)|[htab||hscroll]
\r
378 subge r4, r4, #1 @ r4=shift[width] (5,6,6,7)
\r
381 orr lr, lr, r3, lsl #24 @ lr=(planeend<<24)|(cells<<16)|shift[width]
\r
384 mov r8, r8, lsl #24+5
\r
385 orr r8, r8, #0x1f000000
\r
389 ldreqb r4, [r11, #2]
\r
390 moveq r4, r4, lsr #3
\r
391 ldrneb r4, [r11, #4]
\r
393 orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13
\r
395 ldr r11, =framebuff @ r11=framebuff
\r
397 sub r4, r9, #(START_ROW<<24)
\r
398 mov r4, r4, asr #24
\r
400 mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW);
\r
402 @ Get vertical scroll value:
\r
403 add r7, r10, #0x012000
\r
404 add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)
\r
407 moveq r7, r7, lsl #22
\r
408 movne r7, r7, lsl #6
\r
409 mov r7, r7, lsr #22 @ r7=vscroll (10 bits)
\r
411 orr lr, lr, r7, lsl #3
\r
412 mov lr, lr, ror #24 @ packed: cccccccc nnnvvvvv vvvvvsss pppppppp: cells, nametab, vscroll, shift[width], planeend
\r
415 addne lr, lr, #1 @ we have vertically clipped tiles due to vscroll, so we need 1 more row
\r
418 str r7, [r6], #4 @ push y-offset to tilecache
\r
420 mla r11, r4, r7, r11 @ scrpos+=(8-(vscroll&7))*328;
\r
422 mov r9, #0xff000000 @ r9=(prevcode<<8)|flags: 1~tile empty, 2~tile singlecolor
\r
425 mov r4, lr, lsl #11
\r
426 mov r4, r4, lsr #25 @ r4=vscroll>>3 (7 bits)
\r
427 add r4, r4, r5, lsr #16 @ +trow
\r
428 and r4, r4, r5, lsr #24 @ &=ymask
\r
430 and r7, r7, #7 @ shift[width]
\r
432 and r0, r0, #0x7000 @ nametab
\r
433 add r12,r0, r4, lsl r7 @ nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<<shift[width]);
\r
435 mov r4, lr, lsr #24
\r
436 orr r12,r12,r4, lsl #23
\r
437 mov r12,r12,lsl #1 @ (nametab_row|(cells<<24)) (halfword compliant)
\r
441 moveq r7, r5, lsl #22 @ hscroll (0-3FFh)
\r
442 moveq r7, r7, lsr #22
\r
443 beq .rtr_hscroll_done
\r
445 @ get hscroll from htab
\r
446 mov r7, r5, lsl #17
\r
447 ands r4, r5, #0x00ff0000
\r
448 add r7, r7, r4, lsl #5 @ +=trow<<4
\r
449 andne r4, lr, #0x3800
\r
450 subne r7, r7, r4, lsl #7 @ if(trow) htaddr-=(vscroll&7)<<1;
\r
451 mov r7, r7, lsr #16 @ halfwords
\r
455 rsb r4, r7, #0 @ r4=tilex=(-ts->hscroll)>>3
\r
458 and r8, r8, #0xff000000
\r
459 orr r8, r8, r4 @ r8=(xmask<<24)|tilex
\r
463 add r7, r7, #1 @ r7=dx=((ts->hscroll-1)&7)+1
\r
466 subeq r12,r12, #0x01000000 @ we will loop cells+1 times, so loop less when there is no hscroll
\r
468 add r1, r11, r7 @ r1=pdest
\r
472 @ r4 & r7 are scratch in this loop
\r
473 .rtrloop: @ 40-41 times
\r
475 subs r12,r12, #0x01000000
\r
480 and r7, r8, r8, lsr #24
\r
481 add r7, r10, r7, lsl #1
\r
482 bic r4, r12, #0xff000000 @ Pico.vram[nametab_row+(tilex&xmask)];
\r
483 ldrh r7, [r7, r4] @ r7=code (int, but from unsigned, no sign extend)
\r
489 bne .rtr_notsamecode
\r
490 @ we know stuff about this tile already
\r
492 bne .rtrloop @ empty tile
\r
494 bne .rtr_singlecolor @ singlecolor tile
\r
498 and r4, r9, #0x600000
\r
499 mov r9, r7, lsl #8 @ remember new code
\r
502 and r7, r7, #0x6000
\r
503 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
506 tst r9, #0x100000 @ vflip?
\r
509 tst r9, #0x080000 @ hflip?
\r
512 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
521 tst r9, #0x080000 @ hflip?
\r
522 bne .rtr_vflip_hflip
\r
536 @ *(*hcache)++ = code|(dx<<16)|(trow<<27);
\r
538 orr r7, r7, r4, lsl #16
\r
539 and r4, r5, #0x00ff0000
\r
540 orr r7, r7, r4, lsl #11 @ (trow<<27)
\r
541 str r7, [r6], #4 @ cache hi priority tile
\r
545 add r5, r5, #0x00010000
\r
547 cmp r4, lr, lsl #24
\r
548 bge .rtrloop_outer_exit
\r
549 add r11, r11, #328*8
\r
552 .rtrloop_outer_exit:
\r
554 @ terminate cache list
\r
556 str r0, [r6] @ save cache pointer
\r
558 ldmfd sp!, {r4-r11,lr}
\r
565 .global DrawTilesFromCacheF @ int *hc
\r
567 DrawTilesFromCacheF:
\r
568 stmfd sp!, {r4-r10,lr}
\r
570 mov r9, #0xff000000 @ r9=prevcode=-1
\r
571 mvn r6, #0 @ r6=prevy=-1
\r
573 ldr r4, =framebuff @ r4=framebuff
\r
575 ldr r1, [r0], #4 @ read y offset
\r
578 sub r12, r1, #(328*8*START_ROW) @ r12=scrpos
\r
580 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
585 @ *hcache++ = code|(dx<<16)|(trow<<27); // cache it
\r
588 ldr r7, [r8], #4 @ read code
\r
589 movs r1, r7, lsr #16 @ r1=dx;
\r
590 ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return
\r
593 cmp r6, r7, lsr #27
\r
594 movne r6, r7, lsr #27
\r
596 mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8
\r
598 bic r1, r1, #0xf800
\r
599 add r1, r5, r1 @ r1=pdest (halfwords)
\r
601 mov r7, r7, lsl #16
\r
602 mov r7, r7, lsr #16
\r
605 bne .dtfcf_notsamecode
\r
606 @ we know stuff about this tile already
\r
608 bne .dtfcf_loop @ empty tile
\r
610 bne .dtfcf_singlecolor @ singlecolor tile
\r
613 .dtfcf_notsamecode:
\r
614 and r4, r9, #0x600000
\r
615 mov r9, r7, lsl #8 @ remember new code
\r
618 and r7, r7, #0x6000
\r
619 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
624 tst r9, #0x100000 @ vflip?
\r
627 tst r9, #0x080000 @ hflip?
\r
630 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
639 tst r9, #0x080000 @ hflip?
\r
640 bne .dtfcf_vflip_hflip
\r
645 .dtfcf_vflip_hflip:
\r
649 .dtfcf_singlecolor:
\r
658 @ (tile_start<<16)|row_start
\r
659 .global DrawWindowFull @ int tstart, int tend, int prio
\r
662 stmfd sp!, {r4-r11,lr}
\r
664 ldr r11, =(Pico+0x22228) @ Pico.video
\r
665 ldrb r12, [r11, #3] @ pvid->reg[3]
\r
666 mov r12, r12, lsl #10
\r
669 mov r5, #1 @ nametab_step
\r
670 tst r4, #1 @ 40 cell mode?
\r
671 andne r12, r12, #0xf000 @ 0x3c<<10
\r
672 andeq r12, r12, #0xf800
\r
673 movne r5, r5, lsl #7
\r
674 moveq r5, r5, lsl #6 @ nametab_step
\r
677 mla r12, r5, r4, r12 @ nametab += nametab_step*start;
\r
679 mov r4, r0, lsr #16 @ r4=start_cell_h
\r
680 add r7, r12, r4, lsl #1
\r
682 @ fetch the first code now
\r
683 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
685 cmp r2, r7, lsr #15
\r
686 ldmnefd sp!, {r4-r11,pc} @ hack: simply assume that whole window uses same priority
\r
688 rsb r8, r4, r1, lsr #16 @ cells (h)
\r
689 orr r8, r8, r4, lsl #8
\r
690 mov r4, r1, lsl #24
\r
691 sub r4, r4, r0, lsl #24
\r
692 orr r8, r8, r4, lsr #8 @ r8=cells_h|(start_cell_h<<8)|(cells_v<<16)
\r
693 sub r8, r8, #0x010000 @ adjust for algo
\r
695 mov r9, #0xff000000 @ r9=prevcode=-1
\r
697 ldr r11, =framebuff @ r11=scrpos
\r
699 add r11, r11, #328*8
\r
703 sub r4, r4, #START_ROW
\r
705 mla r11, r7, r4, r11 @ scrpos+=8*328*(start-START_ROW);
\r
709 and r6, r8, #0xff00 @ r6=tilex
\r
710 add r1, r11, r6, lsr #5 @ r1=pdest
\r
711 add r6, r12, r6, lsr #7
\r
712 add r6, r10, r6 @ r6=Pico.vram+nametab+tilex
\r
713 orr r8, r8, r8, lsl #24
\r
714 sub r8, r8, #0x01000000 @ cell loop counter
\r
717 @ r4 & r7 are scratch in this loop
\r
720 subs r8, r8, #0x01000000
\r
724 ldrh r7, [r6], #2 @ r7=code
\r
727 bne .dwf_notsamecode
\r
728 @ we know stuff about this tile already
\r
730 bne .dwfloop @ empty tile
\r
732 bne .dwf_singlecolor @ singlecolor tile
\r
736 and r4, r9, #0x600000
\r
737 mov r9, r7, lsl #8 @ remember new code
\r
740 and r7, r7, #0x6000
\r
741 mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);
\r
745 tst r9, #0x100000 @ vflip?
\r
748 tst r9, #0x080000 @ hflip?
\r
751 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
760 tst r9, #0x080000 @ hflip?
\r
761 bne .dwf_vflip_hflip
\r
775 bic r8, r8, #0xff000000 @ fix r8
\r
776 subs r8, r8, #0x010000
\r
777 ldmmifd sp!, {r4-r11,pc}
\r
778 add r11, r11, #328*8
\r
779 add r12, r12, r5 @ nametab+=nametab_step
\r
785 @ ---------------- sprites ---------------
\r
787 .macro SpriteLoop hflip vflip
\r
789 mov r1, r5, lsr #24 @ height
\r
791 mla r11, r1, r0, r11 @ scrpos+=height*328*8;
\r
792 add r12, r12, r1, lsl #3 @ sy+=height*8
\r
797 add r8, r8, r1, lsl #3 @ sx+=width*8
\r
801 add r9, r9, r5, lsr #16
\r
802 sub r5, r5, #1 @ sub width
\r
806 cmp r8, #0 @ skip tiles hidden on the left of screen
\r
809 add r9, r9, r5, lsr #16
\r
818 add r8, r8, #8 @ sx+=8
\r
820 bic r5, r5, #0xff000000 @ fix height
\r
821 orr r5, r5, r5, lsl #16
\r
824 sub r5, r5, #1 @ width--
\r
825 movs r1, r5, lsl #24
\r
826 ldmmifd sp!, {r4-r11,pc} @ end of tile
\r
828 subs r8, r8, #8 @ sx-=8
\r
829 ldmlefd sp!, {r4-r11,pc} @ tile offscreen
\r
832 ldmgefd sp!, {r4-r11,pc} @ tile offscreen
\r
834 mov r6, r12 @ r6=sy
\r
835 add r1, r11, r8 @ pdest=scrpos+sx
\r
839 add r9, r9, #1<<8 @ tile++
\r
841 add r6, r6, #8 @ sy+=8
\r
847 subs r5, r5, #0x01000000
\r
850 sub r6, r6, #8 @ sy-=8
\r
855 cmp r6, #(START_ROW*8)
\r
858 cmp r6, #(END_ROW*8+8)
\r
861 @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
\r
862 Tile \hflip, \vflip
\r
867 .global DrawSpriteFull @ unsigned int *sprite
\r
870 stmfd sp!, {r4-r11,lr}
\r
872 ldr r3, [r0] @ sprite[0]
\r
874 mov r6, r5, lsr #30
\r
875 add r6, r6, #1 @ r6=width
\r
877 mov r5, r5, lsr #30
\r
878 add r5, r5, #1 @ r5=height
\r
880 mov r12, r3, lsl #23
\r
881 mov r12, r12, lsr #23
\r
882 sub r12, r12, #0x78 @ r12=sy
\r
884 ldr lr, [r0, #4] @ lr=code
\r
886 mov r8, r8, lsr #23
\r
887 sub r8, r8, #0x78 @ r8=sx
\r
889 mov r9, lr, lsl #21
\r
890 mov r9, r9, lsr #13 @ r9=tile<<8
\r
892 and r3, lr, #0x6000
\r
893 mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30);
\r
895 ldr r10, =(Pico+0x10000) @ r10=Pico.vram
\r
897 ldr r11, =framebuff @ r11=scrpos
\r
899 sub r1, r12, #(START_ROW*8)
\r
901 mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328;
\r
903 orr r5, r5, r5, lsl #16 @
\r
904 orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24)
\r
906 tst lr, #0x1000 @ vflip?
\r
909 tst lr, #0x0800 @ hflip?
\r
918 tst lr, #0x0800 @ hflip?
\r
919 bne .dsf_vflip_hflip
\r