--- /dev/null
+/*\r
+ * assembly optimized versions of most functions from draw2.c\r
+ * (C) notaz, 2006-2008\r
+ *\r
+ * This work is licensed under the terms of MAME license.\r
+ * See COPYING file in the top-level directory.\r
+ *\r
+ * this is highly specialized, be careful if changing related C code!\r
+ */\r
+\r
+.extern Pico\r
+.extern PicoDraw2FB\r
+\r
+@ define these constants in your include file:\r
+@ .equiv START_ROW, 1\r
+@ .equiv END_ROW, 27\r
+@ One row means 8 pixels. With the example above, (27-1)*8=208 lines would be rendered.\r
+#ifndef START_ROW\r
+#define START_ROW 0\r
+#endif\r
+#ifndef END_ROW\r
+#define END_ROW 28\r
+#endif\r
+\r
+.text\r
+.align 2\r
+\r
+@ BackFillFull(int reg7)\r
+@ Fills (END_ROW-START_ROW)*8 lines of the buffer pointed to by PicoDraw2FB with the\r
+@ byte (reg7 & 0x3f). Lines are 328 bytes apart; filling starts 8 lines into the buffer,\r
+@ and on every line the first 8 bytes are skipped and the following 320 are written.\r
+@ In: r0=reg7. Saves/restores r4-r9; r0-r3, r12 and flags are clobbered.\r
+.global BackFillFull @ int reg7\r
+\r
+BackFillFull:\r
+ stmfd sp!, {r4-r9,lr}\r
+\r
+ ldr lr, =PicoDraw2FB @ lr=&PicoDraw2FB\r
+ and r0, r0, #0x3f @ keep the low 6 bits of reg7\r
+ ldr lr, [lr] @ lr=PicoDraw2FB\r
+ add lr, lr, #328*8 @ start 8 lines into the buffer\r
+\r
+ orr r0, r0, r0, lsl #8 @ replicate the byte into all 4 byte lanes\r
+ orr r0, r0, r0, lsl #16\r
+\r
+ @ ten registers with the same pattern, so that one stmia writes 40 pixels\r
+ mov r1, r0\r
+ mov r2, r0\r
+ mov r3, r0\r
+ mov r4, r0\r
+ mov r5, r0\r
+ mov r6, r0\r
+ mov r7, r0\r
+ mov r8, r0\r
+ mov r9, r0\r
+\r
+ mov r12, #(END_ROW-START_ROW)*8 @ r12=lines left\r
+\r
+.bff_loop:\r
+ add lr, lr, #8 @ skip the first 8 bytes of the line\r
+ subs r12, r12, #1 @ stmia does not touch flags, so bne below sees this result\r
+\r
+.rept 8 @ 8 stores * 10 regs * 4 bytes = 320 bytes\r
+ stmia lr!, {r0-r9}\r
+.endr\r
+\r
+ bne .bff_loop\r
+\r
+ ldmfd sp!, {r4-r9,r12} @ saved lr comes back in r12\r
+ bx r12\r
+\r
+.pool\r
+\r
+@ -------- some macros --------\r
+\r
+\r
+@ helper\r
+@ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old\r
+@ Draws one 8 pixel line in which all pixels have the same non-zero colour (low nibble of r2).\r
+@ The byte written for every pixel is (r3 | colour). r1 is left unchanged, r2 is reduced\r
+@ to the colour nibble.\r
+@ notsinglecol=0: also tracks whether the whole tile is one colour. r0 must be 0xf before\r
+@ the first line of a tile; afterwards r0 = 0xf | (colour<<28). If a later line has a\r
+@ different colour, the singlecolor flag (bit 1 of r9) is cleared.\r
+@ On the first line there is no previous colour (top nibble of r0 is 0), so the compare is\r
+@ skipped; comparing against 0 there would clear the flag for every tile, because a\r
+@ non-empty single colour line never has colour 0.\r
+@ notsinglecol=1: only draws; r0 and r9 are not touched.\r
+.macro TileLineSinglecol notsinglecol=0\r
+ and r2, r2, #0xf @ #0x0000000f\r
+.if !\notsinglecol\r
+ tst r0, #0xf0000000 @ previous colour recorded? (eq on the first line of the tile)\r
+ cmpne r2, r0, lsr #28 @ if there is one and the colours do not match,\r
+ bicne r9, r9, #2 @ it is a sign that whole tile is not singlecolor (only its lines may be)\r
+.endif\r
+ orr r4, r3, r2\r
+ orr r4, r4, r4, lsl #8 @ two identical pixels in the low halfword\r
+\r
+ tst r1, #1 @ not aligned?\r
+ strneb r4, [r1], #1 @ odd address: 1 byte, 3 halfwords, 1 byte\r
+ streqh r4, [r1], #2 @ even address: 4 halfwords\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strneb r4, [r1], #1 @ have a remaining unaligned pixel? (flags still from the tst above)\r
+ sub r1, r1, #8\r
+.if !\notsinglecol\r
+ mov r0, #0xf\r
+ orr r0, r0, r2, lsl #28 @ we will need the old palindex later\r
+.endif\r
+.endm\r
+\r
+@ TileLineNorm (r1=pdest, r2=pixels8, r3=pal) r0=0xf (nibble mask), r4: scratch\r
+@ Draws one 8 pixel line of a tile that is not horizontally flipped.\r
+@ Each nibble of r2 is one pixel. A zero nibble is skipped (the destination byte is left\r
+@ untouched); a non-zero nibble is ORed with r3 and stored as one byte.\r
+@ The trailing comments show which nibble of r2 goes to each destination offset.\r
+@ r1, r2 and r3 are not modified.\r
+.macro TileLineNorm\r
+ ands r4, r0, r2, lsr #12 @ #0x0000f000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1]\r
+ ands r4, r0, r2, lsr #8 @ #0x00000f00\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#1]\r
+ ands r4, r0, r2, lsr #4 @ #0x000000f0\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#2]\r
+ ands r4, r0, r2 @ #0x0000000f\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#3]\r
+ ands r4, r0, r2, lsr #28 @ #0xf0000000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#4]\r
+ ands r4, r0, r2, lsr #24 @ #0x0f000000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#5]\r
+ ands r4, r0, r2, lsr #20 @ #0x00f00000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#6]\r
+ ands r4, r0, r2, lsr #16 @ #0x000f0000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#7]\r
+.endm\r
+\r
+@ TileLineFlip (r1=pdest, r2=pixels8, r3=pal) r0=0xf (nibble mask), r4: scratch\r
+@ Draws one 8 pixel line of a horizontally flipped tile: the same nibbles as in\r
+@ TileLineNorm, but written to the destination offsets in the reverse order.\r
+@ A zero nibble is skipped; a non-zero nibble is ORed with r3 and stored as one byte.\r
+@ r1, r2 and r3 are not modified.\r
+.macro TileLineFlip\r
+ ands r4, r0, r2, lsr #16 @ #0x000f0000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1]\r
+ ands r4, r0, r2, lsr #20 @ #0x00f00000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#1]\r
+ ands r4, r0, r2, lsr #24 @ #0x0f000000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#2]\r
+ ands r4, r0, r2, lsr #28 @ #0xf0000000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#3]\r
+ ands r4, r0, r2 @ #0x0000000f\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#4]\r
+ ands r4, r0, r2, lsr #4 @ #0x000000f0\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#5]\r
+ ands r4, r0, r2, lsr #8 @ #0x00000f00\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#6]\r
+ ands r4, r0, r2, lsr #12 @ #0x0000f000\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#7]\r
+.endm\r
+\r
+@ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf\r
+@ Draws a whole 8x8 tile, 8 lines of 8 pixels, with destination lines 328 bytes apart.\r
+@ r9 holds (code<<8) with two flags in the low bits that are updated here:\r
+@ bit 0 = tile is empty (all lines zero), bit 1 = tile is singlecolor.\r
+@ While the macro runs, the top byte of r9 is used as the line counter (7 down to -1);\r
+@ it is restored at loop_exit. r1 is restored to its entry value, r0 is 0xf again on exit.\r
+@ hflip selects TileLineFlip over TileLineNorm, vflip makes the lines be read bottom-up.\r
+@ The line loop has three states, each with its own copy of the loop header:\r
+@ 0/1: only single colour lines seen so far, 2: only empty lines seen so far,\r
+@ 4..7: generic loop, once the tile is known to be neither empty nor singlecolor.\r
+@ Numeric label 2 is defined twice on purpose: "2f" binds to the next definition,\r
+@ "2b" to the most recent one.\r
+.macro Tile hflip vflip\r
+ mov r7, r9, lsl #13 @ r9=code<<8; addr=(code&0x7ff)<<4;\r
+ add r7, r10, r7, lsr #16\r
+ orr r9, r9, #3 @ emptytile=singlecolor=1, r9 must be <code_16> 00000xxx\r
+.if \vflip\r
+ @ we read tilecodes in reverse order if we have vflip\r
+ add r7, r7, #8*4\r
+.endif\r
+ @ loop through 8 lines\r
+ orr r9, r9, #(7<<24)\r
+ b 1f @ loop_enter\r
+\r
+0: @ singlecol_loop\r
+ subs r9, r9, #(1<<24)\r
+ add r1, r1, #328 @ set pointer to next line\r
+ bmi 8f @ loop_exit with r0 restore\r
+1:\r
+.if \vflip\r
+ ldr r2, [r7, #-4]! @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+.else\r
+ ldr r2, [r7], #4\r
+.endif\r
+ tst r2, r2\r
+ beq 2f @ empty line\r
+ bic r9, r9, #1\r
+ cmp r2, r2, ror #4 @ equal only if all 8 nibbles are the same\r
+ bne 3f @ not singlecolor\r
+ TileLineSinglecol\r
+ b 0b\r
+\r
+2:\r
+ bic r9, r9, #2 @ an empty line means the tile is not singlecolor\r
+2: @ empty_loop\r
+ subs r9, r9, #(1<<24)\r
+ add r1, r1, #328 @ set pointer to next line\r
+ bmi 8f @ loop_exit with r0 restore\r
+.if \vflip\r
+ ldr r2, [r7, #-4]! @ next pack\r
+.else\r
+ ldr r2, [r7], #4\r
+.endif\r
+ mov r0, #0xf @ singlecol_loop might have messed r0\r
+ tst r2, r2\r
+ beq 2b\r
+\r
+ bic r9, r9, #3 @ if we are here, it means we have empty and not empty line\r
+ b 5f\r
+\r
+3: @ not empty, not singlecol\r
+ mov r0, #0xf\r
+ bic r9, r9, #3\r
+ b 6f\r
+\r
+4: @ not empty, not singlecol loop\r
+ subs r9, r9, #(1<<24)\r
+ add r1, r1, #328 @ set pointer to next line\r
+ bmi 9f @ loop_exit\r
+.if \vflip\r
+ ldr r2, [r7, #-4]! @ next pack\r
+.else\r
+ ldr r2, [r7], #4\r
+.endif\r
+ tst r2, r2\r
+ beq 4b @ empty line\r
+5:\r
+ cmp r2, r2, ror #4\r
+ beq 7f @ singlecolor line\r
+6:\r
+.if \hflip\r
+ TileLineFlip\r
+.else\r
+ TileLineNorm\r
+.endif\r
+ b 4b\r
+7:\r
+ TileLineSinglecol 1 @ draw only, the tile flags are already final\r
+ b 4b\r
+\r
+8:\r
+ mov r0, #0xf\r
+9: @ loop_exit\r
+ add r9, r9, #(1<<24) @ fix r9\r
+ sub r1, r1, #328*8 @ restore pdest pointer\r
+.endm\r
+\r
+\r
+@ TileLineSinglecolAlN (r1=pdest, r4,r7=color)\r
+@ Each variant stores 8 bytes of the replicated colour at r1 and then advances r1 to\r
+@ the same column of the next line (328 bytes in total). N is the low two bits of the\r
+@ address the variant is meant for, so that every store is naturally aligned.\r
+\r
+@ r1 & 3 == 0: two words at once\r
+.macro TileLineSinglecolAl0\r
+ stmia r1!, {r4,r7}\r
+ add r1, r1, #(328-8)\r
+.endm\r
+\r
+@ r1 & 3 == 1: byte, halfword, word, byte\r
+.macro TileLineSinglecolAl1\r
+ strb r4, [r1], #1\r
+ strh r4, [r1], #2\r
+ str r4, [r1], #4\r
+ strb r4, [r1], #(1+328-8) @ last pixel, and step to the next line\r
+.endm\r
+\r
+@ r1 & 3 == 2: halfword, word, halfword\r
+.macro TileLineSinglecolAl2\r
+ strh r4, [r1], #2\r
+ str r4, [r1], #4\r
+ strh r4, [r1], #2\r
+ add r1, r1, #(328-8)\r
+.endm\r
+\r
+@ r1 & 3 == 3: byte, word, halfword, byte\r
+.macro TileLineSinglecolAl3\r
+ strb r4, [r1], #1\r
+ str r4, [r1], #4\r
+ strh r4, [r1], #2\r
+ strb r4, [r1], #(1+328-8) @ last pixel, and step to the next line\r
+.endm\r
+\r
+@ TileSinglecol (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=0xf\r
+@ Fills a whole 8x8 tile with one colour: every byte written is (r3 | (r2 & 0xf)).\r
+@ r2 is not reloaded here, it must still hold the colour left by the previous tile.\r
+@ r1 is restored to its entry value at the end.\r
+@ kaligned==1, if dest is always aligned; only the word aligned variant is emitted then.\r
+.macro TileSinglecol kaligned=0\r
+ and r4, r2, #0xf @ we assume we have good r2 from previous time\r
+ orr r4, r4, r3\r
+ orr r4, r4, r4, lsl #8\r
+ orr r4, r4, r4, lsl #16 @ r4=colour byte in all 4 lanes\r
+ mov r7, r4 @ second register for the 2-word stmia\r
+\r
+.if !\kaligned\r
+ @ dispatch on the low two bits of pdest\r
+ tst r1, #2\r
+ bne 2f @ r1&3 is 2 or 3\r
+ tst r1, #1\r
+ bne 1f @ r1&3 is 1\r
+.endif\r
+\r
+ @ r1&3 == 0\r
+.rept 8\r
+ TileLineSinglecolAl0\r
+.endr\r
+\r
+.if !\kaligned\r
+ b 4f\r
+1: @ r1&3 == 1\r
+.rept 8\r
+ TileLineSinglecolAl1\r
+.endr\r
+ b 4f\r
+\r
+2:\r
+ tst r1, #1\r
+ bne 3f\r
+\r
+ @ r1&3 == 2\r
+.rept 8\r
+ TileLineSinglecolAl2\r
+.endr\r
+ b 4f\r
+\r
+3: @ r1&3 == 3\r
+.rept 8\r
+ TileLineSinglecolAl3\r
+.endr\r
+\r
+4:\r
+.endif\r
+ sub r1, r1, #328*8 @ restore pdest pointer\r
+.endm\r
+\r
+\r
+\r
+@ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll]\r
+\r
+@static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend)\r
+@ In: r0=plane, r1=hcache, r2=planestart, r3=planeend.\r
+@ planestart/planeend carry the tile row in the low byte and the cell in bits 16-23\r
+@ (see how cells and trow are extracted from them below).\r
+@ Draws the tiles whose code has bit 15 clear into the PicoDraw2FB buffer. Tiles with\r
+@ bit 15 set are not drawn; they are appended to hcache as code|(dx<<16)|(trow<<27).\r
+@ hcache[0] receives the y offset (8-(vscroll&7)) and the list is terminated with 0.\r
+@ Register use in the loops:\r
+@ r5=(ymask<<24)|(trow<<16)|[htab||hscroll], r6=hcache write pointer,\r
+@ r8=(xmask<<24)|tilex, r9=(prevcode<<8)|flags, r10=Pico.vram,\r
+@ r11=scrpos of the current row, r12=nametab_row with the cell counter in the top byte,\r
+@ lr=packed cells/nametab/vscroll/shift[width]/planeend.\r
+\r
+.global DrawLayerFull\r
+\r
+DrawLayerFull:\r
+ stmfd sp!, {r4-r11,lr}\r
+\r
+ mov r6, r1 @ hcache\r
+\r
+ ldr r11, =(Pico+0x22228) @ Pico.video\r
+ ldr r10, =(Pico+0x10000) @ r10=Pico.vram\r
+ ldrb r5, [r11, #13] @ pvid->reg[13]\r
+ ldrb r7, [r11, #11]\r
+\r
+ sub lr, r3, r2\r
+ and lr, lr, #0x00ff0000 @ lr=cells\r
+\r
+ mov r5, r5, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)\r
+ add r5, r5, r0, lsl #1 @ htab+=plane\r
+ bic r5, r5, #0x00ff0000 @ just in case\r
+\r
+ tst r7, #3 @ full screen scroll? (if ==0)\r
+ ldrb r7, [r11, #16] @ ??hh??ww\r
+ ldreqh r5, [r10, r5]\r
+ biceq r5, r5, #0x0000fc00 @ r5=hscroll (0-0x3ff)\r
+ movne r5, r5, lsr #1\r
+ orrne r5, r5, #0x8000 @ this marks that we have htab pointer, not hscroll here\r
+\r
+ and r8, r7, #3\r
+\r
+ @ build ymask in the top byte of r5, depending on the width bits in r8\r
+ orr r5, r5, r7, lsl #1+24\r
+ orr r5, r5, #0x1f000000\r
+ cmp r8, #1\r
+ biclt r5, r5, #0x80000000\r
+ biceq r5, r5, #0xc0000000\r
+ bicgt r5, r5, #0xe0000000\r
+\r
+ mov r9, r2, lsl #24\r
+ orr r5, r5, r9, lsr #8 @ r5=(ymask<<24)|(trow<<16)|[htab||hscroll]\r
+\r
+ add r4, r8, #5\r
+ cmp r4, #7\r
+ subge r4, r4, #1 @ r4=shift[width] (5,6,6,7)\r
+\r
+ orr lr, lr, r4 \r
+ orr lr, lr, r3, lsl #24 @ lr=(planeend<<24)|(cells<<16)|shift[width]\r
+\r
+ @ calculate xmask:\r
+ mov r8, r8, lsl #24+5\r
+ orr r8, r8, #0x1f000000\r
+\r
+ @ Find name table:\r
+ tst r0, r0\r
+ ldreqb r4, [r11, #2]\r
+ moveq r4, r4, lsr #3\r
+ ldrneb r4, [r11, #4]\r
+ and r4, r4, #7\r
+ orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13\r
+\r
+ ldr r11, =PicoDraw2FB @ r11=PicoDraw2FB\r
+ sub r4, r9, #(START_ROW<<24)\r
+ ldr r11, [r11]\r
+ mov r4, r4, asr #24\r
+ mov r7, #328*8\r
+ mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW);\r
+\r
+ @ Get vertical scroll value:\r
+ add r7, r10, #0x012000\r
+ add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)\r
+ ldr r7, [r7]\r
+ tst r0, r0 @ the low halfword is used for plane 0, the high one otherwise\r
+ moveq r7, r7, lsl #22\r
+ movne r7, r7, lsl #6\r
+ mov r7, r7, lsr #22 @ r7=vscroll (10 bits)\r
+\r
+ orr lr, lr, r7, lsl #3\r
+ mov lr, lr, ror #24 @ packed: cccccccc nnnvvvvv vvvvvsss pppppppp: cells, nametab, vscroll, shift[width], planeend\r
+\r
+ ands r7, r7, #7\r
+ addne lr, lr, #1 @ we have vertically clipped tiles due to vscroll, so we need 1 more row\r
+\r
+ rsb r7, r7, #8\r
+ str r7, [r6], #4 @ push y-offset to tilecache\r
+ mov r4, #328\r
+ mla r11, r4, r7, r11 @ scrpos+=(8-(vscroll&7))*328;\r
+\r
+ mov r9, #0xff000000 @ r9=(prevcode<<8)|flags: 1~tile empty, 2~tile singlecolor\r
+\r
+ @ one iteration per tile row\r
+.rtrloop_outer:\r
+ mov r4, lr, lsl #11\r
+ mov r4, r4, lsr #25 @ r4=vscroll>>3 (7 bits)\r
+ add r4, r4, r5, lsr #16 @ +trow\r
+ and r4, r4, r5, lsr #24 @ &=ymask\r
+ mov r7, lr, lsr #8\r
+ and r7, r7, #7 @ shift[width]\r
+ mov r0, lr, lsr #9\r
+ and r0, r0, #0x7000 @ nametab\r
+ add r12,r0, r4, lsl r7 @ nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<<shift[width]); \r
+\r
+ mov r4, lr, lsr #24\r
+ orr r12,r12,r4, lsl #23\r
+ mov r12,r12,lsl #1 @ (nametab_row|(cells<<24)) (halfword compliant)\r
+\r
+ @ htab?\r
+ tst r5, #0x8000\r
+ moveq r7, r5, lsl #22 @ hscroll (0-3FFh)\r
+ moveq r7, r7, lsr #22\r
+ beq .rtr_hscroll_done\r
+\r
+ @ get hscroll from htab\r
+ mov r7, r5, lsl #17\r
+ ands r4, r5, #0x00ff0000\r
+ add r7, r7, r4, lsl #5 @ +=trow<<4\r
+ andne r4, lr, #0x3800\r
+ subne r7, r7, r4, lsl #7 @ if(trow) htaddr-=(vscroll&7)<<1;\r
+ mov r7, r7, lsr #16 @ halfwords\r
+ ldrh r7, [r10, r7]\r
+\r
+.rtr_hscroll_done:\r
+ and r8, r8, #0xff000000\r
+ rsb r4, r7, #0 @ r4=tilex=(-ts->hscroll)>>3\r
+ mov r4, r4, asr #3\r
+ and r4, r4, #0xff\r
+ orr r8, r8, r4 @ r8=(xmask<<24)|tilex\r
+\r
+ sub r7, r7, #1\r
+ and r7, r7, #7\r
+ add r7, r7, #1 @ r7=dx=((ts->hscroll-1)&7)+1\r
+\r
+ cmp r7, #8\r
+ subeq r12,r12, #0x01000000 @ we will loop cells+1 times, so loop less when there is no hscroll\r
+\r
+ add r1, r11, r7 @ r1=pdest\r
+ mov r0, #0xf\r
+ b .rtrloop_enter\r
+\r
+ @ r4 & r7 are scratch in this loop\r
+.rtrloop: @ 40-41 times\r
+ add r1, r1, #8\r
+ subs r12,r12, #0x01000000\r
+ add r8, r8, #1\r
+ bmi .rtrloop_exit\r
+\r
+.rtrloop_enter:\r
+ and r7, r8, r8, lsr #24\r
+ add r7, r10, r7, lsl #1\r
+ bic r4, r12, #0xff000000 @ Pico.vram[nametab_row+(tilex&xmask)];\r
+ ldrh r7, [r7, r4] @ r7=code (int, but from unsigned, no sign extend)\r
+\r
+ tst r7, #0x8000\r
+ bne .rtr_hiprio\r
+\r
+ cmp r7, r9, lsr #8\r
+ bne .rtr_notsamecode\r
+ @ we know stuff about this tile already\r
+ tst r9, #1\r
+ bne .rtrloop @ empty tile\r
+ tst r9, #2\r
+ bne .rtr_singlecolor @ singlecolor tile\r
+ b .rtr_samecode\r
+\r
+.rtr_notsamecode:\r
+ @ NOTE(review): r4 does not seem to be read before the Tile macro overwrites it,\r
+ @ this looks like a leftover - confirm before removing\r
+ and r4, r9, #0x600000\r
+ mov r9, r7, lsl #8 @ remember new code\r
+\r
+ @ update cram\r
+ and r7, r7, #0x6000\r
+ mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);\r
+\r
+.rtr_samecode:\r
+ tst r9, #0x100000 @ vflip?\r
+ bne .rtr_vflip\r
+\r
+ tst r9, #0x080000 @ hflip?\r
+ bne .rtr_hflip\r
+\r
+ @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf\r
+ Tile 0, 0\r
+ b .rtrloop\r
+\r
+.rtr_hflip:\r
+ Tile 1, 0\r
+ b .rtrloop\r
+\r
+.rtr_vflip:\r
+ tst r9, #0x080000 @ hflip?\r
+ bne .rtr_vflip_hflip\r
+\r
+ Tile 0, 1\r
+ b .rtrloop\r
+\r
+.rtr_vflip_hflip:\r
+ Tile 1, 1\r
+ b .rtrloop\r
+\r
+.rtr_singlecolor:\r
+ TileSinglecol\r
+ b .rtrloop\r
+\r
+.rtr_hiprio:\r
+ @ *(*hcache)++ = code|(dx<<16)|(trow<<27);\r
+ sub r4, r1, r11 @ dx here is the byte offset of pdest from the start of the row\r
+ orr r7, r7, r4, lsl #16\r
+ and r4, r5, #0x00ff0000\r
+ orr r7, r7, r4, lsl #11 @ (trow<<27)\r
+ str r7, [r6], #4 @ cache hi priority tile\r
+ b .rtrloop\r
+\r
+.rtrloop_exit:\r
+ add r5, r5, #0x00010000 @ trow++\r
+ mov r4, r5, lsl #8\r
+ cmp r4, lr, lsl #24 @ compare trow with planeend (both moved to the top byte)\r
+ bge .rtrloop_outer_exit\r
+ add r11, r11, #328*8\r
+ b .rtrloop_outer\r
+\r
+.rtrloop_outer_exit:\r
+\r
+ @ terminate cache list\r
+ mov r0, #0\r
+ str r0, [r6] @ write the 0 terminator at the current cache position\r
+\r
+ ldmfd sp!, {r4-r11,lr}\r
+ bx lr\r
+\r
+.pool\r
+\r
+\r
+\r
+@ DrawTilesFromCacheF(int *hc)\r
+@ Draws the tiles stored in the list at hc (r0).\r
+@ hc[0] is a y offset in lines; every following entry is code|(dx<<16)|(trow<<27).\r
+@ An entry whose upper halfword is zero terminates the list.\r
+@ Register use: r5=pd (start of the current tile row), r6=prevy, r8=list pointer,\r
+@ r9=(prevcode<<8)|flags, r10=Pico.vram, r12=scrpos.\r
+.global DrawTilesFromCacheF @ int *hc\r
+\r
+DrawTilesFromCacheF:\r
+ stmfd sp!, {r4-r10,lr}\r
+\r
+ mov r9, #0xff000000 @ r9=prevcode=-1\r
+ mvn r6, #0 @ r6=prevy=-1\r
+\r
+ ldr r4, =PicoDraw2FB @ r4=PicoDraw2FB\r
+ ldr r1, [r0], #4 @ read y offset\r
+ ldr r4, [r4]\r
+ mov r7, #328\r
+ mla r1, r7, r1, r4\r
+ sub r12, r1, #(328*8*START_ROW) @ r12=scrpos\r
+\r
+ ldr r10, =(Pico+0x10000) @ r10=Pico.vram\r
+ mov r8, r0 @ hc\r
+ mov r0, #0xf\r
+\r
+ @ scratch: r4, r7\r
+ @ *hcache++ = code|(dx<<16)|(trow<<27); // cache it\r
+\r
+.dtfcf_loop:\r
+ ldr r7, [r8], #4 @ read code\r
+ movs r1, r7, lsr #16 @ r1=dx;\r
+ ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return\r
+\r
+ @ row changed?\r
+ @ r6 starts as -1, which no 5 bit row value can equal, so r5 is always set for the first entry\r
+ cmp r6, r7, lsr #27\r
+ movne r6, r7, lsr #27\r
+ movne r4, #328*8\r
+ mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8\r
+\r
+ bic r1, r1, #0xf800 @ drop the trow bits, leaving dx\r
+ add r1, r5, r1 @ r1=pdest = pd + dx\r
+\r
+ mov r7, r7, lsl #16\r
+ mov r7, r7, lsr #16 @ r7=code (low 16 bits)\r
+\r
+ cmp r7, r9, lsr #8\r
+ bne .dtfcf_notsamecode\r
+ @ we know stuff about this tile already\r
+ tst r9, #1\r
+ bne .dtfcf_loop @ empty tile\r
+ tst r9, #2\r
+ bne .dtfcf_singlecolor @ singlecolor tile\r
+ b .dtfcf_samecode\r
+\r
+.dtfcf_notsamecode:\r
+ @ NOTE(review): r4 does not seem to be read before the Tile macro overwrites it - confirm\r
+ and r4, r9, #0x600000\r
+ mov r9, r7, lsl #8 @ remember new code\r
+\r
+ @ update cram val\r
+ and r7, r7, #0x6000\r
+ mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);\r
+\r
+\r
+.dtfcf_samecode:\r
+\r
+ tst r9, #0x100000 @ vflip?\r
+ bne .dtfcf_vflip\r
+\r
+ tst r9, #0x080000 @ hflip?\r
+ bne .dtfcf_hflip\r
+\r
+ @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf\r
+ Tile 0, 0\r
+ b .dtfcf_loop\r
+\r
+.dtfcf_hflip:\r
+ Tile 1, 0\r
+ b .dtfcf_loop\r
+\r
+.dtfcf_vflip:\r
+ tst r9, #0x080000 @ hflip?\r
+ bne .dtfcf_vflip_hflip\r
+\r
+ Tile 0, 1\r
+ b .dtfcf_loop\r
+\r
+.dtfcf_vflip_hflip:\r
+ Tile 1, 1\r
+ b .dtfcf_loop\r
+\r
+.dtfcf_singlecolor:\r
+ TileSinglecol\r
+ b .dtfcf_loop\r
+\r
+.pool\r
+\r
+\r
+@ @@@@@@@@@@@@@@@\r
+\r
+@ DrawWindowFull(int tstart, int tend, int prio)\r
+@ tstart and tend are packed as (tile_start<<16)|row_start\r
+@ In: r0=tstart, r1=tend, r2=prio.\r
+@ Draws the window tiles of the given cell/row rectangle into the PicoDraw2FB buffer.\r
+@ Only the first tile code is checked against prio: if bit 15 of that code differs\r
+@ from prio, nothing is drawn at all.\r
+@ Register use in the loops: r5=nametab_step (bytes per name table row),\r
+@ r6=pointer to the current name table entry, r8=cells_h|(start_cell_h<<8)|(cells_v<<16)\r
+@ with the cell loop counter in the top byte, r9=(prevcode<<8)|flags, r10=Pico.vram,\r
+@ r11=scrpos of the current row, r12=nametab offset of the current row.\r
+.global DrawWindowFull @ int tstart, int tend, int prio\r
+\r
+DrawWindowFull:\r
+ stmfd sp!, {r4-r11,lr}\r
+\r
+ ldr r11, =(Pico+0x22228) @ Pico.video\r
+ ldrb r12, [r11, #3] @ pvid->reg[3]\r
+ mov r12, r12, lsl #10\r
+\r
+ ldr r4, [r11, #12] @ word load; only bit 0 of the byte at offset 12 is tested\r
+ mov r5, #1 @ nametab_step\r
+ tst r4, #1 @ 40 cell mode?\r
+ andne r12, r12, #0xf000 @ 0x3c<<10\r
+ andeq r12, r12, #0xf800\r
+ movne r5, r5, lsl #7\r
+ moveq r5, r5, lsl #6 @ nametab_step\r
+\r
+ and r4, r0, #0xff\r
+ mla r12, r5, r4, r12 @ nametab += nametab_step*start;\r
+\r
+ mov r4, r0, lsr #16 @ r4=start_cell_h\r
+ add r7, r12, r4, lsl #1\r
+\r
+ @ fetch the first code now\r
+ ldr r10, =(Pico+0x10000) @ r10=Pico.vram\r
+ ldrh r7, [r10, r7]\r
+ cmp r2, r7, lsr #15\r
+ ldmnefd sp!, {r4-r11,pc} @ hack: simply assume that whole window uses same priority\r
+\r
+ rsb r8, r4, r1, lsr #16 @ cells (h)\r
+ orr r8, r8, r4, lsl #8\r
+ mov r4, r1, lsl #24\r
+ sub r4, r4, r0, lsl #24\r
+ orr r8, r8, r4, lsr #8 @ r8=cells_h|(start_cell_h<<8)|(cells_v<<16)\r
+ sub r8, r8, #0x010000 @ adjust for algo\r
+\r
+ mov r9, #0xff000000 @ r9=prevcode=-1\r
+\r
+ ldr r11, =PicoDraw2FB @ r11=scrpos\r
+ and r4, r0, #0xff\r
+ ldr r11, [r11]\r
+ sub r4, r4, #START_ROW\r
+ add r11, r11, #328*8 @ skip the first 8 lines of the buffer\r
+ add r11, r11, #8 @ and the first 8 bytes of the line\r
+\r
+ mov r7, #328*8\r
+ mla r11, r7, r4, r11 @ scrpos+=8*328*(start-START_ROW);\r
+ mov r0, #0xf\r
+\r
+ @ one iteration per tile row\r
+.dwfloop_outer:\r
+ and r6, r8, #0xff00 @ r6=tilex\r
+ add r1, r11, r6, lsr #5 @ r1=pdest\r
+ add r6, r12, r6, lsr #7\r
+ add r6, r10, r6 @ r6=Pico.vram+nametab+tilex\r
+ orr r8, r8, r8, lsl #24 @ copy cells_h into the top byte\r
+ sub r8, r8, #0x01000000 @ cell loop counter\r
+ b .dwfloop_enter\r
+\r
+ @ r4 & r7 are scratch in this loop\r
+.dwfloop:\r
+ add r1, r1, #8\r
+ subs r8, r8, #0x01000000\r
+ bmi .dwfloop_exit\r
+\r
+.dwfloop_enter:\r
+ ldrh r7, [r6], #2 @ r7=code\r
+\r
+ cmp r7, r9, lsr #8\r
+ bne .dwf_notsamecode\r
+ @ we know stuff about this tile already\r
+ tst r9, #1\r
+ bne .dwfloop @ empty tile\r
+ tst r9, #2\r
+ bne .dwf_singlecolor @ singlecolor tile\r
+ b .dwf_samecode\r
+\r
+.dwf_notsamecode:\r
+ @ NOTE(review): r4 does not seem to be read before the Tile macro overwrites it - confirm\r
+ and r4, r9, #0x600000\r
+ mov r9, r7, lsl #8 @ remember new code\r
+\r
+ @ update cram val\r
+ and r7, r7, #0x6000\r
+ mov r3, r7, asr #9 @ r3=pal=((code&0x6000)>>9);\r
+\r
+.dwf_samecode:\r
+\r
+ tst r9, #0x100000 @ vflip?\r
+ bne .dwf_vflip\r
+\r
+ tst r9, #0x080000 @ hflip?\r
+ bne .dwf_hflip\r
+\r
+ @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf\r
+ Tile 0, 0\r
+ b .dwfloop\r
+\r
+.dwf_hflip:\r
+ Tile 1, 0\r
+ b .dwfloop\r
+\r
+.dwf_vflip:\r
+ tst r9, #0x080000 @ hflip?\r
+ bne .dwf_vflip_hflip\r
+\r
+ Tile 0, 1\r
+ b .dwfloop\r
+\r
+.dwf_vflip_hflip:\r
+ Tile 1, 1\r
+ b .dwfloop\r
+\r
+.dwf_singlecolor:\r
+ TileSinglecol 1 @ aligned variant only: pdest is assumed to be word aligned here\r
+ b .dwfloop\r
+\r
+.dwfloop_exit:\r
+ bic r8, r8, #0xff000000 @ fix r8\r
+ subs r8, r8, #0x010000 @ one row done\r
+ ldmmifd sp!, {r4-r11,pc}\r
+ add r11, r11, #328*8\r
+ add r12, r12, r5 @ nametab+=nametab_step\r
+ b .dwfloop_outer\r
+\r
+.pool\r
+\r
+\r
+@ ---------------- sprites ---------------\r
+\r
+@ SpriteLoop hflip, vflip\r
+@ Draws all tiles of one sprite, column by column (outer loop) and tile by tile within\r
+@ a column (inner loop), using the Tile macro.\r
+@ In: r3=pal, r5=width|(height<<8)|(height<<24), r8=sx, r9=tile<<8, r10=Pico.vram,\r
+@ r11=scrpos (buffer position of line sy, column 0), r12=sy.\r
+@ r0, r1, r2, r4, r6, r7: scratch.\r
+@ The macro never falls through: it returns to the caller of the enclosing function\r
+@ with ldm {r4-r11,pc}, so that function must have pushed {r4-r11,lr}.\r
+.macro SpriteLoop hflip vflip\r
+.if \vflip\r
+ mov r1, r5, lsr #24 @ height\r
+ mov r0, #328*8\r
+ mla r11, r1, r0, r11 @ scrpos+=height*328*8;\r
+ add r12, r12, r1, lsl #3 @ sy+=height*8\r
+.endif\r
+ mov r0, #0xf\r
+.if \hflip\r
+ and r1, r5, #0xff\r
+ add r8, r8, r1, lsl #3 @ sx+=width*8\r
+58: @ skip tile columns that start at sx >= 336\r
+ cmp r8, #336\r
+ blt 51f\r
+ add r9, r9, r5, lsr #16 @ tile+=height\r
+ sub r5, r5, #1 @ sub width\r
+ sub r8, r8, #8\r
+ b 58b\r
+.else\r
+ cmp r8, #0 @ skip tiles hidden on the left of screen\r
+ bgt 51f\r
+58:\r
+ add r9, r9, r5, lsr #16 @ tile+=height\r
+ sub r5, r5, #1\r
+ adds r8, r8, #8\r
+ ble 58b\r
+ b 51f\r
+.endif\r
+\r
+50: @ outer\r
+.if !\hflip\r
+ add r8, r8, #8 @ sx+=8\r
+.endif\r
+ bic r5, r5, #0xff000000 @ fix height\r
+ orr r5, r5, r5, lsl #16\r
+\r
+51: @ outer_enter\r
+ sub r5, r5, #1 @ width--\r
+ movs r1, r5, lsl #24 @ sign of the width byte\r
+ ldmmifd sp!, {r4-r11,pc} @ end of tile\r
+.if \hflip\r
+ subs r8, r8, #8 @ sx-=8\r
+ ldmlefd sp!, {r4-r11,pc} @ tile offscreen\r
+.else\r
+ cmp r8, #328\r
+ ldmgefd sp!, {r4-r11,pc} @ tile offscreen\r
+.endif\r
+ mov r6, r12 @ r6=sy\r
+ add r1, r11, r8 @ pdest=scrpos+sx\r
+ b 53f\r
+\r
+52: @ inner\r
+ add r9, r9, #1<<8 @ tile++\r
+.if !\vflip\r
+ add r6, r6, #8 @ sy+=8\r
+ add r1, r1, #328*8\r
+.endif\r
+\r
+53: @ inner_enter\r
+ @ end of sprite?\r
+ subs r5, r5, #0x01000000 @ the top byte of r5 counts the tiles left in this column\r
+ bmi 50b @ ->outer\r
+.if \vflip\r
+ sub r6, r6, #8 @ sy-=8\r
+ sub r1, r1, #328*8\r
+.endif\r
+\r
+ @ offscreen?\r
+ cmp r6, #(START_ROW*8)\r
+ ble 52b\r
+\r
+ cmp r6, #(END_ROW*8+8)\r
+ bge 52b\r
+\r
+ @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf\r
+ Tile \hflip, \vflip\r
+ b 52b\r
+.endm\r
+\r
+\r
+@ DrawSpriteFull(unsigned int *sprite)\r
+@ Draws one sprite into the PicoDraw2FB buffer.\r
+@ Fields read here: sprite[0] bits 0-8 = y, bits 24-25 = height-1, bits 26-27 = width-1;\r
+@ sprite[1] bits 0-10 = tile, bit 11 = hflip, bit 12 = vflip, bits 13-14 = palette,\r
+@ bits 16-24 = x. 0x78 is subtracted from both x and y to get sx and sy.\r
+@ The function returns from inside the SpriteLoop macro expansions.\r
+.global DrawSpriteFull @ unsigned int *sprite\r
+\r
+DrawSpriteFull:\r
+ stmfd sp!, {r4-r11,lr}\r
+\r
+ ldr r3, [r0] @ sprite[0]\r
+ mov r5, r3, lsl #4\r
+ mov r6, r5, lsr #30\r
+ add r6, r6, #1 @ r6=width\r
+ mov r5, r5, lsl #2\r
+ mov r5, r5, lsr #30\r
+ add r5, r5, #1 @ r5=height\r
+\r
+ mov r12, r3, lsl #23\r
+ mov r12, r12, lsr #23 @ low 9 bits of sprite[0]\r
+\r
+ ldr lr, [r0, #4] @ lr=code\r
+ sub r12, r12, #0x78 @ r12=sy\r
+ mov r8, lr, lsl #7\r
+ mov r8, r8, lsr #23 @ bits 16-24 of code\r
+ sub r8, r8, #0x78 @ r8=sx\r
+\r
+ mov r9, lr, lsl #21\r
+ mov r9, r9, lsr #13 @ r9=tile<<8\r
+\r
+ and r3, lr, #0x6000\r
+ mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30);\r
+\r
+ ldr r11, =PicoDraw2FB @ r11=scrpos\r
+ ldr r10, =(Pico+0x10000) @ r10=Pico.vram\r
+ ldr r11, [r11]\r
+ sub r1, r12, #(START_ROW*8)\r
+ mov r0, #328\r
+ mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328;\r
+\r
+ orr r5, r5, r5, lsl #16 @\r
+ orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24)\r
+\r
+ tst lr, #0x1000 @ vflip?\r
+ bne .dsf_vflip\r
+\r
+ tst lr, #0x0800 @ hflip?\r
+ bne .dsf_hflip\r
+\r
+ @ each SpriteLoop expansion returns by itself, execution never falls through to the next label\r
+ SpriteLoop 0, 0\r
+\r
+.dsf_hflip:\r
+ SpriteLoop 1, 0\r
+\r
+.dsf_vflip:\r
+ tst lr, #0x0800 @ hflip?\r
+ bne .dsf_vflip_hflip\r
+\r
+ SpriteLoop 0, 1\r
+\r
+.dsf_vflip_hflip:\r
+ SpriteLoop 1, 1\r
+\r
+.pool\r
+\r
+@ vim:filetype=armasm\r