--- /dev/null
+@ assembly "optimized" version of some functions from draw.c\r
+@ this is highly specialized, be careful if changing related C code!\r
+\r
+@ (c) Copyright 2006, notaz\r
+@ All Rights Reserved\r
+\r
+\r
+.extern Pico\r
+.extern PicoOpt\r
+.extern HighCol\r
+.extern Scanline\r
+.extern HighSprZ\r
+.extern rendstatus\r
+.extern DrawLineInt\r
+.extern DrawLineDest\r
+.extern DrawStripVSRam\r
+.extern DrawStripInterlace\r
+\r
+\r
+@ helper: TilePixel pat, lsrr, offs -- plot one 4-bit tile pixel, skipping transparent ones\r
+@ in: r1=pdest, r2=pixels8 (8 packed nibbles), r3=pal, pat=register holding 0xf\r
+@ out: r4=pal|pixel (or 0 when the pixel is transparent), Z set when transparent\r
+@ lsrr is the bit position of the nibble inside pixels8, offs the byte offset from pdest\r
+.macro TilePixel pat lsrr offs\r
+.if \lsrr\r
+ ands r4, \pat, r2, lsr #\lsrr\r
+.else\r
+ ands r4, \pat, r2\r
+.endif\r
+ orrne r4, r3, r4\r
+ strneb r4, [r1,#\offs]\r
+.endm\r
+\r
+@ TileNorm (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
+@ Plots the 8 pixels of one tile row at pdest+0..7, not flipped.\r
+@ Nibbles of pixels8 are consumed in the bit order 12,8,4,0,28,24,20,16; a zero nibble is\r
+@ transparent and leaves the destination byte untouched (see TilePixel). Flags are clobbered.\r
+.macro TileNorm pat\r
+ TilePixel \pat, 12, 0 @ #0x0000f000\r
+ TilePixel \pat, 8, 1 @ #0x00000f00\r
+ TilePixel \pat, 4, 2 @ #0x000000f0\r
+ TilePixel \pat, 0, 3 @ #0x0000000f\r
+ TilePixel \pat, 28, 4 @ #0xf0000000\r
+ TilePixel \pat, 24, 5 @ #0x0f000000\r
+ TilePixel \pat, 20, 6 @ #0x00f00000\r
+ TilePixel \pat, 16, 7 @ #0x000f0000\r
+.endm\r
+\r
+@ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
+@ Same as TileNorm but horizontally flipped: the nibbles are taken in the reverse order\r
+@ (16,20,24,28,0,4,8,12) while the destination offsets still run 0..7.\r
+.macro TileFlip pat\r
+ TilePixel \pat, 16, 0 @ #0x000f0000\r
+ TilePixel \pat, 20, 1 @ #0x00f00000\r
+ TilePixel \pat, 24, 2 @ #0x0f000000\r
+ TilePixel \pat, 28, 3 @ #0xf0000000\r
+ TilePixel \pat, 0, 4 @ #0x0000000f\r
+ TilePixel \pat, 4, 5 @ #0x000000f0\r
+ TilePixel \pat, 8, 6 @ #0x00000f00\r
+ TilePixel \pat, 12, 7 @ #0x0000f000\r
+.endm\r
+\r
+@ shadow/hilight mode\r
+\r
+@ this one is for hi priority layer\r
+@ Like TilePixel, but a transparent pixel also touches the destination: if the byte already\r
+@ there has bit 7 clear, it is masked with 0x3f (dropping bit 6, the 0x40 shadow marker) and\r
+@ written back. Bytes with bit 7 set are left alone. Opaque pixels are stored as pal|pixel.\r
+.macro TilePixelShHP pat lsrr offs\r
+ TilePixel \pat, \lsrr, \offs @ leaves Z set only when the pixel was transparent\r
+ ldreqb r4, [r1,#\offs]\r
+ tsteq r4, #0x80 @ for opaque pixels the flags stay NE, so everything below is skipped\r
+ andeq r4, r4, #0x3f\r
+ streqb r4, [r1,#\offs]\r
+.endm\r
+\r
+@ TileNormShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
+@ Unflipped tile row for the hi priority layer in shadow/hilight mode; same nibble order as\r
+@ TileNorm, but each pixel goes through TilePixelShHP.\r
+.macro TileNormShHP pat\r
+ TilePixelShHP \pat, 12, 0 @ #0x0000f000\r
+ TilePixelShHP \pat, 8, 1 @ #0x00000f00\r
+ TilePixelShHP \pat, 4, 2 @ #0x000000f0\r
+ TilePixelShHP \pat, 0, 3 @ #0x0000000f\r
+ TilePixelShHP \pat, 28, 4 @ #0xf0000000\r
+ TilePixelShHP \pat, 24, 5 @ #0x0f000000\r
+ TilePixelShHP \pat, 20, 6 @ #0x00f00000\r
+ TilePixelShHP \pat, 16, 7 @ #0x000f0000\r
+.endm\r
+\r
+@ TileFlipShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
+@ Horizontally flipped tile row for the hi priority layer in shadow/hilight mode; same nibble\r
+@ order as TileFlip, but each pixel goes through TilePixelShHP.\r
+.macro TileFlipShHP pat\r
+ TilePixelShHP \pat, 16, 0 @ #0x000f0000\r
+ TilePixelShHP \pat, 20, 1 @ #0x00f00000\r
+ TilePixelShHP \pat, 24, 2 @ #0x0f000000\r
+ TilePixelShHP \pat, 28, 3 @ #0xf0000000\r
+ TilePixelShHP \pat, 0, 4 @ #0x0000000f\r
+ TilePixelShHP \pat, 4, 5 @ #0x000000f0\r
+ TilePixelShHP \pat, 8, 6 @ #0x00000f00\r
+ TilePixelShHP \pat, 12, 7 @ #0x0000f000\r
+.endm\r
+\r
+\r
+@ TileSingleSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx; r12: helper pattern 0xf\r
+@ ORs 0xc0 into the 8 destination bytes at pdest..pdest+7 and leaves r1 advanced by 8.\r
+@ The work is done with halfword accesses, so the split into byte/halfword steps must follow\r
+@ the alignment of the address actually used (r1), not of sx: the two only agree while the\r
+@ line buffer itself starts on an even address.\r
+@ Clobbers r4, r7 and the flags.\r
+.macro TileSingleSh\r
+ tst r1, #1 @ destination not halfword aligned?\r
+ mov r7, #0x00c000\r
+ orr r7, r7, #0xc0 @ r7=0xc0c0: marker for two pixels at once\r
+ ldrneb r4, [r1] @ odd address: one leading byte ...\r
+ ldreqh r4, [r1] @ ... even address: a whole halfword\r
+ orr r4, r4, r7\r
+ strneb r4, [r1], #1\r
+ streqh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ orr r4, r4, r7\r
+ strh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ orr r4, r4, r7\r
+ strh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ orr r4, r4, r7\r
+ strh r4, [r1], #2\r
+ ldrneb r4, [r1] @ odd start: one trailing byte is still left\r
+ orr r4, r4, r7\r
+ strneb r4, [r1], #1\r
+.endm\r
+\r
+@ TileSingleHi (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf\r
+@ Forces bits 7:6 of the 8 destination bytes at pdest..pdest+7 to 10 (bit 6 cleared, bit 7 set)\r
+@ and leaves r1 advanced by 8. Uses halfword accesses, with a single byte at each end when\r
+@ r1 is odd. Clobbers r4, r7 and the flags.\r
+.macro TileSingleHi\r
+ tst r1, #1 @ not aligned?\r
+ mov r7, #0x008000\r
+ orr r7, r7, #0x80 @ r7=0x8080; r7>>1 = 0x4040 is the mask cleared below\r
+ ldrneb r4, [r1]\r
+ ldreqh r4, [r1]\r
+ bic r4, r4, r7, lsr #1\r
+ orr r4, r4, r7\r
+ strneb r4, [r1], #1\r
+ streqh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ bic r4, r4, r7, lsr #1\r
+ orr r4, r4, r7\r
+ strh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ bic r4, r4, r7, lsr #1\r
+ orr r4, r4, r7\r
+ strh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ bic r4, r4, r7, lsr #1\r
+ orr r4, r4, r7\r
+ strh r4, [r1], #2\r
+ ldrneb r4, [r1] @ only when the start was odd: the last remaining byte\r
+ bic r4, r4, r7, lsr #1\r
+ orr r4, r4, r7\r
+ strneb r4, [r1], #1\r
+.endm\r
+\r
+@ TileDoShGenPixel shift, ofs: plot one pixel of a shadow/hilight operator tile\r
+@ in: r1=pdest, r2=pixels8, r3=pal, r12=0xf; r4: scratch, flags clobbered\r
+@ pixel 0 -> nothing is written\r
+@ pixel 1..0xd -> pal|pixel is stored at [r1,#ofs]\r
+@ pixel 0xe -> destination byte gets bits 7:6 forced to 10 (same marking as TileSingleHi)\r
+@ pixel 0xf -> destination byte gets 0xc0 ORed in (same marking as TileSingleSh)\r
+@ Uses numeric local labels 1/2/3, so it can be expanded many times in a row.\r
+.macro TileDoShGenPixel shift ofs\r
+.if \shift\r
+ ands r4, r12, r2, lsr #\shift\r
+.else\r
+ ands r4, r12, r2\r
+.endif\r
+ beq 3f @ transparent\r
+ cmp r4, #0xe\r
+ beq 2f @ == 0xe\r
+ bgt 1f @ == 0xf (the value never exceeds 0xf)\r
+ orr r4, r3, r4\r
+ strb r4, [r1,#\ofs]\r
+ b 3f\r
+1:\r
+ ldrb r4, [r1,#\ofs]\r
+ orr r4, r4, #0xc0\r
+ strb r4, [r1,#\ofs]\r
+ b 3f\r
+2:\r
+ ldrb r4, [r1,#\ofs]\r
+ bic r4, r4, #0xc0\r
+ orr r4, r4, #0x80\r
+ strb r4, [r1,#\ofs]\r
+3:\r
+.endm\r
+\r
+@ TileFlipSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf\r
+@ Horizontally flipped tile row where every pixel goes through TileDoShGenPixel\r
+@ (pixel values 0xe/0xf modify the destination instead of being stored).\r
+.macro TileFlipSh\r
+ TileDoShGenPixel 16, 0 @ #0x000f0000\r
+ TileDoShGenPixel 20, 1 @ #0x00f00000\r
+ TileDoShGenPixel 24, 2 @ #0x0f000000\r
+ TileDoShGenPixel 28, 3 @ #0xf0000000\r
+ TileDoShGenPixel 0, 4 @ #0x0000000f\r
+ TileDoShGenPixel 4, 5 @ #0x000000f0\r
+ TileDoShGenPixel 8, 6 @ #0x00000f00\r
+ TileDoShGenPixel 12, 7 @ #0x0000f000\r
+.endm\r
+\r
+@ TileNormSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf\r
+@ Unflipped tile row where every pixel goes through TileDoShGenPixel\r
+@ (pixel values 0xe/0xf modify the destination instead of being stored).\r
+.macro TileNormSh\r
+ TileDoShGenPixel 12, 0 @ #0x0000f000\r
+ TileDoShGenPixel 8, 1 @ #0x00000f00\r
+ TileDoShGenPixel 4, 2 @ #0x000000f0\r
+ TileDoShGenPixel 0, 3 @ #0x0000000f\r
+ TileDoShGenPixel 28, 4 @ #0xf0000000\r
+ TileDoShGenPixel 24, 5 @ #0x0f000000\r
+ TileDoShGenPixel 20, 6 @ #0x00f00000\r
+ TileDoShGenPixel 16, 7 @ #0x000f0000\r
+.endm\r
+\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+@ struct TileStrip\r
+@ {\r
+@ int nametab; // 0x00\r
+@ int line; // 0x04\r
+@ int hscroll; // 0x08\r
+@ int xmask; // 0x0C\r
+@ int *hc; // 0x10 (pointer to cache buffer)\r
+@ int cells; // 0x14\r
+@ };\r
+\r
+@ int DrawLayer(int plane, int *hcache, int maxcells, int sh)\r
+\r
+.global DrawLayer @ int plane, int *hcache, int maxcells, int sh\r
+\r
+DrawLayer:\r
+ stmfd sp!, {r4-r11,lr}\r
+\r
+ ldr r11, =(Pico+0x22228) @ Pico.video\r
+\r
+ mov r6, r1 @ hcache\r
+ orr r9, r2, r3, lsl #31 @ r9=maxcells|(sh<<31)\r
+\r
+ ldrb r7, [r11, #16] @ ??hh??ww\r
+\r
+ mov r1, r7, lsl #4\r
+ orr r1, r1, #0x00ff\r
+\r
+ and r10, r7, #3\r
+ cmp r10, #1\r
+ biclt r1, r1, #0xfc00\r
+ biceq r1, r1, #0xfe00\r
+ bicgt r1, r1, #0xff00 @ r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels\r
+\r
+ add r10, r10, #5\r
+ cmp r10, #7\r
+ subge r10, r10, #1 @ r10=shift[width] (5,6,6,7)\r
+\r
+ @ calculate xmask:\r
+ mov r8, #1\r
+ mov r5, r8, lsl r10\r
+ sub r5, r5, #1 @ r5=xmask\r
+\r
+ @ Find name table:\r
+ tst r0, r0\r
+ ldreqb r12, [r11, #2]\r
+ moveq r12, r12, lsl #10\r
+ ldrneb r12, [r11, #4]\r
+ movne r12, r12, lsl #13\r
+ and r12, r12, #(7<<13) @ r12=(ts->nametab<<1) (halfword compliant)\r
+\r
+ ldr r2, =Scanline\r
+ ldr r2, [r2]\r
+ ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
+\r
+ ldrh r8, [r11, #12]\r
+ mov r4, r8, lsr #8 @ pvid->reg[13]\r
+ mov r4, r4, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)\r
+ ldrb r7, [r11, #11]\r
+ tst r7, #2\r
+ addne r4, r4, r2, lsl #2 @ htab+=Scanline<<1; // Offset by line\r
+ tst r7, #1\r
+ biceq r4, r4, #0x1f @ htab&=~0xf; // Offset by tile\r
+ add r4, r4, r0, lsl #1 @ htab+=plane\r
+ bic r4, r4, #0x00ff0000 @ just in case\r
+ ldrh r3, [lr, r4] @ r3=hscroll\r
+\r
+ tst r7, #4\r
+ bne .DrawStrip_vsscroll\r
+\r
+ @ Get vertical scroll value:\r
+ add r7, lr, #0x012000\r
+ add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)\r
+ ldr r7, [r7]\r
+\r
+ tst r8, #2\r
+ tstne r8, #4\r
+ bne .DrawStrip_interlace\r
+\r
+ tst r0, r0\r
+ movne r7, r7, lsr #16\r
+\r
+ @ Find the line in the name table\r
+ add r2, r2, r7\r
+ and r2, r2, r1\r
+ mov r4, r2, lsr #3\r
+ add r10, r10, #1 @ shift[width]++\r
+ add r12, r12, r4, lsl r10 @ nametab+=(ts.line>>3)<<shift[width];\r
+\r
+ @ ldmia r0, {r1,r2,r3,r5,r6,r9} @ r2=line, r3=ts->hscroll, r5=ts->xmask, r6=ts->hc, r9=ts->cells\r
+@ mov r12,r1, lsl #1 @ r12=(ts->nametab<<1) (halfword compliant)\r
+\r
+ and r10,r2, #7\r
+ mov r10,r10, lsl #1 @ r10=ty=(ts->line&7)<<1;\r
+ orr r10,r10, r9, lsl #24\r
+\r
+ rsb r8, r3, #0\r
+ mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3\r
+\r
+ sub r1, r3, #1\r
+ and r1, r1, #7\r
+ add r4, r1, #1 @ r4=dx=((ts->hscroll-1)&7)+1\r
+\r
+ tst r9, #1<<31\r
+ mov r3, #0\r
+ orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|ty)\r
+ movne r3, #0x40 @ default to shadowed pal on sh mode\r
+\r
+ mvn r9, #0 @ r9=prevcode=-1\r
+\r
+ @ cache some stuff to avoid mem access\r
+@ ldr r11,=HighCol\r
+ ldr r11,=DrawLineInt\r
+ ldr r11,[r11]\r
+ add r1, r11, r4 @ r1=pdest\r
+ mov r0, #0xf\r
+\r
+ cmp r4, #8\r
+ subeq r10,r10, #0x01000000 @ we will loop cells+1 times, so loop less when there is no scroll\r
+ beq .dsloop_enter\r
+\r
+ @ do first iteration with clipping\r
+ @ Here r4=dx is 1..7 and the tile starts at DrawLineInt+dx. The line proper starts at\r
+ @ DrawLineInt+8 (that is where BackFill, DrawWindow and the FinalizeLine code begin),\r
+ @ so the pixels that must be drawn are the last dx ones: offsets (8-dx)..7.\r
+ @ The dispatch tables below are therefore indexed by dx and point at the px(8-dx) entry.\r
+ and r7, r5, r8\r
+ add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)\r
+ ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend)\r
+\r
+ tst r7, #0x8000\r
+ bne .DrawStrip_hiprio\r
+\r
+ mov r9, r7 @ remember code\r
+\r
+ movs r2, r9, lsl #20 @ if (code&0x1000)\r
+ mov r2, r2, lsl #1\r
+ add r2, r2, r10, lsl #17\r
+ mov r2, r2, lsr #17\r
+ eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
+\r
+ ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+\r
+ bic r7, r3, #0x3f\r
+ and r3, r9, #0x6000\r
+ add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
+\r
+ tst r2, r2\r
+ beq .dsloop @ tileline blank\r
+\r
+ tst r9, #0x0800\r
+ addne r4, r4, #9 @ flipped: use the second table (each table has 9 entries)\r
+\r
+ ldr pc, [pc, r4, lsl #2] @ pc reads as this instruction+8, i.e. the first .word below\r
+ nop\r
+ @ not flipped, index = dx\r
+ .word .ds_tn1_px1 @ dx=0, should not happen\r
+ .word .ds_tn1_px7 @ dx=1: only pixel 7 is needed\r
+ .word .ds_tn1_px6\r
+ .word .ds_tn1_px5\r
+ .word .ds_tn1_px4\r
+ .word .ds_tn1_px3\r
+ .word .ds_tn1_px2\r
+ .word .ds_tn1_px1 @ dx=7: pixels 1..7 are needed\r
+ .word .dsloop @ dx=8, should not happen (handled before getting here)\r
+\r
+ @ flipped, index = 9+dx\r
+ .word .ds_tf1_px1 @ dx=0, should not happen\r
+ .word .ds_tf1_px7 @ dx=1\r
+ .word .ds_tf1_px6\r
+ .word .ds_tf1_px5\r
+ .word .ds_tf1_px4\r
+ .word .ds_tf1_px3\r
+ .word .ds_tf1_px2\r
+ .word .ds_tf1_px1 @ dx=7\r
+ .word .dsloop @ dx=8, should not happen\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
+ @ entering at .ds_tn1_pxN draws pixels N..7 of an unflipped tile (fall-through chain)\r
+.ds_tn1_px1:\r
+ TilePixel r0, 8, 1 @ #0x00000f00\r
+.ds_tn1_px2:\r
+ TilePixel r0, 4, 2 @ #0x000000f0\r
+.ds_tn1_px3:\r
+ TilePixel r0, 0, 3 @ #0x0000000f\r
+.ds_tn1_px4:\r
+ TilePixel r0, 28, 4 @ #0xf0000000\r
+.ds_tn1_px5:\r
+ TilePixel r0, 24, 5 @ #0x0f000000\r
+.ds_tn1_px6:\r
+ TilePixel r0, 20, 6 @ #0x00f00000\r
+.ds_tn1_px7:\r
+ TilePixel r0, 16, 7 @ #0x000f0000\r
+ b .dsloop\r
+\r
+ @ entering at .ds_tf1_pxN draws pixels N..7 of a flipped tile, then falls into .dsloop\r
+.ds_tf1_px1:\r
+ TilePixel r0, 20, 1 @ #0x00f00000\r
+.ds_tf1_px2:\r
+ TilePixel r0, 24, 2 @ #0x0f000000\r
+.ds_tf1_px3:\r
+ TilePixel r0, 28, 3 @ #0xf0000000\r
+.ds_tf1_px4:\r
+ TilePixel r0, 0, 4 @ #0x0000000f\r
+.ds_tf1_px5:\r
+ TilePixel r0, 4, 5 @ #0x000000f0\r
+.ds_tf1_px6:\r
+ TilePixel r0, 8, 6 @ #0x00000f00\r
+.ds_tf1_px7:\r
+ TilePixel r0, 12, 7 @ #0x0000f000\r
+\r
+\r
+ @ r4 & r7 are scratch in this loop\r
+.dsloop: @ 40-41 times\r
+ add r1, r1, #8\r
+.dsloop_nor1:\r
+ subs r10,r10, #0x01000000\r
+ add r8, r8, #1\r
+ bmi .dsloop_exit\r
+\r
+.dsloop_enter:\r
+ and r7, r5, r8\r
+ add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)\r
+ ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend)\r
+\r
+ tst r7, #0x8000\r
+ bne .DrawStrip_hiprio\r
+\r
+ cmp r7, r9\r
+ beq .DrawStrip_samecode @ we know stuff about this tile already\r
+\r
+ mov r9, r7 @ remember code\r
+\r
+ movs r2, r9, lsl #20 @ if (code&0x1000)\r
+ mov r2, r2, lsl #1\r
+@ bic r7, r10,#0xff000000\r
+@ add r2, r7, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4; addr+=ty\r
+ add r2, r2, r10, lsl #17\r
+ mov r2, r2, lsr #17\r
+ eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
+\r
+ bic r7, r3, #0x3f\r
+ and r3, r9, #0x6000\r
+ add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
+\r
+ ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+\r
+.DrawStrip_samecode:\r
+ tst r2, r2\r
+ beq .dsloop @ tileline blank\r
+\r
+ cmp r2, r2, ror #4\r
+ beq .DrawStrip_SingleColor @ tileline singlecolor \r
+\r
+ tst r9, #0x0800\r
+ beq .DrawStrip_TileNorm\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
+ TileFlip r0\r
+ b .dsloop\r
+\r
+.DrawStrip_TileNorm:\r
+ TileNorm r0\r
+ b .dsloop\r
+\r
+.DrawStrip_SingleColor:\r
+ and r4, r2, #0xf\r
+ orr r4, r3, r4\r
+ orr r4, r4, r4, lsl #8\r
+ tst r1, #1 @ not aligned?\r
+ strneb r4, [r1], #1\r
+ streqh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
+ b .dsloop_nor1 @ we incremeted r1 ourselves\r
+\r
+@ High priority tile met while drawing the low priority pass: it is not drawn here, it is\r
+@ appended to the cache list at r6 as code | (pdest-DrawLineInt)<<16 | ty<<25.\r
+@ As long as neither bit 23 (sh) nor bit 22 (hi_not_empty) of r10 is set, tiles whose line is\r
+@ completely blank are dropped instead of being cached (.DrawStrip_hiprio_maybempt).\r
+.DrawStrip_hiprio:\r
+ tst r10, #0x00c00000\r
+ beq .DrawStrip_hiprio_maybempt\r
+ sub r0, r1, r11 @ r0 is the 0xf helper elsewhere, it is restored below\r
+ orr r7, r7, r0, lsl #16\r
+ orr r7, r7, r10, lsl #25 @ (ty<<25)\r
+ tst r7, #0x1000\r
+ eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26;\r
+ str r7, [r6], #4 @ cache hi priority tile\r
+ mov r0, #0xf\r
+ b .dsloop\r
+\r
+.DrawStrip_hiprio_maybempt:\r
+ cmp r7, r9\r
+ beq .dsloop @ must've been empty, otherwise we wouldn't get here\r
+ mov r9, r7 @ remember code\r
+ movs r2, r9, lsl #20 @ if (code&0x1000)\r
+ mov r2, r2, lsl #1\r
+ add r2, r2, r10, lsl #17\r
+ mov r2, r2, lsr #17\r
+ eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
+ ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+ tst r2, r2\r
+ orrne r10, r10, #1<<22 @ first non-blank hi tile: from now on cache everything\r
+ bne .DrawStrip_hiprio\r
+ b .dsloop\r
+\r
+.dsloop_exit:\r
+ mov r0, #0\r
+ str r0, [r6] @ terminate the cache list\r
+\r
+ ldmfd sp!, {r4-r11,lr}\r
+ bx lr\r
+\r
+\r
+.DrawStrip_vsscroll:\r
+ @ 2-cell column based vscroll is enabled (reg[11] bit 2)\r
+ @ let the c code handle this (for now): build a struct TileStrip on the stack and\r
+ @ call DrawStripVSRam(ts, plane)\r
+\r
+ @ int nametab; // 0x00\r
+ @ int line; // 0x04\r
+ @ int hscroll; // 0x08\r
+ @ int xmask; // 0x0C\r
+ @ int *hc; // 0x10 (pointer to cache buffer)\r
+ @ int cells; // 0x14\r
+\r
+ sub sp, sp, #6*4\r
+ orr r2, r1, r10, lsl #24 @ ts.line=ymask|(shift[width]<<24); // save some stuff instead of line\r
+ mov r1, r0 @ plane\r
+ mov r0, r12, lsr #1 @ halfwords\r
+ and r9, r9, #0xff @ cells only; the sh flag kept in bit 31 is dropped here\r
+ stmia sp, {r0,r2,r3,r5,r6,r9}\r
+\r
+ mov r0, sp\r
+ bl DrawStripVSRam @ struct TileStrip *ts, int plane\r
+\r
+ add sp, sp, #6*4\r
+ ldmfd sp!, {r4-r11,lr}\r
+ bx lr\r
+\r
+@ interlace mode 2? Sonic 2?\r
+@ Reached when bits 1 and 2 of reg[12] are both set. Computes the doubled line number,\r
+@ fills a struct TileStrip on the stack and lets the C code (DrawStripInterlace) draw.\r
+.DrawStrip_interlace:\r
+ tst r0, r0\r
+ moveq r7, r7, lsl #21 @ plane 0: low halfword of the vsram word, 11 bits kept at the top\r
+ movne r7, r7, lsl #5 @ other plane: high halfword, 11 bits kept at the top\r
+\r
+ @ Find the line in the name table\r
+ add r2, r7, r2, lsl #22 @ r2=(vscroll+(Scanline<<1))<<21 (11 bits);\r
+ orr r1, r1, #0x80000000\r
+ and r2, r2, r1, ror #10 @ &((ymask<<1)|1)<<21;\r
+ mov r2, r2, lsr #21\r
+ mov r4, r2, lsr #4\r
+ mov r12, r12, lsr #1 @ halfwords\r
+ add r0, r12, r4, lsl r10 @ nametab+=(ts.line>>4)<<shift[width];\r
+ and r9, r9, #0xff @ cells only; the sh flag kept in bit 31 is dropped here\r
+\r
+ sub sp, sp, #6*4\r
+ stmia sp, {r0,r2,r3,r5,r6,r9}\r
+\r
+ mov r0, sp\r
+ bl DrawStripInterlace @ struct TileStrip *ts\r
+\r
+ add sp, sp, #6*4\r
+ ldmfd sp!, {r4-r11,lr}\r
+ bx lr\r
+\r
+.pool\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+\r
+.global BackFill @ int reg7, int sh\r
+\r
+@ BackFill(int reg7, int sh): fill the 320 bytes starting at DrawLineInt+8 with one value,\r
+@ (reg7 & 0x3f) | (sh << 6).\r
+BackFill:\r
+ stmfd sp!, {r4-r9,lr}\r
+\r
+ @ build the fill word: one byte replicated into all four lanes\r
+ and r0, r0, #0x3f\r
+ orr r0, r0, r1, lsl #6\r
+ orr r0, r0, r0, lsl #8\r
+ orr r0, r0, r0, lsl #16\r
+\r
+ mov r7, r0\r
+ mov r6, r0\r
+ mov r5, r0\r
+ mov r4, r0\r
+ mov r3, r0\r
+ mov r2, r0\r
+ mov r1, r0\r
+\r
+ @ destination: 8 bytes into the line buffer\r
+ ldr lr,=DrawLineInt\r
+ ldr lr,[lr]\r
+ add lr, lr, #8\r
+\r
+ @ 10 stores of 8 registers * 4 bytes = 320 bytes\r
+ .rept 10\r
+ stmia lr!, {r0-r7}\r
+ .endr\r
+\r
+ ldmfd sp!, {r4-r9,r12}\r
+ bx r12\r
+\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+\r
+.global DrawTilesFromCache @ int *hc, int sh\r
+\r
+DrawTilesFromCache:\r
+ stmfd sp!, {r4-r8,r11,lr}\r
+\r
+ mvn r5, #0 @ r5=prevcode=-1\r
+ mov r8, r1\r
+\r
+ @ cache some stuff to avoid mem access\r
+@ ldr r11,=HighCol\r
+ ldr r11,=DrawLineInt\r
+ ldr r11,[r11]\r
+ ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
+ mov r12,#0xf\r
+\r
+ @ scratch: r4, r7\r
+.dtfc_loop:\r
+ ldr r6, [r0], #4 @ read code\r
+ movs r1, r6, lsr #16 @ r1=dx;\r
+ ldmeqfd sp!, {r4-r8,r11,pc} @ dx is never zero, this must be a terminator, return\r
+ bic r1, r1, #0xfe00\r
+ add r1, r11, r1 @ r1=pdest\r
+\r
+@ tst r8, r8\r
+@ bne .dtfc_shadow @ this is a rare case, so we jump when it happens, not when it doesn't\r
+@.dtfc_shadow_done:\r
+\r
+ mov r7, r6, lsl #16\r
+ cmp r5, r7, lsr #16\r
+ beq .dtfc_samecode @ if (code==prevcode)\r
+\r
+ mov r5, r7, lsr #16\r
+\r
+ mov r2, r5, lsl #21\r
+ mov r2, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4;\r
+ add r2, r2, r6, lsr #25 @ addr+=ty\r
+\r
+ and r3, r5, #0x6000\r
+ mov r3, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
+\r
+ ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+\r
+.dtfc_samecode:\r
+ tst r8, r8\r
+ bne .dtfc_shadow\r
+\r
+ tst r2, r2\r
+ beq .dtfc_loop\r
+\r
+ cmp r2, r2, ror #4\r
+ beq .dtfc_SingleColor @ tileline singlecolor \r
+\r
+ tst r5, #0x0800\r
+ beq .dtfc_TileNorm\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
+ TileFlip r12\r
+ b .dtfc_loop\r
+\r
+.dtfc_TileNorm:\r
+ TileNorm r12\r
+ b .dtfc_loop\r
+\r
+.dtfc_SingleColor:\r
+ and r4, r2, #0xf\r
+ orr r4, r3, r4\r
+ orr r4, r4, r4, lsl #8\r
+ tst r1, #1 @ not aligned?\r
+ strneb r4, [r1], #1\r
+ streqh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
+ b .dtfc_loop\r
+\r
+.dtfc_shadow:\r
+ tst r2, r2\r
+ beq .dtfc_shadow_blank\r
+\r
+ cmp r2, r2, ror #4\r
+ beq .dtfc_SingleColor @ tileline singlecolor \r
+\r
+ tst r5, #0x0800\r
+ beq .dtfc_TileNormShHP\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
+ TileFlipShHP r12\r
+ b .dtfc_loop\r
+\r
+.dtfc_TileNormShHP:\r
+ TileNormShHP r12\r
+ b .dtfc_loop\r
+\r
+.dtfc_shadow_blank:\r
+ ldrb r4, [r1]\r
+ tst r4, #0x80\r
+ andeq r4, r4,#0x3f\r
+ streqb r4, [r1]\r
+ ldrb r4, [r1,#1]\r
+ tst r4, #0x80\r
+ andeq r4, r4,#0x3f\r
+ streqb r4, [r1,#1]\r
+ ldrb r4, [r1,#2]\r
+ tst r4, #0x80\r
+ andeq r4, r4,#0x3f\r
+ streqb r4, [r1,#2]\r
+ ldrb r4, [r1,#3]\r
+ tst r4, #0x80\r
+ andeq r4, r4,#0x3f\r
+ streqb r4, [r1,#3]\r
+ ldrb r4, [r1,#4]\r
+ tst r4, #0x80\r
+ andeq r4, r4,#0x3f\r
+ streqb r4, [r1,#4]\r
+ ldrb r4, [r1,#5]\r
+ tst r4, #0x80\r
+ andeq r4, r4,#0x3f\r
+ streqb r4, [r1,#5]\r
+ ldrb r4, [r1,#6]\r
+ tst r4, #0x80\r
+ andeq r4, r4,#0x3f\r
+ streqb r4, [r1,#6]\r
+ ldrb r4, [r1,#7]\r
+ tst r4, #0x80\r
+ andeq r4, r4,#0x3f\r
+ streqb r4, [r1,#7]\r
+ b .dtfc_loop\r
+\r
+.pool\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+\r
+.global DrawSpritesFromCache @ int *hc, int sh\r
+\r
+DrawSpritesFromCache:\r
+ stmfd sp!, {r4-r11,lr}\r
+\r
+ @ cache some stuff to avoid mem access\r
+@ ldr r11,=HighCol\r
+ ldr r11,=DrawLineInt\r
+ ldr r11,[r11]\r
+ ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
+ mov r6, r1, lsl #31\r
+ orr r6, r6, #1<<30\r
+ mov r12,#0xf\r
+\r
+ mov r10, r0\r
+\r
+.dsfc_loop:\r
+ ldr r9, [r10], #4 @ read code\r
+ tst r9, r9\r
+ ldmeqfd sp!, {r4-r11,pc}\r
+\r
+ mov r4, r9, lsl #28\r
+ bic r6, r6, #7\r
+ orr r6, r6, r4, lsr #30\r
+ add r6, r6, #1 @ r6=s1cc???? ... ?????www (s=shadow/hilight, cc=pal, w=width)\r
+\r
+ and r5, r9, #3\r
+ add r5, r5, #1 @ r5=delta\r
+ tst r9, #0x10000\r
+ rsbne r5, r5, #0 @ Flip X\r
+ mov r5, r5, lsl #4\r
+\r
+ mov r2, r9, lsr #17\r
+ mov r8, r2, lsl #1 @ tile=((unsigned int)code>>17)<<1;\r
+\r
+ and r3, r9, #0x30 @ r3=pal=(code&0x30);\r
+\r
+ bic r6, r6, #3<<28\r
+ orr r6, r6, r3, lsl #24\r
+\r
+ mov r0, r9, lsl #16\r
+ mov r0, r0, asr #22 @ sx=(code<<16)>>22\r
+ adds r0, r0, #0 @ set ZV\r
+ b .dsfc_inloop_enter\r
+\r
+@ scratch: r4, r7\r
+.dsfc_inloop:\r
+ sub r6, r6, #1\r
+ tst r6, #7\r
+ beq .dsfc_loop\r
+ adds r0, r0, #8\r
+ add r8, r8, r5\r
+\r
+.dsfc_inloop_enter:\r
+ ble .dsfc_inloop\r
+ cmp r0, #328\r
+ bge .dsfc_loop\r
+\r
+ mov r8, r8, lsl #17\r
+ mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address\r
+\r
+ ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+tile); // Get 8 pixels\r
+ tst r2, r2\r
+ beq .dsfc_inloop\r
+\r
+ add r1, r11, r0 @ r1=pdest\r
+\r
+ cmp r12, r6, lsr #28\r
+ beq .dsfc_shadow\r
+\r
+ cmp r2, r2, ror #4\r
+ beq .dsfc_SingleColor @ tileline singlecolor \r
+\r
+ tst r9, #0x10000\r
+ beq .dsfc_TileNorm\r
+\r
+ @ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
+ TileFlip r12\r
+ b .dsfc_inloop\r
+\r
+.dsfc_TileNorm:\r
+ TileNorm r12\r
+ b .dsfc_inloop\r
+\r
+@ all 8 pixels of the tile line have the same non-zero colour: write pal|colour 8 times,\r
+@ with halfword stores where possible. r4: scratch, r1 is advanced by 8.\r
+.dsfc_SingleColor:\r
+ tst r1, #1 @ not aligned? (test the address that is stored to, not sx)\r
+ and r4, r2, #0xf\r
+ orr r4, r3, r4\r
+ orr r4, r4, r4, lsl #8\r
+ strneb r4, [r1], #1\r
+ streqh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
+ b .dsfc_inloop\r
+\r
+.dsfc_shadow:\r
+ cmp r2, r2, ror #4\r
+ beq .dsfc_singlec_sh\r
+\r
+ tst r9, #0x10000\r
+ beq .dsfc_TileNorm_sh\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
+ TileFlipSh\r
+ b .dsfc_inloop\r
+\r
+.dsfc_TileNorm_sh:\r
+ TileNormSh\r
+ b .dsfc_inloop\r
+\r
+.dsfc_singlec_sh:\r
+ cmp r2, #0xe0000000\r
+ bcc .dsfc_SingleColor @ normal singlecolor tileline (carry inverted in ARM)\r
+ tst r2, #0x10000000\r
+ bne .dsfc_sh_sh\r
+ TileSingleHi\r
+ b .dsfc_inloop\r
+\r
+.dsfc_sh_sh:\r
+ TileSingleSh\r
+ b .dsfc_inloop\r
+\r
+.pool\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+@ + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. size\r
+@ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8\r
+\r
+.global DrawSprite @ unsigned int *sprite, int **hc, int sh\r
+\r
+DrawSprite:\r
+ stmfd sp!, {r4-r9,r11,lr}\r
+\r
+ ldr r3, [r0] @ sprite[0]\r
+ mov r6, r3, lsr #28\r
+ sub r6, r6, #1 @ r6=width-1 (inc later)\r
+ mov r5, r3, lsr #24\r
+ and r5, r5, #7 @ r5=height\r
+\r
+ mov r4, r3, lsl #16 @ r4=sy<<16 (tmp)\r
+\r
+ ldr r7, =Scanline\r
+ ldr r7, [r7]\r
+ sub r7, r7, r4, asr #16 @ r7=row=Scanline-sy\r
+\r
+ tst r2, r2\r
+ ldr r9, [r0, #4]\r
+ mov r2, r9, asr #16 @ r2=sx\r
+ bic r9, r9, #0xfe000000\r
+ orrne r9, r9, #1<<31 @ r9=code|(sh<<31)\r
+\r
+ tst r9, #0x1000\r
+ movne r4, r5, lsl #3\r
+ subne r4, r4, #1\r
+ subne r7, r4, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y\r
+\r
+ mov r8, r9, lsl #21\r
+ mov r8, r8, lsr #21\r
+ add r8, r8, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down\r
+ \r
+ tst r9, #0x0800\r
+ mlane r8, r5, r6, r8 @ if (code&0x0800) { tile+=delta*(width-1);\r
+ rsbne r5, r5, #0 @ delta=-delta; } // r5=delta now\r
+\r
+ mov r8, r8, lsl #4\r
+ and r7, r7, #7\r
+ add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address\r
+\r
+ tst r9, #0x8000\r
+ bne .dspr_cache @ if(code&0x8000) // high priority - cache it\r
+\r
+ @ cache some stuff to avoid mem access\r
+@ ldr r11,=HighCol\r
+ ldr r11,=DrawLineInt\r
+ ldr r11,[r11]\r
+ ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
+ mov r12,#0xf\r
+\r
+ mov r5, r5, lsl #4 @ delta<<=4; // Delta of address\r
+ and r4, r9, #0x6000\r
+ orr r9, r9, r4, lsl #16\r
+ orr r9, r9, #0x10000000 @ r9=scc1 ???? ... <code> (s=shadow/hilight, cc=pal)\r
+\r
+ tst r9, #1<<31\r
+ mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30);\r
+ orrne r3, r3, #0x40 @ shadow by default\r
+\r
+ add r6, r6, #1 @ inc now\r
+ adds r0, r2, #0 @ mov sx to r0 and set ZV flags\r
+ b .dspr_loop_enter\r
+\r
+.dspr_loop:\r
+ subs r6, r6, #1 @ width--\r
+ ldmeqfd sp!, {r4-r9,r11,pc}@ return\r
+ adds r0, r0, #8 @ sx+=8\r
+ add r8, r8, r5 @ tile+=delta\r
+\r
+.dspr_loop_enter:\r
+ ble .dspr_loop @ sx <= 0\r
+ cmp r0, #328\r
+ ldmgefd sp!, {r4-r9,r11,pc}@ return\r
+\r
+ mov r8, r8, lsl #17\r
+ mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address\r
+\r
+ ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+ tst r2, r2\r
+ beq .dspr_loop\r
+\r
+ add r1, r11, r0 @ r1=pdest\r
+\r
+ cmp r12, r9, lsr #28\r
+ beq .dspr_shadow\r
+\r
+ cmp r2, r2, ror #4\r
+ beq .dspr_SingleColor @ tileline singlecolor \r
+\r
+ tst r9, #0x0800\r
+ beq .dspr_TileNorm\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
+ TileFlip r12\r
+ b .dspr_loop\r
+\r
+@ scratch: r4, r7\r
+.dspr_TileNorm:\r
+ TileNorm r12\r
+ b .dspr_loop\r
+\r
+@ all 8 pixels of the tile line have the same non-zero colour: write pal|colour 8 times,\r
+@ with halfword stores where possible. r4: scratch, r1 is advanced by 8.\r
+.dspr_SingleColor:\r
+ and r4, r2, #0xf\r
+ orr r4, r3, r4\r
+ orr r4, r4, r4, lsl #8\r
+ tst r1, #1 @ not aligned? (test the address that is stored to, not sx)\r
+ strneb r4, [r1], #1\r
+ streqh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
+ b .dspr_loop\r
+\r
+.dspr_shadow:\r
+ cmp r2, r2, ror #4\r
+ beq .dspr_singlec_sh\r
+\r
+ tst r9, #0x0800\r
+ beq .dspr_TileNorm_sh\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
+ TileFlipSh\r
+ b .dspr_loop\r
+\r
+.dspr_TileNorm_sh:\r
+ TileNormSh\r
+ b .dspr_loop\r
+\r
+.dspr_singlec_sh:\r
+ cmp r2, #0xe0000000\r
+ bcc .dspr_SingleColor @ normal tileline\r
+ tst r2, #0x10000000\r
+ bne .dspr_sh_sh\r
+ TileSingleHi\r
+ b .dspr_loop\r
+\r
+.dspr_sh_sh:\r
+ TileSingleSh\r
+ b .dspr_loop\r
+\r
+\r
+@ High priority sprite: nothing is drawn now, one packed word is appended to the list that\r
+@ *hc (r1) points to, and the list pointer is written back. r3 still holds sprite[0] here.\r
+.dspr_cache:\r
+ @ *(*hc)++ = (tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>16)&0xf);\r
+ mov r4, r8, lsl #16 @ tile\r
+ tst r9, #0x0800\r
+ orrne r4, r4, #0x10000 @ code&0x0800\r
+ mov r2, r2, lsl #22\r
+ orr r4, r4, r2, lsr #16 @ (sx<<6)&0x0000ffc0\r
+ and r2, r9, #0x6000\r
+ orr r4, r4, r2, lsr #9 @ (code>>9)&0x30\r
+ mov r2, r3, lsl #12\r
+ orr r4, r4, r2, lsr #28 @ (sprite[0]>>16)&0xf, i.e. the hhvv size bits\r
+\r
+ ldr r2, [r1]\r
+ str r4, [r2], #4\r
+ str r2, [r1]\r
+\r
+ ldmfd sp!, {r4-r9,r11,lr}\r
+ bx lr\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+.global DrawWindow @ int tstart, int tend, int prio, int sh // int *hcache\r
+\r
+DrawWindow:\r
+ stmfd sp!, {r4-r11,lr}\r
+\r
+ ldr r11, =(Pico+0x22228) @ Pico.video\r
+ ldrb r12, [r11, #3] @ pvid->reg[3]\r
+ mov r12, r12, lsl #10\r
+\r
+ ldr r10, =Scanline\r
+ ldr r10, [r10]\r
+ mov r5, r10, lsr #3\r
+ and r10, r10, #7\r
+ mov r10, r10, lsl #1 @ r10=ty\r
+\r
+ ldr r4, [r11, #12]\r
+ tst r4, #1 @ 40 cell mode?\r
+ andne r12, r12, #0xf000 @ 0x3c<<10\r
+ andeq r12, r12, #0xf800\r
+ addne r12, r12, r5, lsl #7\r
+ addeq r12, r12, r5, lsl #6 @ nametab\r
+ add r12, r12, r0, lsl #2 @ +starttile\r
+\r
+ ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
+\r
+ @ fetch the first code now\r
+ ldrh r7, [lr, r12]\r
+\r
+ ldr r6, =rendstatus\r
+ ldrb r6, [r6]\r
+ ands r6, r6, #2 @ we care about bit 1 only\r
+ orr r6, r6, r2\r
+ bne .dw_no_sameprio\r
+\r
+ cmp r2, r7, lsr #15\r
+ ldmnefd sp!, {r4-r11,pc} @ assume that whole window uses same priority\r
+\r
+.dw_no_sameprio:\r
+ orr r6, r6, r3, lsl #8 @ shadow mode\r
+\r
+ sub r8, r1, r0\r
+ mov r8, r8, lsl #1 @ cells\r
+\r
+ mvn r9, #0 @ r9=prevcode=-1\r
+\r
+ @ cache some stuff to avoid mem access\r
+@ ldr r11,=(HighCol+8)\r
+ ldr r11,=DrawLineInt\r
+ ldr r11,[r11]\r
+ add r11,r11, #8\r
+ add r1, r11, r0, lsl #4 @ r1=pdest\r
+ mov r0, #0xf\r
+ b .dwloop_enter\r
+\r
+ @ r4,r5 & r7 are scratch in this loop\r
+.dwloop:\r
+ add r1, r1, #8\r
+.dwloop_nor1:\r
+ subs r8, r8, #1\r
+ add r12, r12, #2 @ halfwords\r
+ beq .dwloop_end @ done\r
+\r
+ ldrh r7, [lr, r12] @ r7=code (int, but from unsigned, no sign extend)\r
+\r
+ eor r5, r6, r7, lsr #15\r
+ tst r5, #1\r
+ orrne r6, r6, #2 @ wrong pri\r
+ bne .dwloop\r
+\r
+ cmp r7, r9\r
+ beq .dw_samecode @ we know stuff about this tile already\r
+\r
+.dwloop_enter:\r
+ mov r9, r7 @ remember code\r
+\r
+ movs r2, r9, lsl #20 @ if (code&0x1000)\r
+ mov r2, r2, lsl #1\r
+ add r2, r10, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4; addr+=ty\r
+ eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
+\r
+ and r3, r9, #0x6000\r
+ mov r3, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
+\r
+ ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+\r
+.dw_samecode:\r
+ tst r6, #0x100\r
+ bne .dw_shadow\r
+.dw_shadow_done:\r
+ tst r2, r2\r
+ beq .dwloop @ tileline blank\r
+\r
+ cmp r2, r2, ror #4\r
+ beq .dw_SingleColor @ tileline singlecolor \r
+\r
+ tst r9, #0x0800\r
+ beq .dw_TileNorm\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
+ TileFlip r0\r
+ b .dwloop\r
+\r
+.dw_TileNorm:\r
+ TileNorm r0\r
+ b .dwloop\r
+\r
+.dw_SingleColor:\r
+ and r4, r0, r2 @ #0x0000000f\r
+ orr r4, r3, r4\r
+ orr r4, r4, r4, lsl #8\r
+ orr r4, r4, r4, lsl #16\r
+ mov r5, r4\r
+ stmia r1!, {r4,r5}\r
+ b .dwloop_nor1 @ we incremeted r1 ourselves\r
+\r
+.dw_shadow:\r
+ tst r6, #1 @ hi pri?\r
+ orreq r3, r3, #0x40\r
+ beq .dw_shadow_done\r
+ ldr r4, [r1]\r
+ tst r4, #0x00000080\r
+ biceq r4, r4, #0x000000c0\r
+ tst r4, #0x00008000\r
+ biceq r4, r4, #0x0000c000\r
+ tst r4, #0x00800000\r
+ biceq r4, r4, #0x00c00000\r
+ tst r4, #0x80000000\r
+ biceq r4, r4, #0xc0000000\r
+ str r4, [r1]\r
+ ldr r4, [r1,#4]\r
+ tst r4, #0x00000080\r
+ biceq r4, r4, #0x000000c0\r
+ tst r4, #0x00008000\r
+ biceq r4, r4, #0x0000c000\r
+ tst r4, #0x00800000\r
+ biceq r4, r4, #0x00c00000\r
+ tst r4, #0x80000000\r
+ biceq r4, r4, #0xc0000000\r
+ str r4, [r1,#4]\r
+ b .dw_shadow_done\r
+\r
+.dwloop_end:\r
+ ldr r0, =rendstatus\r
+ ldr r1, [r0]\r
+ and r6, r6, #2\r
+ orr r1, r1, r6\r
+ str r1, [r0]\r
+\r
+ ldmfd sp!, {r4-r11,r12}\r
+ bx r12\r
+\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+\r
+@ hilights 2 pixels in RGB444/BGR444 format\r
+@ reg holds two 16-bit pixels. Each of the six 4-bit fields at bits 8-11, 4-7 and 0-3 of\r
+@ both halfwords is rotated into the top nibble in turn, 4 is added to it, and on carry\r
+@ out the field is saturated to 0xf. The rotations add up to 32, so the fields end up in\r
+@ their original places. Flags are clobbered.\r
+.macro TileDoShHi2Pixels444 reg\r
+ mov \reg, \reg, ror #12\r
+ adds \reg, \reg, #0x40000000\r
+ orrcs \reg, \reg, #0xf0000000\r
+ mov \reg, \reg, ror #28\r
+ adds \reg, \reg, #0x40000000\r
+ orrcs \reg, \reg, #0xf0000000\r
+ mov \reg, \reg, ror #28\r
+ adds \reg, \reg, #0x40000000\r
+ orrcs \reg, \reg, #0xf0000000\r
+ mov \reg, \reg, ror #24\r
+ adds \reg, \reg, #0x40000000\r
+ orrcs \reg, \reg, #0xf0000000\r
+ mov \reg, \reg, ror #28\r
+ adds \reg, \reg, #0x40000000\r
+ orrcs \reg, \reg, #0xf0000000\r
+ mov \reg, \reg, ror #28\r
+ adds \reg, \reg, #0x40000000\r
+ orrcs \reg, \reg, #0xf0000000\r
+ mov \reg, \reg, ror #12\r
+.endm\r
+\r
+\r
+.global FinalizeLineBGR444 @ int sh\r
+\r
+@ FinalizeLineBGR444(int sh): convert the 8-bit line at DrawLineInt+8 into 16-bit colours at\r
+@ DrawLineDest by table lookup. Without sh the table is Pico.cram itself; with sh it is\r
+@ HighPal, laid out as 0x00-0x3f normal, 0x40-0x7f shadowed, 0x80-0xbf hilighted,\r
+@ 0xc0-0xff shadowed again, and rebuilt from cram whenever the dirtyPal byte is set.\r
+FinalizeLineBGR444:\r
+ stmfd sp!, {r4-r6,lr}\r
+ mov r6, r0\r
+ ldr r0, =DrawLineDest\r
+ ldr r0, [r0]\r
+ ldr lr, =(Pico+0x22228) @ Pico.video\r
+ sub r3, lr, #0x128 @ r3=Pico.cram\r
+\r
+ ldrb r12, [lr, #12]\r
+ tst r12, #1\r
+ movne r2, #320/4 @ len\r
+ moveq r2, #256/4\r
+ @ NOTE(review): in 32-column mode the destination is advanced by 32*2 bytes here and then\r
+ @ once more below when PicoOpt bit 0x100 is clear, i.e. by 32 or 64 pixels in total.\r
+ @ One of the two adds looks like a leftover - confirm against the C version.\r
+ addeq r0, r0, #32*2\r
+ ldreq r4, =PicoOpt\r
+ ldreq r4, [r4]\r
+ tsteq r4, #0x100 @ only executed (and only changes the flags) in 32-column mode\r
+ addeq r0, r0, #32*2\r
+\r
+ tst r6, r6\r
+ beq .fl_noshBGR444\r
+\r
+ ldr r4, =HighPal\r
+\r
+ ldrb r12, [lr, #-0x1a] @ 0x2220e ~ dirtyPal\r
+ tst r12, r12\r
+ moveq r3, r4 @ palette unchanged: HighPal is still valid, just use it\r
+ beq .fl_noshBGR444\r
+ mov r12, #0\r
+ strb r12, [lr, #-0x1a]\r
+\r
+ mov lr, #0x40/8\r
+ @ copy pal:\r
+.fl_loopcpBGR444:\r
+ subs lr, lr, #1\r
+ ldmia r3!, {r1,r5,r6,r12} @ 4 words = 8 colours per pass\r
+ stmia r4!, {r1,r5,r6,r12}\r
+ bne .fl_loopcpBGR444\r
+\r
+ @ shadowed pixels:\r
+ mov r12, #0x0077\r
+ orr r12,r12,#0x0700\r
+ orr r12,r12,r12,lsl #16 @ r12=0x07770777\r
+ sub r3, r3, #0x40*2 @ back to the start of cram\r
+ add r5, r4, #0x80*2 @ second copy goes to entries 0xc0-0xff\r
+ mov lr, #0x40/4\r
+.fl_loopcpBGR444_sh:\r
+ subs lr, lr, #1\r
+ ldmia r3!, {r1,r6}\r
+ and r1, r12, r1, lsr #1 @ halve every colour field of both pixels\r
+ and r6, r12, r6, lsr #1\r
+ stmia r4!, {r1,r6}\r
+ stmia r5!, {r1,r6}\r
+ bne .fl_loopcpBGR444_sh\r
+\r
+ @ hilighted pixels:\r
+ sub r3, r3, #0x40*2\r
+ mov lr, #0x40/2\r
+.fl_loopcpBGR444_hi:\r
+ ldr r1, [r3], #4\r
+ TileDoShHi2Pixels444 r1\r
+ str r1, [r4], #4\r
+ subs lr, lr, #1\r
+ bne .fl_loopcpBGR444_hi\r
+\r
+ sub r3, r4, #0x40*3*2 @ r3=HighPal (r4 is 0xc0 entries past its start here)\r
+\r
+\r
+.fl_noshBGR444:\r
+@ ldr r1, =(HighCol+8)\r
+ ldr r1, =DrawLineInt\r
+ ldr r1, [r1]\r
+ add r1, r1, #8\r
+ mov lr, #0xff\r
+ mov lr, lr, lsl #1 @ lr=0x1fe: mask for (index*2)\r
+\r
+.fl_loopBGR444:\r
+ subs r2, r2, #1\r
+\r
+ ldr r12, [r1], #4 @ 4 source pixels (palette indices)\r
+\r
+ and r4, lr, r12, lsl #1\r
+ ldrh r4, [r3, r4]\r
+ and r5, lr, r12, lsr #7\r
+ ldrh r5, [r3, r5]\r
+ orr r4, r4, r5, lsl #16\r
+\r
+ and r5, lr, r12, lsr #15\r
+ ldrh r5, [r3, r5]\r
+ and r6, lr, r12, lsr #23\r
+ ldrh r6, [r3, r6]\r
+ orr r5, r5, r6, lsl #16\r
+\r
+ stmia r0!, {r4,r5} @ 4 converted pixels; flags from the subs above are still intact\r
+ bne .fl_loopBGR444\r
+\r
+\r
+ ldmfd sp!, {r4-r6,lr}\r
+ bx lr\r
+\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+\r
+@ hilights 2 pixels in RGB555/BGR555 format\r
+@ \reg holds two packed 16-bit pixels and is modified in place; flags are clobbered.\r
+@ Each step brightens the field currently in the top bits (add 0x40000000,\r
+@ force the top nibble to 0xf when the add carries out), then rotates the\r
+@ next field to the top. The six rotates sum to 160 bits (5 full turns),\r
+@ so \reg ends up in its original orientation.\r
+.macro TileDoShHi2Pixels555 reg\r
+.irp rot, 27, 26, 27, 27, 26, 27\r
+ adds \reg, \reg, #0x40000000\r
+ orrcs \reg, \reg, #0xf0000000\r
+ mov \reg, \reg, ror #\rot\r
+.endr\r
+.endm\r
+\r
+\r
+@ Convert 0000bbb0 ggg0rrr0\r
+@ to rrrrrggg gggbbbbb\r
+@ \reg holds two such 16-bit pixels (one per halfword) and is converted in place.\r
+@ The 3 source bits of each component become the top bits of the wider output\r
+@ field; the remaining low bits are filled by repeating the source's top bits.\r
+@ Flags are not modified.\r
+\r
+@ r2,r3,r9 - scratch, lr = 0x001c001c, r8 = 0x00030003\r
+.macro convRGB565 reg\r
+ @ red: source bits 3..1 -> bits 4..2, top two source bits again in bits 1..0\r
+ and r2, lr, \reg,lsl #1\r
+ and r9, r8, \reg,lsr #2\r
+ orr r2, r2, r9 @ r2=red\r
+ @ blue: source bits 11..9 -> bits 4..2, top two source bits again in bits 1..0\r
+ and r3, lr, \reg,lsr #7\r
+ and r9, r8, \reg,lsr #10\r
+ orr r3, r3, r9 @ r3=blue\r
+ @ green: keep source bits 7..5 (mask 0x00e000e0) and repeat them in bits 10..8\r
+ and \reg, \reg, lr, lsl #3\r
+ orr \reg, \reg, \reg,lsl #3 @ green\r
+ orr \reg, \reg, r2, lsl #11 @ add red back\r
+ orr \reg, \reg, r3 @ add blue back\r
+.endm\r
+\r
+@ Converts 16-bit pixels with convRGB565 while copying them from 'from' to 'to'.\r
+@ In: r0 = to, r1 = from, r2 = pixels\r
+@ Pixels are handled 8 at a time (four words per pass); the pass count is\r
+@ pixels/8 and is only tested after the loop body, so at least one pass runs.\r
+@ Clobbers: r0-r3, r12, flags (r4-r9 and lr are saved/restored)\r
+vidConvCpyRGB565: @ void *to, void *from, int pixels\r
+ stmfd sp!, {r4-r9,lr}\r
+\r
+ mov r12, r2, lsr #3 @ repeats\r
+ mov lr, #0x001c0000\r
+ orr lr, lr, #0x01c @ lr == pattern 0x001c001c\r
+ mov r8, #0x00030000\r
+ orr r8, r8, #0x003 @ r8 == pattern 0x00030003\r
+\r
+.loopRGB565:\r
+ subs r12, r12, #1\r
+\r
+ @ convRGB565 uses r2,r3,r9 as scratch and leaves the flags from subs intact\r
+ ldmia r1!, {r4-r7}\r
+ convRGB565 r4\r
+ str r4, [r0], #4\r
+ convRGB565 r5\r
+ str r5, [r0], #4\r
+ convRGB565 r6\r
+ str r6, [r0], #4\r
+ convRGB565 r7\r
+ str r7, [r0], #4\r
+\r
+ bgt .loopRGB565\r
+\r
+ ldmfd sp!, {r4-r9,lr}\r
+ bx lr\r
+\r
+\r
+\r
+.global FinalizeLineRGB555 @ int sh\r
+\r
+@ void FinalizeLineRGB555(int sh)\r
+@ Expands one line of 8-bit palette indices (read from *DrawLineInt + 8) into\r
+@ 16-bit pixels stored at *DrawLineDest, looking colors up in HighPal.\r
+@ HighPal is refreshed from Pico.cram via vidConvCpyRGB565 when dirtyPal is set.\r
+@ In: r0 = sh (nonzero: also build the shadow/hilight parts of HighPal)\r
+@ Clobbers: r0-r3, r12, flags (r4-r8 and lr are saved/restored)\r
+FinalizeLineRGB555:\r
+ stmfd sp!, {r4-r8,lr}\r
+ ldr r5, =(Pico+0x22228) @ Pico.video\r
+ ldr r4, =HighPal\r
+ mov r6, r0\r
+\r
+ ldrb r7, [r5, #-0x1a] @ 0x2220e ~ dirtyPal\r
+ tst r7, r7\r
+ beq .fl_noconvRGB555\r
+ @ palette changed: clear the flag and convert 0x40 entries cram -> HighPal\r
+ mov r1, #0\r
+ strb r1, [r5, #-0x1a]\r
+ sub r1, r5, #0x128 @ r1=Pico.cram\r
+ mov r0, r4\r
+ mov r2, #0x40\r
+ bl vidConvCpyRGB565\r
+\r
+.fl_noconvRGB555:\r
+ ldr r0, =DrawLineDest\r
+ ldr r0, [r0]\r
+\r
+ ldrb r12, [r5, #12]\r
+ tst r12, #1\r
+ movne r2, #320/8 @ len\r
+ moveq r2, #256/8\r
+ @ 256 pixel line: skip 32 output pixels (32*2 bytes) unless PicoOpt bit 0x100\r
+ @ is set; tsteq only executes for the 256 pixel case, so for 320 pixel lines\r
+ @ the flags stay NE and addeq is skipped\r
+ ldreq r3, =PicoOpt\r
+ ldreq r3, [r3]\r
+ tsteq r3, #0x100\r
+ addeq r0, r0, #32*2\r
+\r
+ mov r3, r4\r
+ @ shadow/hilight entries are rebuilt only when sh is nonzero AND the palette\r
+ @ was dirty on this call (r7 still holds the dirtyPal value read above)\r
+ tst r6, r6\r
+ beq .fl_noshRGB555\r
+ tst r7, r7\r
+ beq .fl_noshRGB555\r
+\r
+ @ shadowed pixels:\r
+ @ each pixel is shifted right by one and masked with 0x738e (r12=0x738e738e)\r
+ mov r12, #0x008e\r
+ orr r12,r12,#0x7300\r
+ orr r12,r12,r12,lsl #16\r
+ add r4, r3, #0x40*2\r
+ add r5, r3, #0xc0*2\r
+ @ r4 -> entries 0x40.., r5 -> entries 0xc0.. (both receive the shadowed colors)\r
+ mov lr, #0x40/4\r
+.fl_loopcpRGB555_sh:\r
+ subs lr, lr, #1\r
+ ldmia r3!, {r1,r6}\r
+ and r1, r12, r1, lsr #1\r
+ and r6, r12, r6, lsr #1\r
+ stmia r4!, {r1,r6}\r
+ stmia r5!, {r1,r6}\r
+ bne .fl_loopcpRGB555_sh\r
+\r
+ @ hilighted pixels:\r
+ @ r3 is rewound to the start of HighPal; r4 continues at entry 0x80\r
+ sub r3, r3, #0x40*2\r
+ mov lr, #0x40/2\r
+.fl_loopcpRGB555_hi:\r
+ ldr r1, [r3], #4\r
+ TileDoShHi2Pixels555 r1\r
+ str r1, [r4], #4\r
+ subs lr, lr, #1\r
+ bne .fl_loopcpRGB555_hi\r
+\r
+ @ rewind r3 to the start of HighPal for the lookup loop\r
+ sub r3, r3, #0x40*2\r
+\r
+\r
+.fl_noshRGB555:\r
+@ ldr r1, =(HighCol+8)\r
+ ldr r1, =DrawLineInt\r
+ ldr r1, [r1]\r
+ add r1, r1, #8\r
+ @ lr = 0x1fe: mask for (index byte * 2), the halfword offset into the palette\r
+ mov lr, #0xff\r
+ mov lr, lr, lsl #1\r
+\r
+ @ each pass: 8 index bytes in (r12, r7), 8 16-bit pixels out; r2 = passes left\r
+.fl_loopRGB555:\r
+ subs r2, r2, #1\r
+\r
+ ldr r12, [r1], #4\r
+ ldr r7, [r1], #4\r
+\r
+ and r4, lr, r12, lsl #1\r
+ ldrh r4, [r3, r4]\r
+ and r5, lr, r12, lsr #7\r
+ ldrh r5, [r3, r5]\r
+ orr r4, r4, r5, lsl #16\r
+\r
+ and r5, lr, r12, lsr #15\r
+ ldrh r5, [r3, r5]\r
+ and r6, lr, r12, lsr #23\r
+ ldrh r6, [r3, r6]\r
+ orr r5, r5, r6, lsl #16\r
+\r
+ and r8, lr, r7, lsl #1\r
+ ldrh r8, [r3, r8]\r
+ and r6, lr, r7, lsr #7\r
+ ldrh r6, [r3, r6]\r
+ orr r8, r8, r6, lsl #16\r
+\r
+ and r12,lr, r7, lsr #15\r
+ ldrh r12,[r3, r12]\r
+ and r6, lr, r7, lsr #23\r
+ ldrh r6, [r3, r6]\r
+ orr r12,r12, r6, lsl #16\r
+\r
+ stmia r0!, {r4,r5,r8,r12}\r
+ @ flags are still those of the subs at the top of the loop\r
+ bne .fl_loopRGB555\r
+\r
+\r
+ ldmfd sp!, {r4-r8,lr}\r
+ bx lr\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
+\r
+@ utility\r
+.global blockcpy @ void *dst, void *src, size_t n\r
+\r
+@ void blockcpy(void *dst, void *src, size_t n)\r
+@ Copies n bytes from src to dst in 16-byte blocks. n is rounded down to a\r
+@ multiple of 16; when n < 16 nothing is copied.\r
+@ Uses ldmia/stmia, so both pointers are expected to be word-aligned.\r
+@ In: r0 = dst, r1 = src, r2 = n\r
+@ Clobbers: r0-r3, r12, flags (r4, r5 are saved/restored)\r
+blockcpy:\r
+ movs r2, r2, lsr #4 @ r2 = number of 16-byte blocks\r
+ bxeq lr @ no whole block: leave before the counter can wrap around\r
+ stmfd sp!, {r4,r5}\r
+blockcpy_loop:\r
+ subs r2, r2, #1\r
+ ldmia r1!, {r3-r5,r12}\r
+ stmia r0!, {r3-r5,r12}\r
+ bne blockcpy_loop @ flags from subs; ldm/stm do not change them\r
+ ldmfd sp!, {r4,r5}\r
+ bx lr\r
+\r
+\r
+.global blockcpy_or @ void *dst, void *src, size_t n, int pat\r
+\r
+@ void blockcpy_or(void *dst, void *src, size_t n, int pat)\r
+@ Like blockcpy, but every copied word is ORed with pat | pat<<8 | pat<<16 | pat<<24,\r
+@ i.e. with pat repeated in each byte when pat fits in 8 bits.\r
+@ n is rounded down to a multiple of 16; when n < 16 nothing is copied.\r
+@ Uses ldmia/stmia, so both pointers are expected to be word-aligned.\r
+@ In: r0 = dst, r1 = src, r2 = n, r3 = pat\r
+@ Clobbers: r0-r3, r12, flags (r4-r6 are saved/restored)\r
+blockcpy_or:\r
+ movs r2, r2, lsr #4 @ r2 = number of 16-byte blocks\r
+ bxeq lr @ no whole block: leave before the counter can wrap around\r
+ stmfd sp!, {r4-r6}\r
+ orr r3, r3, r3, lsl #8\r
+ orr r3, r3, r3, lsl #16 @ r3 = pat in all four byte lanes\r
+blockcpy_loop_or:\r
+ subs r2, r2, #1\r
+ ldmia r1!, {r4-r6,r12}\r
+ orr r4, r4, r3\r
+ orr r5, r5, r3\r
+ orr r6, r6, r3\r
+ orr r12,r12,r3\r
+ stmia r0!, {r4-r6,r12}\r
+ bne blockcpy_loop_or @ flags from subs; orr (no S) and ldm/stm keep them\r
+ ldmfd sp!, {r4-r6}\r
+ bx lr\r
+\r