+@ vim:filetype=armasm\r
+\r
@ assembly "optimized" version of some functions from draw.c\r
@ this is highly specialized, be careful if changing related C code!\r
\r
-@ (c) Copyright 2006, notaz\r
+@ (c) Copyright 2007, Grazvydas "notaz" Ignotas\r
@ All Rights Reserved\r
\r
\r
.extern HighSprZ\r
.extern rendstatus\r
.extern DrawLineDest\r
-.extern DrawStripVSRam\r
.extern DrawStripInterlace\r
\r
\r
add r12, r12, r4, lsl r10 @ nametab+=(ts.line>>3)<<shift[width];\r
\r
@ ldmia r0, {r1,r2,r3,r5,r6,r9} @ r2=line, r3=ts->hscroll, r5=ts->xmask, r6=ts->hc, r9=ts->cells\r
-@ mov r12,r1, lsl #1 @ r12=(ts->nametab<<1) (halfword compliant)\r
\r
and r10,r2, #7\r
mov r10,r10, lsl #1 @ r10=ty=(ts->line&7)<<1;\r
ldmfd sp!, {r4-r11,lr}\r
bx lr\r
\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
.DrawStrip_vsscroll:\r
- @ shit, we have 2-cell column based vscroll\r
- @ let the c code handle this (for now)\r
+ rsb r8, r3, #0\r
+ mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3\r
+ bic r8, r8, #0xff000000\r
+ orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|tilex[15:0])\r
\r
- @ int nametab; // 0x00\r
- @ int line; // 0x04\r
- @ int hscroll; // 0x08\r
- @ int xmask; // 0x0C\r
- @ int *hc; // 0x10 (pointer to cache buffer)\r
- @ int cells; // 0x14\r
+ ldr r4, =Scanline\r
+ orr r5, r1, r10, lsl #24\r
+ ldr r4, [r4]\r
+ sub r1, r3, #1\r
+ orr r5, r5, r4, lsl #16 @ r5=(shift_width[31:24]|scanline[23:16]|ymask[15:0])\r
+ and r1, r1, #7\r
+ add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1\r
\r
- sub sp, sp, #6*4\r
- orr r2, r1, r10, lsl #24 @ ts.line=ymask|(shift[width]<<24); // save some stuff instead of line\r
- mov r1, r0 @ plane\r
- mov r0, r12, lsr #1 @ halfwords\r
- and r9, r9, #0xff\r
- stmia sp, {r0,r2,r3,r5,r6,r9}\r
+ mov r10,r9, lsl #16\r
+ tst r0, r0\r
+ orrne r10,r10, #0x8000\r
+ tst r9, #1<<31\r
+ mov r3, #0\r
+ orr r10,r10, #0xff000000 @ will be adjusted on entering loop\r
+ orrne r10,r10, #1<<23 @ r10=(cells[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0])\r
+ movne r3, #0x40 @ default to shadowed pal on sh mode\r
\r
- mov r0, sp\r
- bl DrawStripVSRam @ struct TileStrip *ts, int plane\r
+ mvn r9, #0 @ r9=prevcode=-1\r
\r
- add sp, sp, #6*4\r
- ldmfd sp!, {r4-r11,lr}\r
- bx lr\r
+ @ cache some stuff to avoid mem access\r
+ ldr r11,=HighCol\r
+ mov r0, #0xf\r
+ add r1, r11, r7 @ r1=pdest\r
+\r
+ cmp r7, #8\r
+ subne r10,r10, #0x01000000 @ have hscroll, start with negative cell\r
+\r
+\r
+ @ r4 & r7 are scratch in this loop\r
+.dsloop_vs_subr1:\r
+ sub r1, r1, #8\r
+.dsloop_vs: @ 40-41 times\r
+ add r10,r10, #0x01000000\r
+ and r4, r10, #0x003f0000\r
+ cmp r4, r10, asr #8\r
+ ble .dsloop_exit\r
+\r
+ @ calc offset and read tileline code to r7, also calc ty\r
+ add r7, lr, #0x012000\r
+ add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)\r
+ add r7, r7, r10,asr #23 @ vsram + ((cell&~1)<<1)\r
+ bic r7, r7, #3\r
+ tst r10,#0x8000 @ plane1?\r
+ addne r7, r7, #2\r
+ ldrh r7, [r7] @ r7=vscroll\r
+\r
+ bic r10,r10,#0xff @ clear old ty\r
+ and r4, r5, #0xff0000\r
+ add r4, r4, r7, lsl #16\r
+ and r4, r4, r5, lsl #16 @ r4=line<<16\r
+ and r7, r4, #0x70000\r
+ orr r10,r10,r7, lsr #15 @ new ty\r
+\r
+ mov r4, r4, lsr #19\r
+ mov r7, r5, lsr #24\r
+ mov r4, r4, lsl r7 @ nametabadd\r
+\r
+ and r7, r8, r8, lsr #25\r
+ add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)\r
+ add r7, r7, r4, lsl #1\r
+ ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend)\r
+\r
+ add r1, r1, #8\r
+ add r8, r8, #1\r
+\r
+ tst r7, #0x8000\r
+ bne .DrawStrip_vs_hiprio\r
+\r
+ cmp r7, r9\r
+ beq .DrawStrip_vs_samecode @ we know stuff about this tile already\r
+\r
+ mov r9, r7 @ remember code\r
+\r
+ movs r2, r9, lsl #20 @ if (code&0x1000)\r
+ mov r2, r2, lsl #1\r
+ add r2, r2, r10, lsl #17\r
+ mov r2, r2, lsr #17\r
+ eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
+\r
+ ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+\r
+ bic r7, r3, #0x3f\r
+ and r3, r9, #0x6000\r
+ add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
+\r
+.DrawStrip_vs_samecode:\r
+ tst r2, r2\r
+ beq .dsloop_vs @ tileline blank\r
+\r
+ cmp r2, r2, ror #4\r
+ beq .DrawStrip_vs_SingleColor @ tileline singlecolor \r
+\r
+ tst r9, #0x0800\r
+ beq .DrawStrip_vs_TileNorm\r
+\r
+ @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
+ TileFlip r0\r
+ b .dsloop_vs\r
+\r
+.DrawStrip_vs_TileNorm:\r
+ TileNorm r0\r
+ b .dsloop_vs\r
+\r
+.DrawStrip_vs_SingleColor:\r
+ and r4, r2, #0xf\r
+ orr r4, r3, r4\r
+ orr r4, r4, r4, lsl #8\r
+ tst r1, #1 @ not aligned?\r
+ strneb r4, [r1], #1\r
+ streqh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strh r4, [r1], #2\r
+ strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
+ b .dsloop_vs_subr1\r
+\r
+.DrawStrip_vs_hiprio:\r
+ tst r10, #0x00c00000\r
+ beq .DrawStrip_vs_hiprio_maybempt\r
+ sub r0, r1, r11\r
+ orr r7, r7, r0, lsl #16\r
+ orr r7, r7, r10, lsl #25 @ (ty<<25)\r
+ tst r7, #0x1000\r
+ eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26;\r
+ str r7, [r6], #4 @ cache hi priority tile\r
+ mov r0, #0xf\r
+ b .dsloop_vs\r
+\r
+.DrawStrip_vs_hiprio_maybempt:\r
+ cmp r7, r9\r
+ beq .dsloop_vs @ must've been empty, otherwise we wouldn't get here\r
+ movs r2, r7, lsl #20 @ if (code&0x1000)\r
+ mov r2, r2, lsl #1\r
+ add r2, r2, r10, lsl #17\r
+ mov r2, r2, lsr #17\r
+ eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
+ ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
+ mov r9, r7 @ remember code\r
+ tst r2, r2\r
+ orrne r10, r10, #1<<22\r
+ bne .DrawStrip_vs_hiprio\r
+ b .dsloop_vs\r
+\r
+\r
+@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
@ interlace mode 2? Sonic 2?\r
.DrawStrip_interlace:\r
\r
FinalizeLineRGB555:\r
stmfd sp!, {r4-r8,lr}\r
- ldr r5, =(Pico+0x22228) @ Pico.video\r
+ ldr r8, =(Pico+0x22228) @ Pico.video\r
ldr r4, =HighPal\r
\r
- ldrb r7, [r5, #-0x1a] @ 0x2220e ~ dirtyPal\r
+ ldrb r7, [r8, #-0x1a] @ 0x2220e ~ dirtyPal\r
mov r6, r0\r
mov r1, #0\r
tst r7, r7\r
beq .fl_noconvRGB555\r
- strb r1, [r5, #-0x1a]\r
- sub r1, r5, #0x128 @ r1=Pico.cram\r
+ strb r1, [r8, #-0x1a]\r
+ sub r1, r8, #0x128 @ r1=Pico.cram\r
mov r0, r4\r
mov r2, #0x40\r
bl vidConvCpyRGB565\r
\r
.fl_noconvRGB555:\r
- ldrb r12, [r5, #12]\r
- ldr r0, =DrawLineDest\r
- ldr r0, [r0]\r
-\r
- tst r12, #1\r
- movne r2, #320/8 @ len\r
- bne .fl_no32colRGB555\r
- ldr r3, =PicoOpt\r
- mov r2, #256/8\r
- ldr r3, [r3]\r
- tst r3, #0x100\r
- addeq r0, r0, #32*2\r
-\r
-.fl_no32colRGB555:\r
mov r3, r4\r
tst r6, r6\r
beq .fl_noshRGB555\r
\r
sub r3, r3, #0x40*2\r
\r
-\r
.fl_noshRGB555:\r
+ ldr r0, =DrawLineDest\r
ldr r1, =(HighCol+8)\r
+ ldr r0, [r0]\r
+\r
+ ldrb r12, [r8, #12]\r
mov lr, #0xff\r
mov lr, lr, lsl #1\r
\r
+ tst r12, #1\r
+ movne r2, #320/8 @ len\r
+ bne .fl_no32colRGB555\r
+ ldr r4, =PicoOpt\r
+ mov r2, #256/8\r
+ ldr r4, [r4]\r
+ tst r4, #0x4000\r
+ bne .fl_32scale_RGB555\r
+ tst r4, #0x0100\r
+ addeq r0, r0, #32*2\r
+\r
+.fl_no32colRGB555:\r
.fl_loopRGB555:\r
\r
ldr r12, [r1], #4\r
stmia r0!, {r4,r5,r8,r12}\r
bne .fl_loopRGB555\r
\r
+ ldmfd sp!, {r4-r8,lr}\r
+ bx lr\r
+\r
+\r
+.fl_32scale_RGB555:\r
+ stmfd sp!, {r9,r10}\r
+ mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
+ orr r9, r9, #0x00e7\r
+\r
+.fl_loop32scale_RGB555:\r
+ ldr r12, [r1], #4\r
+ ldr r7, [r1], #4\r
+\r
+ and r4, lr, r12,lsl #1\r
+ ldrh r4, [r3, r4]\r
+ and r5, lr, r12,lsr #7\r
+ ldrh r5, [r3, r5]\r
+ and r4, r4, r9, lsl #2\r
+ orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0\r
+ and r5, r5, r9, lsl #2\r
+ sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1\r
+ add r4, r4, r6, lsl #16 @ pix_d 0, 1\r
+ and r6, lr, r12,lsr #15\r
+ ldrh r6, [r3, r6]\r
+ and r12,lr, r12,lsr #23\r
+ ldrh r12,[r3, r12]\r
+ and r6, r6, r9, lsl #2\r
+ add r5, r5, r6\r
+ mov r5, r5, lsr #1\r
+ sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2\r
+ orr r5, r5, r6, lsl #16\r
+\r
+ and r6, lr, r7, lsl #1\r
+ ldrh r6, [r3, r6]\r
+ and r12,r12,r9, lsl #2\r
+ add r5, r5, r12,lsl #14 @ pix_d 2, 3\r
+ and r6, r6, r9, lsl #2\r
+ orr r6, r12,r6, lsl #16 @ pix_d 4, 5\r
\r
+ and r12,lr, r7, lsr #7\r
+ ldrh r12,[r3, r12]\r
+ and r10,lr, r7, lsr #15\r
+ ldrh r10,[r3, r10]\r
+ and r12,r12,r9, lsl #2\r
+ sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1\r
+ add r8, r8, r6, lsr #18\r
+ and r7, lr, r7, lsr #23\r
+ ldrh r7, [r3, r7]\r
+ and r10,r10,r9, lsl #2\r
+ orr r8, r8, r10,lsl #15\r
+ add r8, r8, r12,lsl #15 @ pix_d 6, 7\r
+ sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2\r
+ and r7, r7, r9, lsl #2\r
+ add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3\r
+ orr r10,r10,r7, lsl #16 @ pix_d 8, 9\r
+\r
+ subs r2, r2, #1\r
+\r
+ stmia r0!, {r4,r5,r6,r8,r10}\r
+ bne .fl_loop32scale_RGB555\r
+\r
+ ldmfd sp!, {r9,r10}\r
ldmfd sp!, {r4-r8,lr}\r
- bx lr\r
+ bx lr\r
+\r
\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r