@ vim:filetype=armasm\r
\r
-@ assembly "optimized" version of some funtions from draw.c\r
+@ ARM assembly versions of some functions from draw.c\r
@ this is highly specialized, be careful if changing related C code!\r
\r
-@ (c) Copyright 2007, Grazvydas "notaz" Ignotas\r
+@ (c) Copyright 2007-2008, Grazvydas "notaz" Ignotas\r
@ All Rights Reserved\r
\r
+.include "port_config.s"\r
\r
.extern Pico\r
.extern PicoOpt\r
.extern rendstatus\r
.extern DrawLineDest\r
.extern DrawStripInterlace\r
+.extern HighCacheS_ptr\r
\r
\r
@ helper\r
.endif\r
ldreqb r4, [r1,#\offs]\r
orrne r4, r3, r4\r
- strneb r4, [r1,#\offs]\r
- tsteq r4, #0x80\r
andeq r4, r4, #0x3f\r
- streqb r4, [r1,#\offs]\r
+ strb r4, [r1,#\offs]\r
.endm\r
\r
-@ TileNorm (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf, touches r3 high bits\r
+@ TileNormShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf, touches r3 high bits\r
.macro TileNormShHP\r
TilePixelShHP 12, 0 @ #0x0000f000\r
TilePixelShHP 8, 1 @ #0x00000f00\r
TilePixelShHP 16, 7 @ #0x000f0000\r
.endm\r
\r
-@ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
+@ TileFlipShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf\r
.macro TileFlipShHP\r
TilePixelShHP 16, 0 @ #0x000f0000\r
TilePixelShHP 20, 1 @ #0x00f00000\r
.else\r
ands r4, r12, r2\r
.endif\r
- beq 3f\r
+ beq 0f\r
cmp r4, #0xe\r
- beq 2f\r
- bgt 1f\r
- orr r4, r3, r4\r
- strb r4, [r1,#\ofs]\r
- b 3f\r
-1:\r
- ldrb r4, [r1,#\ofs] @ 2ci\r
- orr r4, r4, #0xc0\r
- strb r4, [r1,#\ofs]\r
- b 3f\r
-2:\r
- ldrb r4, [r1,#\ofs] @ 2ci\r
- bic r4, r4, #0xc0\r
- orr r4, r4, #0x80\r
+ ldrgeb r4, [r1,#\ofs]\r
+ orrlt r4, r3, r4 @ normal\r
+\r
+ biceq r4, r4, #0xc0 @ hilight\r
+ orreq r4, r4, #0x80\r
+ orrgt r4, r4, #0xc0 @ shadow\r
+\r
strb r4, [r1,#\ofs]\r
-3:\r
+0:\r
.endm\r
\r
@ TileFlipSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf\r
TileDoShGenPixel 16, 7 @ #0x000f0000\r
.endm\r
\r
@ TileDoShGenPixel_noop shift, ofs: draw one pixel of a tile line while\r
@ leaving "operator" pixel values alone.\r
@   in:  r1 = pdest, r2 = pixels8, r3 = pal, r12 = nibble mask (0xf)\r
@   The 4-bit pixel is taken from r2 at bit position "shift".\r
@   Pixel values 1-0xd are ORed with r3 and stored as a byte at [r1 + ofs].\r
@   Pixel values 0, 0xe and 0xf store nothing.\r
@   clobbers: r4, r7, flags\r
+.macro TileDoShGenPixel_noop shift ofs\r
+.if \shift\r
+ and r4, r12, r2, lsr #\shift\r
+.else\r
+ and r4, r12, r2\r
+.endif\r
@ r7 = pixel-1 lets a single unsigned compare reject both ends of the range:\r
@ pixel 0 wraps to 0xffffffff, and 0xe/0xf give 0xd/0xe, none of which is below 0xd.\r
+ sub r7, r4, #1\r
+ cmp r7, #0xd\r
+ orrcc r4, r3, r4 @ 0-0xc (was 1-0xd)\r
+ strccb r4, [r1,#\ofs]\r
+.endm\r
+\r
@ TileFlipSh_noop: expands TileDoShGenPixel_noop once for each of the 8 pixels in r2.\r
@ Destination bytes 0-7 take the nibbles at bit positions 16,20,24,28,0,4,8,12,\r
@ which is the reverse of the order used by TileNormSh_noop.\r
@ Register use is that of TileDoShGenPixel_noop (r1, r2, r3, r12 in; r4, r7 clobbered).\r
+.macro TileFlipSh_noop\r
+ TileDoShGenPixel_noop 16, 0 @ #0x000f0000\r
+ TileDoShGenPixel_noop 20, 1 @ #0x00f00000\r
+ TileDoShGenPixel_noop 24, 2 @ #0x0f000000\r
+ TileDoShGenPixel_noop 28, 3 @ #0xf0000000\r
+ TileDoShGenPixel_noop 0, 4 @ #0x0000000f\r
+ TileDoShGenPixel_noop 4, 5 @ #0x000000f0\r
+ TileDoShGenPixel_noop 8, 6 @ #0x00000f00\r
+ TileDoShGenPixel_noop 12, 7 @ #0x0000f000\r
+.endm\r
+\r
@ TileNormSh_noop: expands TileDoShGenPixel_noop once for each of the 8 pixels in r2.\r
@ Destination bytes 0-7 take the nibbles at bit positions 12,8,4,0,28,24,20,16.\r
@ Register use is that of TileDoShGenPixel_noop (r1, r2, r3, r12 in; r4, r7 clobbered).\r
+.macro TileNormSh_noop\r
+ TileDoShGenPixel_noop 12, 0 @ #0x0000f000\r
+ TileDoShGenPixel_noop 8, 1 @ #0x00000f00\r
+ TileDoShGenPixel_noop 4, 2 @ #0x000000f0\r
+ TileDoShGenPixel_noop 0, 3 @ #0x0000000f\r
+ TileDoShGenPixel_noop 28, 4 @ #0xf0000000\r
+ TileDoShGenPixel_noop 24, 5 @ #0x0f000000\r
+ TileDoShGenPixel_noop 20, 6 @ #0x00f00000\r
+ TileDoShGenPixel_noop 16, 7 @ #0x000f0000\r
+.endm\r
+\r
@ TileDoShGenPixel_onlyop_lp shift, ofs: apply only the operator pixel values\r
@ (0xe and 0xf) of a tile line, and only to destination bytes that have bit 0x40 set.\r
@   in:  r1 = pdest, r2 = pixels8, r12 = nibble mask (0xf)\r
@   pixel 0xe: clear bits 0xc0 of the byte at [r1 + ofs], then set 0x80 (hilight)\r
@   pixel 0xf: set bits 0xc0 of that byte (shadow)\r
@   Pixel values 0-0xd, and destination bytes with bit 0x40 clear, are left untouched.\r
@   clobbers: r4, r7, flags\r
@ NOTE(review): bit 0x40 looks like the "shadowed" marker (DrawLayer uses 0x40 as the\r
@ default shadowed palette in sh mode) -- confirm against the C renderer.\r
+.macro TileDoShGenPixel_onlyop_lp shift ofs\r
+.if \shift\r
+ ands r7, r12, r2, lsr #\shift\r
+.else\r
+ ands r7, r12, r2\r
+.endif\r
@ The load and test run only for a non-zero pixel, so at the beq below Z is set\r
@ either because the pixel is 0 or because the destination byte has bit 0x40 clear.\r
+ ldrneb r4, [r1,#\ofs]\r
+ tstne r4, #0x40\r
+ beq 0f\r
+\r
@ Only pixel values >= 0xe reach the store; values below 0xe fall through unchanged.\r
+ cmp r7, #0xe\r
+ biceq r4, r4, #0xc0 @ hilight\r
+ orreq r4, r4, #0x80\r
+ orrgt r4, r4, #0xc0 @ shadow\r
+ strgeb r4, [r1,#\ofs]\r
+0:\r
+.endm\r
+\r
@ TileFlipSh_onlyop_lp: expands TileDoShGenPixel_onlyop_lp once for each of the 8 pixels in r2.\r
@ Destination bytes 0-7 take the nibbles at bit positions 16,20,24,28,0,4,8,12,\r
@ which is the reverse of the order used by TileNormSh_onlyop_lp.\r
@ Register use is that of TileDoShGenPixel_onlyop_lp (r1, r2, r12 in; r4, r7 clobbered).\r
+.macro TileFlipSh_onlyop_lp\r
+ TileDoShGenPixel_onlyop_lp 16, 0 @ #0x000f0000\r
+ TileDoShGenPixel_onlyop_lp 20, 1 @ #0x00f00000\r
+ TileDoShGenPixel_onlyop_lp 24, 2 @ #0x0f000000\r
+ TileDoShGenPixel_onlyop_lp 28, 3 @ #0xf0000000\r
+ TileDoShGenPixel_onlyop_lp 0, 4 @ #0x0000000f\r
+ TileDoShGenPixel_onlyop_lp 4, 5 @ #0x000000f0\r
+ TileDoShGenPixel_onlyop_lp 8, 6 @ #0x00000f00\r
+ TileDoShGenPixel_onlyop_lp 12, 7 @ #0x0000f000\r
+.endm\r
+\r
@ TileNormSh_onlyop_lp: expands TileDoShGenPixel_onlyop_lp once for each of the 8 pixels in r2.\r
@ Destination bytes 0-7 take the nibbles at bit positions 12,8,4,0,28,24,20,16.\r
@ Register use is that of TileDoShGenPixel_onlyop_lp (r1, r2, r12 in; r4, r7 clobbered).\r
+.macro TileNormSh_onlyop_lp\r
+ TileDoShGenPixel_onlyop_lp 12, 0 @ #0x0000f000\r
+ TileDoShGenPixel_onlyop_lp 8, 1 @ #0x00000f00\r
+ TileDoShGenPixel_onlyop_lp 4, 2 @ #0x000000f0\r
+ TileDoShGenPixel_onlyop_lp 0, 3 @ #0x0000000f\r
+ TileDoShGenPixel_onlyop_lp 28, 4 @ #0xf0000000\r
+ TileDoShGenPixel_onlyop_lp 24, 5 @ #0x0f000000\r
+ TileDoShGenPixel_onlyop_lp 20, 6 @ #0x00f00000\r
+ TileDoShGenPixel_onlyop_lp 16, 7 @ #0x000f0000\r
+.endm\r
+\r
\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
@ int cells; // 0x14\r
@ };\r
\r
-@ int DrawLayer(int plane, int *hcache, int maxcells, int sh)\r
+@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells);\r
\r
-.global DrawLayer @ int plane, int *hcache, int maxcells, int sh\r
+.global DrawLayer\r
\r
DrawLayer:\r
stmfd sp!, {r4-r11,lr}\r
ldr r11, =(Pico+0x22228) @ Pico.video\r
mov r8, #1\r
\r
- ldrb r7, [r11, #16] @ ??hh??ww\r
+ ldrb r7, [r11, #16] @ ??vv??hh\r
\r
mov r6, r1 @ hcache\r
- orr r9, r2, r3, lsl #31 @ r9=maxcells|(sh<<31)\r
+ orr r9, r3, r0, lsl #30\r
+ orr r9, r9, r2, lsl #8 @ r9=sh[31]|cellskip[15:8]|maxcells[7:0] (tmp)\r
\r
mov r1, r7, lsl #4\r
orr r1, r1, #0x00ff\r
sub r5, r5, #1 @ r5=xmask\r
\r
@ Find name table:\r
- tst r0, r0\r
+ ands r0, r0, #1\r
ldreqb r12, [r11, #2]\r
ldrneb r12, [r11, #4]\r
\r
\r
tst r9, #1<<31\r
mov r3, #0\r
- orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|ty)\r
+ orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty)\r
movne r3, #0x40 @ default to shadowed pal on sh mode\r
\r
- mvn r9, #0 @ r9=prevcode=-1\r
-\r
cmp r7, #8\r
addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll\r
\r
+ and r9, r9, #0xff00\r
+ add r8, r8, r9, lsr #8 @ tilex+=cellskip\r
+ add r7, r7, r9, lsr #5 @ dx+=cellskip<<3;\r
+ sub r10,r10,r9, lsl #16 @ cells-=cellskip\r
+\r
@ cache some stuff to avoid mem access\r
+.if OVERRIDE_HIGHCOL\r
+ ldr r11,=HighCol\r
+ mov r0, #0xf\r
+ ldr r11,[r11]\r
+.else\r
ldr r11,=HighCol\r
mov r0, #0xf\r
+.endif\r
+\r
+ mvn r9, #0 @ r9=prevcode=-1\r
add r1, r11, r7 @ r1=pdest\r
\r
\r
beq .DrawStrip_samecode @ we know stuff about this tile already\r
\r
mov r9, r7 @ remember code\r
+ orr r10, r10, #1<<21 @ seen non hi-prio tile\r
\r
movs r2, r9, lsl #20 @ if (code&0x1000)\r
mov r2, r2, lsl #1\r
beq .DrawStrip_SingleColor @ tileline singlecolor \r
\r
tst r9, #0x0800\r
- beq .DrawStrip_TileNorm\r
+ bne .DrawStrip_TileFlip\r
\r
@ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
- TileFlip r0\r
- b .dsloop\r
-\r
.DrawStrip_TileNorm:\r
TileNorm r0\r
b .dsloop\r
\r
+.DrawStrip_TileFlip:\r
+ TileFlip r0\r
+ b .dsloop\r
+\r
.DrawStrip_SingleColor:\r
and r4, r2, #0xf\r
orr r4, r3, r4\r
strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
b .dsloop_subr1\r
\r
-.DrawStrip_hiprio:\r
- tst r10, #0x00c00000\r
- beq .DrawStrip_hiprio_maybempt\r
- sub r0, r1, r11\r
- orr r7, r7, r0, lsl #16\r
- orr r7, r7, r10, lsl #25 @ (ty<<25)\r
- tst r7, #0x1000\r
- eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26;\r
- str r7, [r6], #4 @ cache hi priority tile\r
- mov r0, #0xf\r
- b .dsloop\r
-\r
.DrawStrip_hiprio_maybempt:\r
cmp r7, r9\r
beq .dsloop @ must've been empty, otherwise we wouldn't get here\r
ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
mov r9, r7 @ remember code\r
tst r2, r2\r
- orrne r10, r10, #1<<22\r
- bne .DrawStrip_hiprio\r
+ beq .dsloop\r
+ orr r10, r10, #1<<22\r
+\r
+.DrawStrip_hiprio:\r
+ tst r10, #0x00c00000\r
+ beq .DrawStrip_hiprio_maybempt\r
+ sub r0, r1, r11\r
+ orr r7, r7, r0, lsl #16\r
+ orr r7, r7, r10, lsl #25 @ (ty<<25)\r
+ tst r7, #0x1000\r
+ eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26;\r
+ str r7, [r6], #4 @ cache hi priority tile\r
+ mov r0, #0xf\r
b .dsloop\r
\r
.dsloop_exit:\r
+ tst r10, #1<<21 @ seen non hi-prio tile\r
+ ldreq r1, =rendstatus\r
mov r0, #0\r
+ ldreq r2, [r1]\r
str r0, [r6] @ terminate the cache list\r
+ orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles\r
+ streq r2, [r1]\r
\r
ldmfd sp!, {r4-r11,lr}\r
bx lr\r
.DrawStrip_vsscroll:\r
rsb r8, r3, #0\r
mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3\r
- bic r8, r8, #0xff000000\r
- orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|tilex[15:0])\r
+ bic r8, r8, #0x3fc00000\r
+ orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[21:0])\r
\r
ldr r4, =Scanline\r
orr r5, r1, r10, lsl #24\r
add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1\r
\r
mov r10,r9, lsl #16\r
- tst r0, r0\r
+ tst r0, #1\r
orrne r10,r10, #0x8000\r
tst r9, #1<<31\r
mov r3, #0\r
orr r10,r10, #0xff000000 @ will be adjusted on entering loop\r
- orrne r10,r10, #1<<23 @ r10=(cells[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0])\r
+ orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0])\r
movne r3, #0x40 @ default to shadowed pal on sh mode\r
\r
- mvn r9, #0 @ r9=prevcode=-1\r
+ cmp r7, #8\r
+ subne r10,r10, #0x01000000 @ have hscroll, start with negative cell\r
+\r
+ and r9, r9, #0xff00\r
+ add r8, r8, r9, lsr #8 @ tilex+=cellskip\r
+ add r7, r7, r9, lsr #5 @ dx+=cellskip<<3;\r
+ add r10,r10,r9, lsl #16 @ cell+=cellskip\r
\r
@ cache some stuff to avoid mem access\r
+.if OVERRIDE_HIGHCOL\r
ldr r11,=HighCol\r
mov r0, #0xf\r
- add r1, r11, r7 @ r1=pdest\r
-\r
- cmp r7, #8\r
- subne r10,r10, #0x01000000 @ have hscroll, start with negative cell\r
+ ldr r11,[r11]\r
+.else\r
+ ldr r11,=HighCol\r
+ mov r0, #0xf\r
+.endif\r
\r
+ mvn r9, #0 @ r9=prevcode=-1\r
+ add r1, r11, r7 @ r1=pdest\r
\r
@ r4 & r7 are scratch in this loop\r
.dsloop_vs_subr1:\r
add r10,r10, #0x01000000\r
and r4, r10, #0x003f0000\r
cmp r4, r10, asr #8\r
- ble .dsloop_exit\r
+ ble .dsloop_vs_exit\r
\r
@ calc offset and read tileline code to r7, also calc ty\r
add r7, lr, #0x012000\r
ldrh r7, [r7] @ r7=vscroll\r
\r
bic r10,r10,#0xff @ clear old ty\r
- and r4, r5, #0xff0000\r
- add r4, r4, r7, lsl #16\r
- and r4, r4, r5, lsl #16 @ r4=line<<16\r
+ and r4, r5, #0xff0000 @ scanline\r
+ add r4, r4, r7, lsl #16 @ ... += vscroll\r
+ and r4, r4, r5, lsl #16 @ ... &= ymask\r
and r7, r4, #0x70000\r
orr r10,r10,r7, lsr #15 @ new ty\r
\r
beq .DrawStrip_vs_samecode @ we know stuff about this tile already\r
\r
mov r9, r7 @ remember code\r
+ orr r8, r8, #(1<<24)@ seen non hi-prio tile\r
\r
movs r2, r9, lsl #20 @ if (code&0x1000)\r
mov r2, r2, lsl #1\r
beq .DrawStrip_vs_SingleColor @ tileline singlecolor \r
\r
tst r9, #0x0800\r
- beq .DrawStrip_vs_TileNorm\r
+ bne .DrawStrip_vs_TileFlip\r
\r
@ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
- TileFlip r0\r
- b .dsloop_vs\r
-\r
.DrawStrip_vs_TileNorm:\r
TileNorm r0\r
b .dsloop_vs\r
\r
+.DrawStrip_vs_TileFlip:\r
+ TileFlip r0\r
+ b .dsloop_vs\r
+\r
.DrawStrip_vs_SingleColor:\r
and r4, r2, #0xf\r
orr r4, r3, r4\r
bne .DrawStrip_vs_hiprio\r
b .dsloop_vs\r
\r
@ Exit path of the per-column vscroll strip loop (branched to from .dsloop_vs_subr1).\r
@ If bit 24 of r8 is clear, i.e. no non hi-prio tile was recorded, bit 0x40 is set in\r
@ rendstatus. A zero word is always written to [r6] to terminate the cache list, then\r
@ r4-r11 and lr are restored and the routine returns.\r
@ None of the mov/ldr/str/orr instructions below update the flags, so the eq condition\r
@ produced by the tst stays valid for every conditional instruction that follows it.\r
+.dsloop_vs_exit:\r
+ tst r8, #(1<<24) @ seen non hi-prio tile\r
+ ldreq r1, =rendstatus\r
+ mov r0, #0\r
+ ldreq r2, [r1]\r
+ str r0, [r6] @ terminate the cache list\r
+ orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles\r
+ streq r2, [r1]\r
+\r
+ ldmfd sp!, {r4-r11,lr}\r
+ bx lr\r
+\r
\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
BackFill:\r
stmfd sp!, {r4-r9,lr}\r
\r
+.if OVERRIDE_HIGHCOL\r
+ ldr lr, =HighCol\r
+ mov r0, r0, lsl #26\r
+ ldr lr, [lr]\r
+ mov r0, r0, lsr #26\r
+ add lr, lr, #8\r
+.else\r
ldr lr, =(HighCol+8)\r
-\r
mov r0, r0, lsl #26\r
mov r0, r0, lsr #26\r
+.endif\r
+\r
orr r0, r0, r1, lsl #6\r
orr r0, r0, r0, lsl #8\r
orr r0, r0, r0, lsl #16\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
\r
-.global DrawTilesFromCache @ int *hc, int sh\r
+.global DrawTilesFromCache @ int *hc, int sh, int rlim\r
\r
DrawTilesFromCache:\r
stmfd sp!, {r4-r8,r11,lr}\r
\r
- mvn r5, #0 @ r5=prevcode=-1\r
- mov r8, r1\r
-\r
@ cache some stuff to avoid mem access\r
+.if OVERRIDE_HIGHCOL\r
ldr r11,=HighCol\r
- ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
mov r12,#0xf\r
+ ldr r11,[r11]\r
+.else\r
+ ldr r11,=HighCol\r
+ mov r12,#0xf\r
+.endif\r
+ ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
+\r
+ mvn r5, #0 @ r5=prevcode=-1\r
+ ands r8, r1, #1\r
+ orr r8, r8, r2, lsl #1\r
+ bne .dtfc_check_rendflags\r
\r
@ scratch: r4, r7\r
.dtfc_loop:\r
ldr r6, [r0], #4 @ read code\r
movs r1, r6, lsr #16 @ r1=dx;\r
ldmeqfd sp!, {r4-r8,r11,pc} @ dx is never zero, this must be a terminator, return\r
- bic r1, r1, #0xfe00\r
- add r1, r11, r1 @ r1=pdest\r
+ bic r4, r1, #0xfe00\r
+ add r1, r11, r4 @ r1=pdest\r
\r
mov r7, r6, lsl #16\r
cmp r5, r7, lsr #16\r
ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
\r
.dtfc_samecode:\r
- tst r8, r8\r
+ rsbs r4, r4, r8, lsr #1\r
+ bmi .dtfc_cut_tile\r
+\r
+ tst r8, #1\r
bne .dtfc_shadow\r
\r
tst r2, r2\r
beq .dtfc_SingleColor @ tileline singlecolor \r
\r
tst r5, #0x0800\r
- beq .dtfc_TileNorm\r
+ bne .dtfc_TileFlip\r
\r
@ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
- TileFlip r12\r
- b .dtfc_loop\r
-\r
.dtfc_TileNorm:\r
TileNorm r12\r
b .dtfc_loop\r
\r
+.dtfc_TileFlip:\r
+ TileFlip r12\r
+ b .dtfc_loop\r
+\r
.dtfc_SingleColor:\r
and r4, r2, #0xf\r
orr r4, r3, r4\r
beq .dtfc_SingleColor @ tileline singlecolor \r
\r
tst r5, #0x0800\r
- beq .dtfc_TileNormShHP\r
+ bne .dtfc_TileFlipShHP\r
\r
@ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
- TileFlipShHP\r
- b .dtfc_loop\r
-\r
.dtfc_TileNormShHP:\r
TileNormShHP\r
b .dtfc_loop\r
\r
+.dtfc_TileFlipShHP:\r
+ TileFlipShHP\r
+ b .dtfc_loop\r
+\r
.dtfc_shadow_blank:\r
- ldrb r4, [r1] @ 1ci\r
- ldrb r12,[r1,#1]\r
- tst r4, #0x80\r
- andeq r4, r4,#0x3f\r
- streqb r4, [r1]\r
- tst r12,#0x80\r
- ldrb r4, [r1,#2]\r
- andeq r12,r12,#0x3f\r
- streqb r12,[r1,#1]\r
- tst r4, #0x80\r
- ldrb r12,[r1,#3]\r
- andeq r4, r4,#0x3f\r
- streqb r4, [r1,#2]\r
- tst r12,#0x80\r
- ldrb r4, [r1,#4]\r
- andeq r12,r12,#0x3f\r
- streqb r12,[r1,#3]\r
- tst r4, #0x80\r
- ldrb r12,[r1,#5]\r
- andeq r4, r4,#0x3f\r
- streqb r4, [r1,#4]\r
- tst r12,#0x80\r
- ldrb r4, [r1,#6]\r
- andeq r12,r12,#0x3f\r
- streqb r12,[r1,#5]\r
- tst r4, #0x80\r
- ldrb r12,[r1,#7]\r
- andeq r4, r4,#0x3f\r
- streqb r4, [r1,#6]\r
- tst r12,#0x80\r
- andeq r12,r12,#0x3f\r
- streqb r12,[r1,#7]\r
- mov r12, #0xf\r
+ tst r1, #1\r
+ ldrneb r4, [r1]\r
+ mov r6, #0x3f\r
+ and r4, r4, #0x3f\r
+ strneb r4, [r1], #1\r
+ ldrh r4, [r1]\r
+ orr r6, r6, r6, lsl #8\r
+ and r4, r4, r6\r
+ strh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ and r4, r4, r6\r
+ strh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ and r4, r4, r6\r
+ strh r4, [r1], #2\r
+ ldrh r4, [r1]\r
+ and r4, r4, r6\r
+ streqh r4, [r1]\r
+ strneb r4, [r1]\r
+ b .dtfc_loop\r
+\r
@ Partial tile at the right limit: entered from .dtfc_samecode when its rsbs\r
@ (r4 = (r8 >> 1) - r4) gave a negative result.\r
@ Builds a nibble mask in r12 and clears the masked-out pixels in r2, so the normal\r
@ tile paths only draw the remaining ones:\r
@   mask = 0xf0000000 asr (4 * (r4 + 7)), i.e. the top (r4 + 7 + 1) nibbles set;\r
@   the mask is inverted when code bit 0x0800 (flip) is set in r5;\r
@   r2 is rotated by 16 for the masking and rotated back afterwards.\r
@ r12 is then restored to the 0xf helper pattern expected by the draw macros.\r
@ NOTE(review): relies on r4 + 7 being in 0-6, as the existing comment states -- confirm\r
@ that the caller never passes a tile further out than that.\r
+.dtfc_cut_tile:\r
+ add r4, r4, #7 @ 0-6\r
+ mov r4, r4, lsl #2\r
+ mov r12,#0xf<<28\r
+ mov r12,r12,asr r4\r
+ mov r2, r2, ror #16\r
+ tst r5, #0x0800 @ flipped?\r
+ mvnne r12,r12\r
+ and r2, r2, r12\r
+ mov r2, r2, ror #16\r
+ mov r12,#0xf\r
@ Dispatch: sh mode (bit 0 of r8) goes to .dtfc_shadow; a tile line with no pixels\r
@ left is skipped; otherwise draw through the normal or flipped path.\r
+ tst r8, #1\r
+ bne .dtfc_shadow\r
+ tst r2, r2\r
+ beq .dtfc_loop\r
+ tst r5, #0x0800\r
+ beq .dtfc_TileNorm\r
+ b .dtfc_TileFlip\r
+\r
+@ check if we have detected layer covered with hi-prio tiles:\r
@ Entered from the DrawTilesFromCache setup when bit 0 of r8 (sh) is set.\r
@  - rendstatus has neither 0x40 nor 0x80 set: continue at .dtfc_loop with sh mode kept.\r
@  - otherwise bit 0 of r8 is cleared (sh/hi mode off); if 0x80 is already set,\r
@    continue at .dtfc_loop without further work.\r
@  - otherwise 0x80 is set in rendstatus and the 320 bytes starting at r11+8 are ANDed\r
@    with 0x3f, 16 bytes per iteration for 320/4/4 = 20 iterations.\r
@ clobbers: r1-r7 (r5 is reloaded with -1 at the end because the loop uses it as scratch).\r
+.dtfc_check_rendflags:\r
+ ldr r1, =rendstatus\r
+ ldr r2, [r1]\r
+ tst r2, #0xc0\r
+ beq .dtfc_loop\r
+ bic r8, r8, #1 @ sh/hi mode off\r
+ tst r2, #0x80\r
+ bne .dtfc_loop @ already processed\r
+ orr r2, r2, #0x80\r
+ str r2, [r1]\r
+\r
@ r6 = 0x3f3f3f3f: keeps the low 6 bits of each of the four bytes in a word.\r
+ add r1, r11,#8\r
+ mov r3, #320/4/4\r
+ mov r6, #0x3f\r
+ orr r6, r6, r6, lsl #8\r
+ orr r6, r6, r6, lsl #16\r
+.dtfc_loop_shprep:\r
+ ldmia r1, {r2,r4,r5,r7}\r
+ subs r3, r3, #1\r
+ and r2, r2, r6\r
+ and r4, r4, r6\r
+ and r5, r5, r6\r
+ and r7, r7, r6\r
+ stmia r1!,{r2,r4,r5,r7}\r
+ bne .dtfc_loop_shprep\r
+\r
+ mvn r5, #0 @ r5=prevcode=-1\r
b .dtfc_loop\r
\r
.pool\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
\r
-.global DrawSpritesFromCache @ int *hc, int sh\r
+.global DrawSpritesFromCache @ int *hc, int maxwidth, int prio, int sh\r
\r
DrawSpritesFromCache:\r
stmfd sp!, {r4-r11,lr}\r
\r
@ cache some stuff to avoid mem access\r
+.if OVERRIDE_HIGHCOL\r
+ ldr r11,=HighCol\r
+ mov r12,#0xf\r
+ ldr r11,[r11]\r
+.else\r
ldr r11,=HighCol\r
+ mov r12,#0xf\r
+.endif\r
ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
- mov r6, r1, lsl #31\r
+ mov r6, r3, lsl #31\r
orr r6, r6, #1<<30\r
- mov r12,#0xf\r
\r
mov r10, r0\r
\r
beq .dsfc_SingleColor @ tileline singlecolor \r
\r
tst r9, #0x10000\r
- beq .dsfc_TileNorm\r
+ bne .dsfc_TileFlip\r
\r
@ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
- TileFlip r12\r
- b .dsfc_inloop\r
-\r
.dsfc_TileNorm:\r
TileNorm r12\r
b .dsfc_inloop\r
\r
+.dsfc_TileFlip:\r
+ TileFlip r12\r
+ b .dsfc_inloop\r
+\r
.dsfc_SingleColor:\r
tst r0, #1 @ not aligned?\r
and r4, r2, #0xf\r
b .dsfc_inloop\r
\r
.dsfc_shadow:\r
+ tst r9, #0x80000000\r
+ beq .dsfc_shadow_lowpri\r
+\r
cmp r2, r2, ror #4\r
beq .dsfc_singlec_sh\r
\r
tst r9, #0x10000\r
- beq .dsfc_TileNorm_sh\r
+ bne .dsfc_TileFlip_sh\r
\r
@ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
- TileFlipSh\r
- b .dsfc_inloop\r
-\r
.dsfc_TileNorm_sh:\r
TileNormSh\r
b .dsfc_inloop\r
\r
+.dsfc_TileFlip_sh:\r
+ TileFlipSh\r
+ b .dsfc_inloop\r
+\r
.dsfc_singlec_sh:\r
cmp r2, #0xe0000000\r
bcc .dsfc_SingleColor @ normal singlecolor tileline (carry inverted in ARM)\r
TileSingleSh\r
b .dsfc_inloop\r
\r
+.dsfc_shadow_lowpri:\r
+ tst r9, #0x10000\r
+ bne .dsfc_TileFlip_sh_lp\r
+\r
+.dsfc_TileNorm_sh_lp:\r
+ TileNormSh_onlyop_lp\r
+ b .dsfc_inloop\r
+\r
+.dsfc_TileFlip_sh_lp:\r
+ TileFlipSh_onlyop_lp\r
+ b .dsfc_inloop\r
+\r
.pool\r
\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
@ + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. size\r
@ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8\r
\r
-.global DrawSprite @ unsigned int *sprite, int **hc, int sh\r
+.global DrawSprite @ unsigned int *sprite, int sh, int acc_sprites\r
\r
DrawSprite:\r
stmfd sp!, {r4-r9,r11,lr}\r
\r
+ orr r8, r2, r1, lsl #4\r
ldr r3, [r0] @ sprite[0]\r
ldr r7, =Scanline\r
mov r6, r3, lsr #28\r
ldr r9, [r0, #4]\r
sub r7, r7, r4, asr #16 @ r7=row=Scanline-sy\r
\r
- tst r2, r2\r
mov r2, r9, asr #16 @ r2=sx\r
- bic r9, r9, #0xfe000000\r
- orrne r9, r9, #1<<31 @ r9=code|(sh<<31)\r
+ mov r9, r9, lsl #16\r
+ mov r9, r9, lsr #16\r
+ orr r9, r9, r8, lsl #27 @ r9=code|sh[31]|as[27]\r
\r
tst r9, #0x1000\r
movne r4, r5, lsl #3\r
subne r4, r4, #1\r
subne r7, r4, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y\r
\r
- mov r8, r9, lsl #21\r
- mov r8, r8, lsr #21\r
- add r8, r8, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down\r
- \r
+ add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down\r
tst r9, #0x0800\r
mlane r8, r5, r6, r8 @ if (code&0x0800) { tile+=delta*(width-1);\r
rsbne r5, r5, #0 @ delta=-delta; } // r5=delta now\r
\r
- mov r8, r8, lsl #4\r
+ mov r8, r8, lsl #21\r
+ mov r8, r8, lsr #17\r
and r7, r7, #7\r
add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address\r
\r
tst r9, #0x8000\r
- bne .dspr_cache @ if(code&0x8000) // high priority - cache it\r
-\r
+ tsteq r9, #(1<<27)\r
+ bne .dspr_cache @ if(code&0x8000) || as\r
+ tst r6, #0x4000\r
+ tstne r6, #0x2000\r
+ tstne r9, #(1<<31)\r
+ bne .dspr_cache @ (sh && pal == 0x30)\r
+\r
+.dspr_continue:\r
@ cache some stuff to avoid mem access\r
+.if OVERRIDE_HIGHCOL\r
ldr r11,=HighCol\r
- ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
mov r12,#0xf\r
+ ldr r11,[r11]\r
+.else\r
+ ldr r11,=HighCol\r
+ mov r12,#0xf\r
+.endif\r
+ ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
\r
mov r5, r5, lsl #4 @ delta<<=4; // Delta of address\r
and r4, r9, #0x6000\r
orr r9, r9, r4, lsl #16\r
- orr r9, r9, #0x10000000 @ r9=scc1 ???? ... <code> (s=shadow/hilight, cc=pal)\r
+ orrs r9, r9, #0x10000000 @ r9=scc1 a??? ... <code> (s=shadow/hilight, cc=pal, a=acc_spr)\r
\r
- tst r9, #1<<31\r
mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30);\r
- orrne r3, r3, #0x40 @ shadow by default\r
+ orrmi r3, r3, #0x40 @ shadow by default\r
\r
add r6, r6, #1 @ inc now\r
adds r0, r2, #0 @ mov sx to r0 and set ZV flags\r
beq .dspr_SingleColor @ tileline singlecolor \r
\r
tst r9, #0x0800\r
- beq .dspr_TileNorm\r
+ bne .dspr_TileFlip\r
\r
@ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
- TileFlip r12\r
- b .dspr_loop\r
-\r
@ scratch: r4, r7\r
.dspr_TileNorm:\r
TileNorm r12\r
b .dspr_loop\r
\r
+.dspr_TileFlip:\r
+ TileFlip r12\r
+ b .dspr_loop\r
+\r
+.dspr_singlec_sh:\r
+ cmp r2, #0xe0000000\r
+ bcs .dspr_loop @ operator tileline, ignore\r
+\r
.dspr_SingleColor:\r
and r4, r2, #0xf\r
orr r4, r3, r4\r
beq .dspr_singlec_sh\r
\r
tst r9, #0x0800\r
- beq .dspr_TileNorm_sh\r
+ bne .dspr_TileFlip_sh\r
\r
@ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
- TileFlipSh\r
- b .dspr_loop\r
-\r
.dspr_TileNorm_sh:\r
- TileNormSh\r
+ TileNormSh_noop\r
b .dspr_loop\r
\r
-.dspr_singlec_sh:\r
- cmp r2, #0xe0000000\r
- bcc .dspr_SingleColor @ normal tileline\r
- tst r2, #0x10000000\r
- bne .dspr_sh_sh\r
- TileSingleHi\r
- b .dspr_loop\r
-\r
-.dspr_sh_sh:\r
- TileSingleSh\r
+.dspr_TileFlip_sh:\r
+ TileFlipSh_noop\r
b .dspr_loop\r
\r
\r
@ Append one packed word describing this sprite to the list at *HighCacheS_ptr and\r
@ advance that pointer. Afterwards: return if the sprite is high priority (code&0x8000)\r
@ and the "as" bit (bit 27 of r9) is clear; in every other case go on to .dspr_continue.\r
@ The added (+) lines build the word through r0 instead of r2, leaving r2 (sx) intact;\r
@ .dspr_continue reads sx from r2.\r
.dspr_cache:\r
- @ *(*hc)++ = (tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>24)&0xf);\r
+ @ *HighCacheS_ptr++ = ((code&0x8000)<<16)|(tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|pal|((sprite[0]>>16)&0xf);\r
+ ldr r1, =HighCacheS_ptr\r
mov r4, r8, lsl #16 @ tile\r
tst r9, #0x0800\r
orrne r4, r4, #0x10000 @ code&0x0800\r
- mov r2, r2, lsl #22\r
- orr r4, r4, r2, lsr #16 @ (sx<<6)&0x0000ffc0\r
- and r2, r9, #0x6000\r
- orr r4, r4, r2, lsr #9 @ (code>>9)&0x30\r
+ mov r0, r2, lsl #22\r
+ orr r4, r4, r0, lsr #16 @ (sx<<6)&0x0000ffc0\r
+ and r0, r9, #0x6000\r
+ orr r4, r4, r0, lsr #9 @ (code>>9)&0x30\r
mov r3, r3, lsl #12\r
- ldr r2, [r1]\r
orr r4, r4, r3, lsr #28 @ (sprite[0]>>16)&0xf, i.e. bits 19:16 of r3 (lsl #12 then lsr #28)\r
\r
- str r4, [r2], #4\r
- str r2, [r1]\r
+ ldr r0, [r1]\r
+ tst r9, #0x8000\r
+ orrne r4, r4, #0x80000000 @ prio\r
\r
- ldmfd sp!, {r4-r9,r11,lr}\r
- bx lr\r
+ str r4, [r0], #4\r
+ str r0, [r1]\r
+\r
@ The flags still come from "tst r9, #0x8000" above (str/and do not change them):\r
@ teqne only runs for a high-priority sprite and leaves ne when the as bit is clear.\r
+ and r0, r9, #(1<<27) @ as\r
+ teqne r0, #(1<<27) @ (code&0x8000) && !as\r
+ ldmnefd sp!, {r4-r9,r11,pc}\r
+ b .dspr_continue @ draw anyway if accurate sprites enabled\r
\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r
\r
ldr r6, =rendstatus\r
ldr lr, =(Pico+0x10000) @ lr=Pico.vram\r
- ldrb r6, [r6]\r
+ ldr r6, [r6]\r
\r
@ fetch the first code now\r
ldrh r7, [lr, r12]\r
\r
ands r6, r6, #2 @ we care about bit 1 only\r
orr r6, r6, r2\r
- bne .dw_no_sameprio\r
\r
- cmp r2, r7, lsr #15\r
- ldmnefd sp!, {r4-r11,pc} @ assume that whole window uses same priority\r
+ teqne r2, r7, lsr #15 @ do prio bits differ?\r
+ ldmnefd sp!, {r4-r11,pc} @ yes, assume that whole window uses same priority\r
\r
-.dw_no_sameprio:\r
orr r6, r6, r3, lsl #8 @ shadow mode\r
\r
sub r8, r1, r0\r
- mov r8, r8, lsl #1 @ cells\r
-\r
- mvn r9, #0 @ r9=prevcode=-1\r
\r
@ cache some stuff to avoid mem access\r
+.if OVERRIDE_HIGHCOL\r
+ ldr r11,=HighCol\r
+ mov r8, r8, lsl #1 @ cells\r
+ ldr r11,[r11]\r
+ mvn r9, #0 @ r9=prevcode=-1\r
+ add r11,r11,#8\r
+.else\r
ldr r11,=(HighCol+8)\r
- add r1, r11, r0, lsl #4 @ r1=pdest\r
+ mov r8, r8, lsl #1 @ cells\r
+ mvn r9, #0 @ r9=prevcode=-1\r
+.endif\r
+ add r1, r11, r0, lsl #4 @ r1=pdest\r
mov r0, #0xf\r
b .dwloop_enter\r
\r
- @ r4,r5 & r7 are scratch in this loop\r
+ @ r4,r5 are scratch in this loop\r
.dwloop:\r
add r1, r1, #8\r
.dwloop_nor1:\r
beq .dw_SingleColor @ tileline singlecolor \r
\r
tst r9, #0x0800\r
- beq .dw_TileNorm\r
+ bne .dw_TileFlip\r
\r
@ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
- TileFlip r0\r
- b .dwloop\r
-\r
.dw_TileNorm:\r
TileNorm r0\r
b .dwloop\r
\r
+.dw_TileFlip:\r
+ TileFlip r0\r
+ b .dwloop\r
+\r
.dw_SingleColor:\r
and r4, r0, r2 @ #0x0000000f\r
orr r4, r3, r4\r
orreq r3, r3, #0x40\r
beq .dw_shadow_done\r
ldr r4, [r1]\r
- tst r4, #0x00000080\r
- biceq r4, r4, #0x000000c0\r
- tst r4, #0x00008000\r
- biceq r4, r4, #0x0000c000\r
- tst r4, #0x00800000\r
- biceq r4, r4, #0x00c00000\r
- tst r4, #0x80000000\r
- biceq r4, r4, #0xc0000000\r
+ mov r5, #0x3f\r
+ orr r5, r5, r5, lsl #8\r
+ orr r5, r5, r5, lsl #16\r
+ and r4, r4, r5\r
str r4, [r1]\r
ldr r4, [r1,#4]\r
- tst r4, #0x00000080\r
- biceq r4, r4, #0x000000c0\r
- tst r4, #0x00008000\r
- biceq r4, r4, #0x0000c000\r
- tst r4, #0x00800000\r
- biceq r4, r4, #0x00c00000\r
- tst r4, #0x80000000\r
- biceq r4, r4, #0xc0000000\r
+ and r4, r4, r5\r
str r4, [r1,#4]\r
b .dw_shadow_done\r
\r
bne .fl_loopcpBGR444_hi\r
\r
sub r3, r4, #0x40*3*2\r
+ mov r6, #1\r
\r
\r
.fl_noshBGR444:\r
- ldr r1, =(HighCol+8)\r
+ ldr r12,=rendstatus\r
+ eors r6, r6, #1 @ sh is 0\r
+ ldr r12,[r12]\r
mov lr, #0xff\r
+ tstne r12,#(1<<2) @ and PDRAW_ACC_SPRITES\r
+\r
+.if OVERRIDE_HIGHCOL\r
+ ldr r1, =HighCol\r
+ movne lr, #0x3f\r
+ ldr r1, [r1]\r
+ mov lr, lr, lsl #1\r
+ add r1, r1, #8\r
+.else\r
+ ldr r1, =(HighCol+8)\r
+ movne lr, #0x3f\r
mov lr, lr, lsl #1\r
+.endif\r
\r
.fl_loopBGR444:\r
-\r
ldr r12, [r1], #4\r
subs r2, r2, #1\r
\r
ldrh r5, [r3, r5]\r
and r6, lr, r12, lsr #15\r
ldrh r6, [r3, r6]\r
+ and r12,lr, r12, lsr #23\r
+ ldrh r12,[r3, r12] @ 1c.i.\r
orr r4, r4, r5, lsl #16\r
-\r
- and r5, lr, r12, lsr #23\r
- ldrh r5, [r3, r5] @ 2c.i.\r
- orr r5, r6, r5, lsl #16\r
+ orr r5, r6, r12,lsl #16\r
\r
stmia r0!, {r4,r5}\r
bne .fl_loopBGR444\r
orr \reg, \reg, r3 @ add blue back\r
.endm\r
\r
+.global vidConvCpyRGB565\r
+\r
vidConvCpyRGB565: @ void *to, void *from, int pixels\r
stmfd sp!, {r4-r9,lr}\r
\r
- mov r12, r2, lsr #3 @ repeats\r
+ mov r12, r2, lsr #3 @ repeats\r
mov lr, #0x001c0000\r
orr lr, lr, #0x01c @ lr == pattern 0x001c001c\r
mov r8, #0x00030000\r
- orr r8, r8, #0x003 @ lr == pattern 0x001c001c\r
+ orr r8, r8, #0x003\r
\r
.loopRGB565:\r
ldmia r1!, {r4-r7}\r
bne .fl_loopcpRGB555_hi\r
\r
sub r3, r3, #0x40*2\r
+ mov r6, #1\r
\r
.fl_noshRGB555:\r
+ ldr r12,=rendstatus\r
+ eors r6, r6, #1 @ sh is 0\r
+ ldr r12,[r12]\r
+ mov lr, #0xff\r
+ tstne r12,#(1<<2) @ and PDRAW_ACC_SPRITES\r
+ movne lr, #0x3f\r
+\r
+.if OVERRIDE_HIGHCOL\r
+ ldr r1, =HighCol\r
+ ldr r0, =DrawLineDest\r
+ ldr r1, [r1]\r
+ ldr r0, [r0]\r
+ add r1, r1, #8\r
+.else\r
ldr r0, =DrawLineDest\r
ldr r1, =(HighCol+8)\r
ldr r0, [r0]\r
+.endif\r
\r
ldrb r12, [r8, #12]\r
- mov lr, #0xff\r
mov lr, lr, lsl #1\r
\r
tst r12, #1\r
addeq r0, r0, #32*2\r
\r
.fl_no32colRGB555:\r
-.fl_loopRGB555:\r
\r
+.if UNALIGNED_DRAWLINEDEST\r
+ @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer\r
+ tst r0, #2\r
+ bne .fl_RGB555u\r
+.endif\r
+\r
+.fl_loopRGB555:\r
ldr r12, [r1], #4\r
ldr r7, [r1], #4\r
\r
ldrh r6, [r3, r6]\r
and r12,lr, r7, lsr #15\r
ldrh r12,[r3, r12]\r
+ and r7, lr, r7, lsr #23\r
+ ldrh r7, [r3, r7]\r
orr r8, r8, r6, lsl #16\r
\r
- and r6, lr, r7, lsr #23\r
- ldrh r6, [r3, r6] @ 1 cycle interlock here (r6)\r
subs r2, r2, #1\r
- orr r12,r12, r6, lsl #16\r
+ orr r12,r12, r7, lsl #16\r
\r
stmia r0!, {r4,r5,r8,r12}\r
bne .fl_loopRGB555\r
mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
orr r9, r9, #0x00e7\r
\r
+.if UNALIGNED_DRAWLINEDEST\r
+ tst r0, #2\r
+ bne .fl_32scale_RGB555u\r
+.endif\r
+\r
.fl_loop32scale_RGB555:\r
ldr r12, [r1], #4\r
ldr r7, [r1], #4\r
ldmfd sp!, {r4-r8,lr}\r
bx lr\r
\r
+.if UNALIGNED_DRAWLINEDEST\r
+ @ unaligned versions of loops\r
+ @ warning: starts drawing 2bytes before dst\r
+\r
@ Palette-lookup copy loop for a destination that is not word aligned\r
@ (branched to when bit 1 of r0 is set).\r
@ r0 is moved back by 2 bytes and whole words are stored with stmia; the low halfword\r
@ of the very first word stored is 0 (initial r8).\r
@ Per iteration: 8 source bytes are read from r1; each byte is doubled and masked with lr\r
@ to form a halfword offset into the table at r3. The looked-up halfwords p0-p7 are\r
@ packed as (carry,p0) (p1,p2) (p3,p4) (p5,p6), and p7 is carried in r8 into the next\r
@ iteration. After the last iteration the carried halfword is stored with strh.\r
@ r2 counts iterations (8 pixels each). clobbers: r4-r8, r12.\r
+.fl_RGB555u:\r
+ sub r0, r0, #2 @ initial adjustment\r
+ mov r8, #0\r
+\r
+.fl_loopRGB555u:\r
+ ldr r12, [r1], #4\r
+ ldr r7, [r1], #4\r
+\r
+ and r6, lr, r12,lsl #1\r
+ ldrh r6, [r3, r6]\r
+ and r5, lr, r12,lsr #7\r
+ ldrh r5, [r3, r5]\r
+ orr r4, r8, r6, lsl #16\r
+\r
+ and r6, lr, r12,lsr #15\r
+ ldrh r6, [r3, r6]\r
+ and r8, lr, r12,lsr #23\r
+ ldrh r8, [r3, r8]\r
+ orr r5, r5, r6, lsl #16\r
+\r
+ and r6, lr, r7, lsl #1\r
+ ldrh r6, [r3, r6]\r
+ and r12,lr, r7, lsr #7\r
+ ldrh r12,[r3, r12]\r
+ orr r6, r8, r6, lsl #16\r
+\r
+ and r8, lr, r7, lsr #15\r
+ ldrh r8, [r3, r8]\r
+ and r7, lr, r7, lsr #23\r
+\r
+ subs r2, r2, #1\r
+ orr r12,r12,r8, lsl #16\r
+ ldrh r8, [r3, r7]\r
+\r
+ stmia r0!, {r4,r5,r6,r12}\r
+ bne .fl_loopRGB555u\r
+\r
+ strh r8, [r0], #2\r
+\r
+ ldmfd sp!, {r4-r8,lr}\r
+ bx lr\r
+\r
+\r
@ Variant of the .fl_loop32scale_RGB555 loop for a destination that is not word aligned\r
@ (branched to when bit 1 of r0 is set).\r
@ r0 is moved back by 2 bytes and one halfword is carried between iterations in r4\r
@ (initially 0). Each iteration reads 8 source bytes from r1, looks each one up in the\r
@ table at r3 (offset = byte*2 masked with lr), masks the colours with r9<<2 and blends\r
@ neighbouring pixels; 5 words are stored, which the inline comments label as\r
@ destination pixels -1..8. The last looked-up pixel (r7) becomes the next carry.\r
@ After the loop the carried halfword is stored with strh.\r
@ r2 counts iterations. clobbers: r4-r8, r10, r12.\r
@ NOTE(review): r9 and r10 are popped separately at the end, so they are presumably\r
@ pushed separately by code outside this view -- confirm.\r
+.fl_32scale_RGB555u:\r
+ sub r0, r0, #2 @ initial adjustment\r
+ mov r4, #0\r
+\r
+ @ r9 f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
+.fl_loop32scale_RGB555u:\r
+ ldr r12, [r1], #4\r
+ ldr r7, [r1], #4\r
+\r
+ and r6, lr, r12,lsl #1\r
+ ldrh r6, [r3, r6]\r
+ and r5, lr, r12,lsr #7\r
+ ldrh r5, [r3, r5]\r
+ and r6, r6, r9, lsl #2\r
+ orr r4, r4, r6, lsl #16 @ r4 = pix_d -1, 0\r
+\r
+ and r5, r5, r9, lsl #2\r
+ sub r8, r5, r5, lsr #2 @ r8 = 3/4 pix_s 1\r
+ add r6, r8, r6, lsr #2 @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1)\r
+ orr r5, r6, r5, lsl #15\r
+\r
+ and r6, lr, r12,lsr #15\r
+ ldrh r6, [r3, r6]\r
+ and r12,lr, r12,lsr #23\r
+ ldrh r12,[r3, r12]\r
+ and r6, r6, r9, lsl #2\r
+ add r5, r5, r6, lsl #15 @ r5 = pix_d 1, 2\r
+\r
+ and r8, lr, r7, lsl #1\r
+ ldrh r8, [r3, r8]\r
+ and r10,lr, r7, lsr #7\r
+ ldrh r10,[r3, r10]\r
+ and r12,r12,r9, lsl #2\r
+ sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2\r
+ add r6, r6, r12,lsr #2\r
+ orr r6, r6, r12,lsl #16 @ r6 = pix_d 3, 4\r
+\r
+ and r8, r8, r9, lsl #2\r
+ and r10,r10,r9, lsl #2\r
+ sub r12,r10,r10,lsr #2 @ r12 = 3/4 pix_s 5\r
+ orr r8, r8, r8, lsl #14\r
+ add r8, r8, r12,lsl #16 @ r8 = pix_d 5, 6\r
+ and r12,lr, r7, lsr #15\r
+ ldrh r12,[r3, r12]\r
+ and r7, lr, r7, lsr #23\r
+ ldrh r7, [r3, r7]\r
+ and r12,r12,r9, lsl #2\r
+ add r10,r10,r12\r
+ mov r10,r10, lsr #1\r
+ sub r12,r12,r12,lsr #2 @ r12 = 3/4 pix_s 6\r
+ orr r10,r10,r12,lsl #16\r
+ and r7, r7, r9, lsl #2\r
+ add r10,r10,r7, lsl #14 @ r10 = pix_d 7, 8\r
+\r
+ subs r2, r2, #1\r
+\r
+ stmia r0!, {r4,r5,r6,r8,r10}\r
+ mov r4, r7\r
+ bne .fl_loop32scale_RGB555u\r
+\r
+ strh r4, [r0], #2\r
+\r
+ ldmfd sp!, {r9,r10}\r
+ ldmfd sp!, {r4-r8,lr}\r
+ bx lr\r
+\r
+.endif @ UNALIGNED_DRAWLINEDEST\r
+\r
\r
@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
\r