X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=Pico%2FDraw.s;h=e31ba0baa6911ad14782bf7823d97d4b6051c6af;hb=32826a1a22fd3e6203310bba855d8c2b6f3c403a;hp=749aac538517a27893105412c77fcccdca561dab;hpb=cc68a136aa179a5f32fe40208371eb9c2b0aadae;p=picodrive.git diff --git a/Pico/Draw.s b/Pico/Draw.s index 749aac5..e31ba0b 100644 --- a/Pico/Draw.s +++ b/Pico/Draw.s @@ -1,7 +1,9 @@ +@ vim:filetype=armasm + @ assembly "optimized" version of some funtions from draw.c @ this is highly specialized, be careful if changing related C code! -@ (c) Copyright 2006, notaz +@ (c) Copyright 2007, Grazvydas "notaz" Ignotas @ All Rights Reserved @@ -12,7 +14,6 @@ .extern HighSprZ .extern rendstatus .extern DrawLineDest -.extern DrawStripVSRam .extern DrawStripInterlace @@ -68,7 +69,7 @@ streqb r4, [r1,#\offs] .endm -@ TileNorm (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf, touches r3 high bits +@ TileNormShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf, touches r3 high bits .macro TileNormShHP TilePixelShHP 12, 0 @ #0x0000f000 TilePixelShHP 8, 1 @ #0x00000f00 @@ -80,7 +81,7 @@ TilePixelShHP 16, 7 @ #0x000f0000 .endm -@ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf +@ TileFlipShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf .macro TileFlipShHP TilePixelShHP 16, 0 @ #0x000f0000 TilePixelShHP 20, 1 @ #0x00f00000 @@ -210,9 +211,9 @@ @ int cells; // 0x14 @ }; -@ int DrawLayer(int plane, int *hcache, int maxcells, int sh) +@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); -.global DrawLayer @ int plane, int *hcache, int maxcells, int sh +.global DrawLayer DrawLayer: stmfd sp!, {r4-r11,lr} @@ -220,10 +221,11 @@ DrawLayer: ldr r11, =(Pico+0x22228) @ Pico.video mov r8, #1 - ldrb r7, [r11, #16] @ ??hh??ww + ldrb r7, [r11, #16] @ ??vv??hh mov r6, r1 @ hcache - orr r9, r2, r3, lsl #31 @ r9=maxcells|(sh<<31) + orr r9, r3, r0, lsl #30 + orr r9, r9, r2, lsl #8 @ r9=sh[31]|cellskip[15:8]|maxcells[7:0] (tmp) mov r1, r7, lsl #4 orr r1, r1, #0x00ff @@ -243,7 +245,7 @@ DrawLayer: sub r5, r5, #1 @ r5=xmask @ Find name table: - tst r0, r0 + ands r0, r0, #1 ldreqb r12, [r11, #2] ldrneb r12, [r11, #4] @@ -291,7 +293,6 @@ DrawLayer: add r12, r12, r4, lsl r10 @ nametab+=(ts.line>>3)<hscroll, r5=ts->xmask, r6=ts->hc, r9=ts->cells -@ mov r12,r1, lsl #1 @ r12=(ts->nametab<<1) (halfword compliant) and r10,r2, #7 mov r10,r10, lsl #1 @ r10=ty=(ts->line&7)<<1; @@ -306,14 +307,18 @@ DrawLayer: tst r9, #1<<31 mov r3, #0 - orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|ty) + orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty) movne r3, #0x40 @ default to shadowed pal on sh mode - mvn r9, #0 @ r9=prevcode=-1 - cmp r7, #8 addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll + and r9, r9, #0xff00 + add r8, r8, r9, lsr #8 @ tilex+=cellskip + add r7, r7, r9, lsr #5 @ dx+=cellskip<<3; + sub r10,r10,r9, lsl #16 @ cells-=cellskip + mvn r9, #0 @ r9=prevcode=-1 + @ cache some stuff to avoid mem access ldr r11,=HighCol mov r0, #0xf @@ -342,6 +347,7 @@ DrawLayer: beq .DrawStrip_samecode @ we know stuff about this tile already mov r9, r7 @ remember code + orr r10, r10, #1<<21 @ seen non hi-prio tile movs r2, r9, lsl #20 @ if (code&0x1000) mov r2, r2, lsl #1 @@ -363,16 +369,17 @@ DrawLayer: beq .DrawStrip_SingleColor @ tileline singlecolor tst r9, #0x0800 - beq .DrawStrip_TileNorm + bne .DrawStrip_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern - TileFlip r0 - b .dsloop - .DrawStrip_TileNorm: TileNorm r0 b .dsloop +.DrawStrip_TileFlip: + TileFlip r0 + b .dsloop + .DrawStrip_SingleColor: and r4, r2, #0xf orr r4, r3, r4 @@ -386,6 +393,20 @@ DrawLayer: strneb r4, [r1], #1 @ have a remaining unaligned pixel? b .dsloop_subr1 +.DrawStrip_hiprio_maybempt: + cmp r7, r9 + beq .dsloop @ must've been empty, otherwise we wouldn't get here + movs r2, r7, lsl #20 @ if (code&0x1000) + mov r2, r2, lsl #1 + add r2, r2, r10, lsl #17 + mov r2, r2, lsr #17 + eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; + ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + mov r9, r7 @ remember code + tst r2, r2 + beq .dsloop + orr r10, r10, #1<<22 + .DrawStrip_hiprio: tst r10, #0x00c00000 beq .DrawStrip_hiprio_maybempt @@ -398,9 +419,163 @@ DrawLayer: mov r0, #0xf b .dsloop -.DrawStrip_hiprio_maybempt: +.dsloop_exit: + tst r10, #1<<21 @ seen non hi-prio tile + ldreq r1, =rendstatus + mov r0, #0 + ldreq r2, [r1] + str r0, [r6] @ terminate the cache list + orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles + streq r2, [r1] + + ldmfd sp!, {r4-r11,lr} + bx lr + +@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + +.DrawStrip_vsscroll: + rsb r8, r3, #0 + mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3 + bic r8, r8, #0xff000000 + orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[23:0]) + + ldr r4, =Scanline + orr r5, r1, r10, lsl #24 + ldr r4, [r4] + sub r1, r3, #1 + orr r5, r5, r4, lsl #16 @ r5=(shift_width[31:24]|scanline[23:16]|ymask[15:0]) + and r1, r1, #7 + add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1 + + mov r10,r9, lsl #16 + tst r0, #1 + orrne r10,r10, #0x8000 + tst r9, #1<<31 + mov r3, #0 + orr r10,r10, #0xff000000 @ will be adjusted on entering loop + orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) + movne r3, #0x40 @ default to shadowed pal on sh mode + + cmp r7, #8 + subne r10,r10, #0x01000000 @ have hscroll, start with negative cell + + and r9, r9, #0xff00 + add r8, r8, r9, lsr #8 @ tilex+=cellskip + add r7, r7, r9, lsr #5 @ dx+=cellskip<<3; + add r10,r10,r9, lsl #16 @ cell+=cellskip + mvn r9, #0 @ r9=prevcode=-1 + + @ cache some stuff to avoid mem access + ldr r11,=HighCol + mov r0, #0xf + add r1, r11, r7 @ r1=pdest + + + @ r4 & r7 are scratch in this loop +.dsloop_vs_subr1: + sub r1, r1, #8 +.dsloop_vs: @ 40-41 times + add r10,r10, #0x01000000 + and r4, r10, #0x003f0000 + cmp r4, r10, asr #8 + ble .dsloop_vs_exit + + @ calc offset and read tileline code to r7, also calc ty + add r7, lr, #0x012000 + add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180) + add r7, r7, r10,asr #23 @ vsram + ((cell&~1)<<1) + bic r7, r7, #3 + tst r10,#0x8000 @ plane1? + addne r7, r7, #2 + ldrh r7, [r7] @ r7=vscroll + + bic r10,r10,#0xff @ clear old ty + and r4, r5, #0xff0000 + add r4, r4, r7, lsl #16 + and r4, r4, r5, lsl #16 @ r4=line<<16 + and r7, r4, #0x70000 + orr r10,r10,r7, lsr #15 @ new ty + + mov r4, r4, lsr #19 + mov r7, r5, lsr #24 + mov r4, r4, lsl r7 @ nametabadd + + and r7, r8, r8, lsr #25 + add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords) + add r7, r7, r4, lsl #1 + ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend) + + add r1, r1, #8 + add r8, r8, #1 + + tst r7, #0x8000 + bne .DrawStrip_vs_hiprio + cmp r7, r9 - beq .dsloop @ must've been empty, otherwise we wouldn't get here + beq .DrawStrip_vs_samecode @ we know stuff about this tile already + + mov r9, r7 @ remember code + orr r8, r8, #1<<24 @ seen non hi-prio tile + + movs r2, r9, lsl #20 @ if (code&0x1000) + mov r2, r2, lsl #1 + add r2, r2, r10, lsl #17 + mov r2, r2, lsr #17 + eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; + + ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + + bic r7, r3, #0x3f + and r3, r9, #0x6000 + add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); + +.DrawStrip_vs_samecode: + tst r2, r2 + beq .dsloop_vs @ tileline blank + + cmp r2, r2, ror #4 + beq .DrawStrip_vs_SingleColor @ tileline singlecolor + + tst r9, #0x0800 + bne .DrawStrip_vs_TileFlip + + @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern +.DrawStrip_vs_TileNorm: + TileNorm r0 + b .dsloop_vs + +.DrawStrip_vs_TileFlip: + TileFlip r0 + b .dsloop_vs + +.DrawStrip_vs_SingleColor: + and r4, r2, #0xf + orr r4, r3, r4 + orr r4, r4, r4, lsl #8 + tst r1, #1 @ not aligned? + strneb r4, [r1], #1 + streqh r4, [r1], #2 + strh r4, [r1], #2 + strh r4, [r1], #2 + strh r4, [r1], #2 + strneb r4, [r1], #1 @ have a remaining unaligned pixel? + b .dsloop_vs_subr1 + +.DrawStrip_vs_hiprio: + tst r10, #0x00c00000 + beq .DrawStrip_vs_hiprio_maybempt + sub r0, r1, r11 + orr r7, r7, r0, lsl #16 + orr r7, r7, r10, lsl #25 @ (ty<<25) + tst r7, #0x1000 + eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26; + str r7, [r6], #4 @ cache hi priority tile + mov r0, #0xf + b .dsloop_vs + +.DrawStrip_vs_hiprio_maybempt: + cmp r7, r9 + beq .dsloop_vs @ must've been empty, otherwise we wouldn't get here movs r2, r7, lsl #20 @ if (code&0x1000) mov r2, r2, lsl #1 add r2, r2, r10, lsl #17 @@ -410,41 +585,23 @@ DrawLayer: mov r9, r7 @ remember code tst r2, r2 orrne r10, r10, #1<<22 - bne .DrawStrip_hiprio - b .dsloop + bne .DrawStrip_vs_hiprio + b .dsloop_vs -.dsloop_exit: +.dsloop_vs_exit: + tst r8, #1<<24 @ seen non hi-prio tile + ldreq r1, =rendstatus mov r0, #0 + ldreq r2, [r1] str r0, [r6] @ terminate the cache list + orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles + streq r2, [r1] ldmfd sp!, {r4-r11,lr} bx lr -.DrawStrip_vsscroll: - @ shit, we have 2-cell column based vscroll - @ let the c code handle this (for now) - - @ int nametab; // 0x00 - @ int line; // 0x04 - @ int hscroll; // 0x08 - @ int xmask; // 0x0C - @ int *hc; // 0x10 (pointer to cache buffer) - @ int cells; // 0x14 - - sub sp, sp, #6*4 - orr r2, r1, r10, lsl #24 @ ts.line=ymask|(shift[width]<<24); // save some stuff instead of line - mov r1, r0 @ plane - mov r0, r12, lsr #1 @ halfwords - and r9, r9, #0xff - stmia sp, {r0,r2,r3,r5,r6,r9} - - mov r0, sp - bl DrawStripVSRam @ struct TileStrip *ts, int plane - - add sp, sp, #6*4 - ldmfd sp!, {r4-r11,lr} - bx lr +@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @ interlace mode 2? Sonic 2? .DrawStrip_interlace: @@ -517,26 +674,28 @@ BackFill: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawTilesFromCache @ int *hc, int sh +.global DrawTilesFromCache @ int *hc, int sh, int rlim DrawTilesFromCache: stmfd sp!, {r4-r8,r11,lr} - mvn r5, #0 @ r5=prevcode=-1 - mov r8, r1 - @ cache some stuff to avoid mem access ldr r11,=HighCol ldr lr, =(Pico+0x10000) @ lr=Pico.vram mov r12,#0xf + mvn r5, #0 @ r5=prevcode=-1 + ands r8, r1, #1 + orr r8, r8, r2, lsl #1 + bne .dtfc_check_rendflags + @ scratch: r4, r7 .dtfc_loop: ldr r6, [r0], #4 @ read code movs r1, r6, lsr #16 @ r1=dx; ldmeqfd sp!, {r4-r8,r11,pc} @ dx is never zero, this must be a terminator, return - bic r1, r1, #0xfe00 - add r1, r11, r1 @ r1=pdest + bic r4, r1, #0xfe00 + add r1, r11, r4 @ r1=pdest mov r7, r6, lsl #16 cmp r5, r7, lsr #16 @@ -554,7 +713,10 @@ DrawTilesFromCache: ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels .dtfc_samecode: - tst r8, r8 + rsbs r4, r4, r8, lsr #1 + bmi .dtfc_cut_tile + + tst r8, #1 bne .dtfc_shadow tst r2, r2 @@ -564,16 +726,17 @@ DrawTilesFromCache: beq .dtfc_SingleColor @ tileline singlecolor tst r5, #0x0800 - beq .dtfc_TileNorm + bne .dtfc_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlip r12 - b .dtfc_loop - .dtfc_TileNorm: TileNorm r12 b .dtfc_loop +.dtfc_TileFlip: + TileFlip r12 + b .dtfc_loop + .dtfc_SingleColor: and r4, r2, #0xf orr r4, r3, r4 @@ -595,16 +758,17 @@ DrawTilesFromCache: beq .dtfc_SingleColor @ tileline singlecolor tst r5, #0x0800 - beq .dtfc_TileNormShHP + bne .dtfc_TileFlipShHP @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlipShHP - b .dtfc_loop - .dtfc_TileNormShHP: TileNormShHP b .dtfc_loop +.dtfc_TileFlipShHP: + TileFlipShHP + b .dtfc_loop + .dtfc_shadow_blank: ldrb r4, [r1] @ 1ci ldrb r12,[r1,#1] @@ -641,6 +805,64 @@ DrawTilesFromCache: mov r12, #0xf b .dtfc_loop +.dtfc_cut_tile: + add r4, r4, #7 @ 0-6 + mov r4, r4, lsl #2 + mov r12,#0xf<<28 + mov r12,r12,asr r4 + mov r2, r2, ror #16 + tst r5, #0x0800 @ flipped? + mvnne r12,r12 + and r2, r2, r12 + mov r2, r2, ror #16 + mov r12,#0xf + tst r8, #1 + bne .dtfc_shadow + tst r2, r2 + beq .dtfc_loop + tst r5, #0x0800 + beq .dtfc_TileNorm + b .dtfc_TileFlip + +@ check if we have detected layer covered with hi-prio tiles: +.dtfc_check_rendflags: + ldr r1, =rendstatus + ldr r2, [r1] + tst r2, #0xc0 + beq .dtfc_loop + bic r8, r8, #1 @ sh/hi mode off + tst r2, #0x80 + bne .dtfc_loop @ already processed + orr r2, r2, #0x80 + str r2, [r1] + + add r1, r11,#8 + mov r3, #320/4 + mov r7, #0x80 + orr r7, r7, r7, lsl #8 + orr r7, r7, r7, lsl #16 + mov r6, #0x3f + orr r6, r6, r6, lsl #8 + orr r6, r6, r6, lsl #16 +.dtfc_loop_shprep: + subs r3, r3, #1 + bmi .dtfc_loop @ done + ldr r2, [r1] + tst r2, r7 + andeq r2, r2, r6 + streq r2, [r1], #4 + beq .dtfc_loop_shprep + tst r2, #0x80000000 + biceq r2, r2, #0xc0000000 + tst r2, #0x00800000 + biceq r2, r2, #0x00c00000 + tst r2, #0x00008000 + biceq r2, r2, #0x0000c000 + tst r2, #0x00000080 + biceq r2, r2, #0x000000c0 + str r2, [r1], #4 + b .dtfc_loop_shprep + .pool @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -717,16 +939,17 @@ DrawSpritesFromCache: beq .dsfc_SingleColor @ tileline singlecolor tst r9, #0x10000 - beq .dsfc_TileNorm + bne .dsfc_TileFlip @ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlip r12 - b .dsfc_inloop - .dsfc_TileNorm: TileNorm r12 b .dsfc_inloop +.dsfc_TileFlip: + TileFlip r12 + b .dsfc_inloop + .dsfc_SingleColor: tst r0, #1 @ not aligned? and r4, r2, #0xf @@ -745,16 +968,17 @@ DrawSpritesFromCache: beq .dsfc_singlec_sh tst r9, #0x10000 - beq .dsfc_TileNorm_sh + bne .dsfc_TileFlip_sh @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlipSh - b .dsfc_inloop - .dsfc_TileNorm_sh: TileNormSh b .dsfc_inloop +.dsfc_TileFlip_sh: + TileFlipSh + b .dsfc_inloop + .dsfc_singlec_sh: cmp r2, #0xe0000000 bcc .dsfc_SingleColor @ normal singlecolor tileline (carry inverted in ARM) @@ -861,17 +1085,18 @@ DrawSprite: beq .dspr_SingleColor @ tileline singlecolor tst r9, #0x0800 - beq .dspr_TileNorm + bne .dspr_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlip r12 - b .dspr_loop - @ scratch: r4, r7 .dspr_TileNorm: TileNorm r12 b .dspr_loop +.dspr_TileFlip: + TileFlip r12 + b .dspr_loop + .dspr_SingleColor: and r4, r2, #0xf orr r4, r3, r4 @@ -890,16 +1115,17 @@ DrawSprite: beq .dspr_singlec_sh tst r9, #0x0800 - beq .dspr_TileNorm_sh + bne .dspr_TileFlip_sh @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlipSh - b .dspr_loop - .dspr_TileNorm_sh: TileNormSh b .dspr_loop +.dspr_TileFlip_sh: + TileFlipSh + b .dspr_loop + .dspr_singlec_sh: cmp r2, #0xe0000000 bcc .dspr_SingleColor @ normal tileline @@ -1027,16 +1253,17 @@ DrawWindow: beq .dw_SingleColor @ tileline singlecolor tst r9, #0x0800 - beq .dw_TileNorm + bne .dw_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern - TileFlip r0 - b .dwloop - .dw_TileNorm: TileNorm r0 b .dwloop +.dw_TileFlip: + TileFlip r0 + b .dwloop + .dw_SingleColor: and r4, r0, r2 @ #0x0000000f orr r4, r3, r4 @@ -1285,35 +1512,21 @@ vidConvCpyRGB565: @ void *to, void *from, int pixels FinalizeLineRGB555: stmfd sp!, {r4-r8,lr} - ldr r5, =(Pico+0x22228) @ Pico.video + ldr r8, =(Pico+0x22228) @ Pico.video ldr r4, =HighPal - ldrb r7, [r5, #-0x1a] @ 0x2220e ~ dirtyPal + ldrb r7, [r8, #-0x1a] @ 0x2220e ~ dirtyPal mov r6, r0 mov r1, #0 tst r7, r7 beq .fl_noconvRGB555 - strb r1, [r5, #-0x1a] - sub r1, r5, #0x128 @ r1=Pico.cram + strb r1, [r8, #-0x1a] + sub r1, r8, #0x128 @ r1=Pico.cram mov r0, r4 mov r2, #0x40 bl vidConvCpyRGB565 .fl_noconvRGB555: - ldrb r12, [r5, #12] - ldr r0, =DrawLineDest - ldr r0, [r0] - - tst r12, #1 - movne r2, #320/8 @ len - bne .fl_no32colRGB555 - ldr r3, =PicoOpt - mov r2, #256/8 - ldr r3, [r3] - tst r3, #0x100 - addeq r0, r0, #32*2 - -.fl_no32colRGB555: mov r3, r4 tst r6, r6 beq .fl_noshRGB555 @@ -1348,12 +1561,27 @@ FinalizeLineRGB555: sub r3, r3, #0x40*2 - .fl_noshRGB555: + ldr r0, =DrawLineDest ldr r1, =(HighCol+8) + ldr r0, [r0] + + ldrb r12, [r8, #12] mov lr, #0xff mov lr, lr, lsl #1 + tst r12, #1 + movne r2, #320/8 @ len + bne .fl_no32colRGB555 + ldr r4, =PicoOpt + mov r2, #256/8 + ldr r4, [r4] + tst r4, #0x4000 + bne .fl_32scale_RGB555 + tst r4, #0x0100 + addeq r0, r0, #32*2 + +.fl_no32colRGB555: .fl_loopRGB555: ldr r12, [r1], #4 @@ -1387,9 +1615,71 @@ FinalizeLineRGB555: stmia r0!, {r4,r5,r8,r12} bne .fl_loopRGB555 + ldmfd sp!, {r4-r8,lr} + bx lr + + +.fl_32scale_RGB555: + stmfd sp!, {r9,r10} + mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 + orr r9, r9, #0x00e7 + +.fl_loop32scale_RGB555: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12,lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + and r4, r4, r9, lsl #2 + orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0 + and r5, r5, r9, lsl #2 + sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1 + add r4, r4, r6, lsl #16 @ pix_d 0, 1 + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r12,lr, r12,lsr #23 + ldrh r12,[r3, r12] + and r6, r6, r9, lsl #2 + add r5, r5, r6 + mov r5, r5, lsr #1 + sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 + orr r5, r5, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r12,r12,r9, lsl #2 + add r5, r5, r12,lsl #14 @ pix_d 2, 3 + and r6, r6, r9, lsl #2 + orr r6, r12,r6, lsl #16 @ pix_d 4, 5 + + and r12,lr, r7, lsr #7 + ldrh r12,[r3, r12] + and r10,lr, r7, lsr #15 + ldrh r10,[r3, r10] + and r12,r12,r9, lsl #2 + sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1 + add r8, r8, r6, lsr #18 + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + and r10,r10,r9, lsl #2 + orr r8, r8, r10,lsl #15 + add r8, r8, r12,lsl #15 @ pix_d 6, 7 + sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2 + and r7, r7, r9, lsl #2 + add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3 + orr r10,r10,r7, lsl #16 @ pix_d 8, 9 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + bne .fl_loop32scale_RGB555 + ldmfd sp!, {r9,r10} ldmfd sp!, {r4-r8,lr} - bx lr + bx lr + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@