X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=Pico%2FDraw.s;h=036d3a9eb07e545b6db6950042099fd50645aa01;hb=c060a9ab9c428e1ed9c4159b56529a2a36031e44;hp=cb1a6fa7964d44cf064bb5711a69e324d11b5ac5;hpb=6d7acf9eff33cdde5e3eac44a193448ac0cbf541;p=picodrive.git diff --git a/Pico/Draw.s b/Pico/Draw.s index cb1a6fa..036d3a9 100644 --- a/Pico/Draw.s +++ b/Pico/Draw.s @@ -1,21 +1,30 @@ @ vim:filetype=armasm -@ assembly "optimized" version of some funtions from draw.c +@ ARM assembly versions of some funtions from draw.c @ this is highly specialized, be careful if changing related C code! -@ (c) Copyright 2007, Grazvydas "notaz" Ignotas +@ (c) Copyright 2007-2008, Grazvydas "notaz" Ignotas @ All Rights Reserved +.include "port_config.s" .extern Pico .extern PicoOpt .extern HighCol -.extern Scanline +.extern DrawScanline .extern HighSprZ .extern rendstatus +.extern HighPreSpr .extern DrawLineDest .extern DrawStripInterlace +.extern HighCacheS_ptr +.equ PDRAW_SPRITES_MOVED, (1<<0) +.equ PDRAW_WND_DIFF_PRIO, (1<<1) +.equ PDRAW_ACC_SPRITES, (1<<2) +.equ PDRAW_DIRTY_SPRITES, (1<<4) +.equ PDRAW_PLANE_HI_PRIO, (1<<6) +.equ PDRAW_SHHI_DONE, (1<<7) @ helper .macro TilePixel pat lsrr offs @@ -63,13 +72,11 @@ .endif ldreqb r4, [r1,#\offs] orrne r4, r3, r4 - strneb r4, [r1,#\offs] - tsteq r4, #0x80 andeq r4, r4, #0x3f - streqb r4, [r1,#\offs] + strb r4, [r1,#\offs] .endm -@ TileNorm (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf, touches r3 high bits +@ TileNormShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf, touches r3 high bits .macro TileNormShHP TilePixelShHP 12, 0 @ #0x0000f000 TilePixelShHP 8, 1 @ #0x00000f00 @@ -81,7 +88,7 @@ TilePixelShHP 16, 7 @ #0x000f0000 .endm -@ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf +@ TileFlipShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf .macro TileFlipShHP TilePixelShHP 16, 0 @ #0x000f0000 TilePixelShHP 20, 1 @ #0x00f00000 @@ -154,24 +161,17 @@ .else ands r4, r12, r2 .endif - beq 3f + beq 0f cmp r4, #0xe - beq 2f - bgt 1f - orr r4, r3, r4 - strb r4, [r1,#\ofs] - b 3f -1: - ldrb r4, [r1,#\ofs] @ 2ci - orr r4, r4, #0xc0 - strb r4, [r1,#\ofs] - b 3f -2: - ldrb r4, [r1,#\ofs] @ 2ci - bic r4, r4, #0xc0 - orr r4, r4, #0x80 + ldrgeb r4, [r1,#\ofs] + orrlt r4, r3, r4 @ normal + + biceq r4, r4, #0xc0 @ hilight + orreq r4, r4, #0x80 + orrgt r4, r4, #0xc0 @ shadow + strb r4, [r1,#\ofs] -3: +0: .endm @ TileFlipSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf @@ -198,6 +198,80 @@ TileDoShGenPixel 16, 7 @ #0x000f0000 .endm +.macro TileDoShGenPixel_noop shift ofs +.if \shift + and r4, r12, r2, lsr #\shift +.else + and r4, r12, r2 +.endif + sub r7, r4, #1 + cmp r7, #0xd + orrcc r4, r3, r4 @ 0-0xc (was 1-0xd) + strccb r4, [r1,#\ofs] +.endm + +.macro TileFlipSh_noop + TileDoShGenPixel_noop 16, 0 @ #0x000f0000 + TileDoShGenPixel_noop 20, 1 @ #0x00f00000 + TileDoShGenPixel_noop 24, 2 @ #0x0f000000 + TileDoShGenPixel_noop 28, 3 @ #0xf0000000 + TileDoShGenPixel_noop 0, 4 @ #0x0000000f + TileDoShGenPixel_noop 4, 5 @ #0x000000f0 + TileDoShGenPixel_noop 8, 6 @ #0x00000f00 + TileDoShGenPixel_noop 12, 7 @ #0x0000f000 +.endm + +.macro TileNormSh_noop + TileDoShGenPixel_noop 12, 0 @ #0x0000f000 + TileDoShGenPixel_noop 8, 1 @ #0x00000f00 + TileDoShGenPixel_noop 4, 2 @ #0x000000f0 + TileDoShGenPixel_noop 0, 3 @ #0x0000000f + TileDoShGenPixel_noop 28, 4 @ #0xf0000000 + TileDoShGenPixel_noop 24, 5 @ #0x0f000000 + TileDoShGenPixel_noop 20, 6 @ #0x00f00000 + TileDoShGenPixel_noop 16, 7 @ #0x000f0000 +.endm + +.macro TileDoShGenPixel_onlyop_lp shift ofs +.if \shift + ands r7, r12, r2, lsr #\shift +.else + ands r7, r12, r2 +.endif + ldrneb r4, [r1,#\ofs] + tstne r4, #0x40 + beq 0f + + cmp r7, #0xe + biceq r4, r4, #0xc0 @ hilight + orreq r4, r4, #0x80 + orrgt r4, r4, #0xc0 @ shadow + strgeb r4, [r1,#\ofs] +0: +.endm + +.macro TileFlipSh_onlyop_lp + TileDoShGenPixel_onlyop_lp 16, 0 @ #0x000f0000 + TileDoShGenPixel_onlyop_lp 20, 1 @ #0x00f00000 + TileDoShGenPixel_onlyop_lp 24, 2 @ #0x0f000000 + TileDoShGenPixel_onlyop_lp 28, 3 @ #0xf0000000 + TileDoShGenPixel_onlyop_lp 0, 4 @ #0x0000000f + TileDoShGenPixel_onlyop_lp 4, 5 @ #0x000000f0 + TileDoShGenPixel_onlyop_lp 8, 6 @ #0x00000f00 + TileDoShGenPixel_onlyop_lp 12, 7 @ #0x0000f000 +.endm + +.macro TileNormSh_onlyop_lp + TileDoShGenPixel_onlyop_lp 12, 0 @ #0x0000f000 + TileDoShGenPixel_onlyop_lp 8, 1 @ #0x00000f00 + TileDoShGenPixel_onlyop_lp 4, 2 @ #0x000000f0 + TileDoShGenPixel_onlyop_lp 0, 3 @ #0x0000000f + TileDoShGenPixel_onlyop_lp 28, 4 @ #0xf0000000 + TileDoShGenPixel_onlyop_lp 24, 5 @ #0x0f000000 + TileDoShGenPixel_onlyop_lp 20, 6 @ #0x00f00000 + TileDoShGenPixel_onlyop_lp 16, 7 @ #0x000f0000 +.endm + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -211,9 +285,9 @@ @ int cells; // 0x14 @ }; -@ int DrawLayer(int plane, int *hcache, int maxcells, int sh) +@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); -.global DrawLayer @ int plane, int *hcache, int maxcells, int sh +.global DrawLayer DrawLayer: stmfd sp!, {r4-r11,lr} @@ -221,10 +295,11 @@ DrawLayer: ldr r11, =(Pico+0x22228) @ Pico.video mov r8, #1 - ldrb r7, [r11, #16] @ ??hh??ww + ldrb r7, [r11, #16] @ ??vv??hh mov r6, r1 @ hcache - orr r9, r2, r3, lsl #31 @ r9=maxcells|(sh<<31) + orr r9, r3, r0, lsl #30 + orr r9, r9, r2, lsl #8 @ r9=sh[31]|cellskip[15:8]|maxcells[7:0] (tmp) mov r1, r7, lsl #4 orr r1, r1, #0x00ff @@ -244,11 +319,11 @@ DrawLayer: sub r5, r5, #1 @ r5=xmask @ Find name table: - tst r0, r0 + ands r0, r0, #1 ldreqb r12, [r11, #2] ldrneb r12, [r11, #4] - ldr r2, =Scanline @ trying to make good use of pipeline here + ldr r2, =DrawScanline @ trying to make good use of pipeline here ldr lr, =(Pico+0x10000) @ lr=Pico.vram moveq r12, r12, lsl #10 @@ -262,7 +337,7 @@ DrawLayer: mov r4, r8, lsr #8 @ pvid->reg[13] mov r4, r4, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords) tst r7, #2 - addne r4, r4, r2, lsl #2 @ htab+=Scanline<<1; // Offset by line + addne r4, r4, r2, lsl #2 @ htab+=DrawScanline<<1; // Offset by line tst r7, #1 biceq r4, r4, #0x1f @ htab&=~0xf; // Offset by tile add r4, r4, r0, lsl #1 @ htab+=plane @@ -306,17 +381,28 @@ DrawLayer: tst r9, #1<<31 mov r3, #0 - orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|ty) + orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty) movne r3, #0x40 @ default to shadowed pal on sh mode - mvn r9, #0 @ r9=prevcode=-1 - cmp r7, #8 addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll + and r9, r9, #0xff00 + add r8, r8, r9, lsr #8 @ tilex+=cellskip + add r7, r7, r9, lsr #5 @ dx+=cellskip<<3; + sub r10,r10,r9, lsl #16 @ cells-=cellskip + @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r0, #0xf + ldr r11,[r11] +.else ldr r11,=HighCol mov r0, #0xf +.endif + + mvn r9, #0 @ r9=prevcode=-1 add r1, r11, r7 @ r1=pdest @@ -342,6 +428,7 @@ DrawLayer: beq .DrawStrip_samecode @ we know stuff about this tile already mov r9, r7 @ remember code + orr r10, r10, #1<<21 @ seen non hi-prio tile movs r2, r9, lsl #20 @ if (code&0x1000) mov r2, r2, lsl #1 @@ -363,16 +450,17 @@ DrawLayer: beq .DrawStrip_SingleColor @ tileline singlecolor tst r9, #0x0800 - beq .DrawStrip_TileNorm + bne .DrawStrip_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern - TileFlip r0 - b .dsloop - .DrawStrip_TileNorm: TileNorm r0 b .dsloop +.DrawStrip_TileFlip: + TileFlip r0 + b .dsloop + .DrawStrip_SingleColor: and r4, r2, #0xf orr r4, r3, r4 @@ -386,18 +474,6 @@ DrawLayer: strneb r4, [r1], #1 @ have a remaining unaligned pixel? b .dsloop_subr1 -.DrawStrip_hiprio: - tst r10, #0x00c00000 - beq .DrawStrip_hiprio_maybempt - sub r0, r1, r11 - orr r7, r7, r0, lsl #16 - orr r7, r7, r10, lsl #25 @ (ty<<25) - tst r7, #0x1000 - eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26; - str r7, [r6], #4 @ cache hi priority tile - mov r0, #0xf - b .dsloop - .DrawStrip_hiprio_maybempt: cmp r7, r9 beq .dsloop @ must've been empty, otherwise we wouldn't get here @@ -409,13 +485,29 @@ DrawLayer: ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels mov r9, r7 @ remember code tst r2, r2 - orrne r10, r10, #1<<22 - bne .DrawStrip_hiprio + beq .dsloop + orr r10, r10, #1<<22 + +.DrawStrip_hiprio: + tst r10, #0x00c00000 + beq .DrawStrip_hiprio_maybempt + sub r0, r1, r11 + orr r7, r7, r0, lsl #16 + orr r7, r7, r10, lsl #25 @ (ty<<25) + tst r7, #0x1000 + eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26; + str r7, [r6], #4 @ cache hi priority tile + mov r0, #0xf b .dsloop .dsloop_exit: + tst r10, #1<<21 @ seen non hi-prio tile + ldreq r1, =rendstatus mov r0, #0 + ldreq r2, [r1] str r0, [r6] @ terminate the cache list + orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles + streq r2, [r1] ldmfd sp!, {r4-r11,lr} bx lr @@ -425,10 +517,10 @@ DrawLayer: .DrawStrip_vsscroll: rsb r8, r3, #0 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3 - bic r8, r8, #0xff000000 - orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|tilex[15:0]) + bic r8, r8, #0x3fc00000 + orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[21:0]) - ldr r4, =Scanline + ldr r4, =DrawScanline orr r5, r1, r10, lsl #24 ldr r4, [r4] sub r1, r3, #1 @@ -437,24 +529,34 @@ DrawLayer: add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1 mov r10,r9, lsl #16 - tst r0, r0 + tst r0, #1 orrne r10,r10, #0x8000 tst r9, #1<<31 mov r3, #0 orr r10,r10, #0xff000000 @ will be adjusted on entering loop - orrne r10,r10, #1<<23 @ r10=(cells[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) + orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) movne r3, #0x40 @ default to shadowed pal on sh mode - mvn r9, #0 @ r9=prevcode=-1 + cmp r7, #8 + subne r10,r10, #0x01000000 @ have hscroll, start with negative cell + + and r9, r9, #0xff00 + add r8, r8, r9, lsr #8 @ tilex+=cellskip + add r7, r7, r9, lsr #5 @ dx+=cellskip<<3; + add r10,r10,r9, lsl #16 @ cell+=cellskip @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL ldr r11,=HighCol mov r0, #0xf - add r1, r11, r7 @ r1=pdest - - cmp r7, #8 - subne r10,r10, #0x01000000 @ have hscroll, start with negative cell + ldr r11,[r11] +.else + ldr r11,=HighCol + mov r0, #0xf +.endif + mvn r9, #0 @ r9=prevcode=-1 + add r1, r11, r7 @ r1=pdest @ r4 & r7 are scratch in this loop .dsloop_vs_subr1: @@ -463,7 +565,7 @@ DrawLayer: add r10,r10, #0x01000000 and r4, r10, #0x003f0000 cmp r4, r10, asr #8 - ble .dsloop_exit + ble .dsloop_vs_exit @ calc offset and read tileline code to r7, also calc ty add r7, lr, #0x012000 @@ -475,9 +577,9 @@ DrawLayer: ldrh r7, [r7] @ r7=vscroll bic r10,r10,#0xff @ clear old ty - and r4, r5, #0xff0000 - add r4, r4, r7, lsl #16 - and r4, r4, r5, lsl #16 @ r4=line<<16 + and r4, r5, #0xff0000 @ scanline + add r4, r4, r7, lsl #16 @ ... += vscroll + and r4, r4, r5, lsl #16 @ ... &= ymask and r7, r4, #0x70000 orr r10,r10,r7, lsr #15 @ new ty @@ -500,6 +602,7 @@ DrawLayer: beq .DrawStrip_vs_samecode @ we know stuff about this tile already mov r9, r7 @ remember code + orr r8, r8, #(1<<24)@ seen non hi-prio tile movs r2, r9, lsl #20 @ if (code&0x1000) mov r2, r2, lsl #1 @@ -521,16 +624,17 @@ DrawLayer: beq .DrawStrip_vs_SingleColor @ tileline singlecolor tst r9, #0x0800 - beq .DrawStrip_vs_TileNorm + bne .DrawStrip_vs_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern - TileFlip r0 - b .dsloop_vs - .DrawStrip_vs_TileNorm: TileNorm r0 b .dsloop_vs +.DrawStrip_vs_TileFlip: + TileFlip r0 + b .dsloop_vs + .DrawStrip_vs_SingleColor: and r4, r2, #0xf orr r4, r3, r4 @@ -571,6 +675,18 @@ DrawLayer: bne .DrawStrip_vs_hiprio b .dsloop_vs +.dsloop_vs_exit: + tst r8, #(1<<24) @ seen non hi-prio tile + ldreq r1, =rendstatus + mov r0, #0 + ldreq r2, [r1] + str r0, [r6] @ terminate the cache list + orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles + streq r2, [r1] + + ldmfd sp!, {r4-r11,lr} + bx lr + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -581,7 +697,7 @@ DrawLayer: movne r7, r7, lsl #5 @ Find the line in the name table - add r2, r7, r2, lsl #22 @ r2=(vscroll+(Scanline<<1))<<21 (11 bits); + add r2, r7, r2, lsl #22 @ r2=(vscroll+(DrawScanline<<1))<<21 (11 bits); orr r1, r1, #0x80000000 and r2, r2, r1, ror #10 @ &((ymask<<1)|1)<<21; mov r2, r2, lsr #21 @@ -610,10 +726,18 @@ DrawLayer: BackFill: stmfd sp!, {r4-r9,lr} +.if OVERRIDE_HIGHCOL + ldr lr, =HighCol + mov r0, r0, lsl #26 + ldr lr, [lr] + mov r0, r0, lsr #26 + add lr, lr, #8 +.else ldr lr, =(HighCol+8) - mov r0, r0, lsl #26 mov r0, r0, lsr #26 +.endif + orr r0, r0, r1, lsl #6 orr r0, r0, r0, lsl #8 orr r0, r0, r0, lsl #16 @@ -645,26 +769,34 @@ BackFill: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawTilesFromCache @ int *hc, int sh +.global DrawTilesFromCache @ int *hc, int sh, int rlim DrawTilesFromCache: stmfd sp!, {r4-r8,r11,lr} - mvn r5, #0 @ r5=prevcode=-1 - mov r8, r1 - @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r12,#0xf + ldr r11,[r11] +.else ldr r11,=HighCol - ldr lr, =(Pico+0x10000) @ lr=Pico.vram mov r12,#0xf +.endif + ldr lr, =(Pico+0x10000) @ lr=Pico.vram + + mvn r5, #0 @ r5=prevcode=-1 + ands r8, r1, #1 + orr r8, r8, r2, lsl #1 + bne .dtfc_check_rendflags @ scratch: r4, r7 .dtfc_loop: ldr r6, [r0], #4 @ read code movs r1, r6, lsr #16 @ r1=dx; ldmeqfd sp!, {r4-r8,r11,pc} @ dx is never zero, this must be a terminator, return - bic r1, r1, #0xfe00 - add r1, r11, r1 @ r1=pdest + bic r4, r1, #0xfe00 + add r1, r11, r4 @ r1=pdest mov r7, r6, lsl #16 cmp r5, r7, lsr #16 @@ -682,7 +814,10 @@ DrawTilesFromCache: ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels .dtfc_samecode: - tst r8, r8 + rsbs r4, r4, r8, lsr #1 + bmi .dtfc_cut_tile + + tst r8, #1 bne .dtfc_shadow tst r2, r2 @@ -692,16 +827,17 @@ DrawTilesFromCache: beq .dtfc_SingleColor @ tileline singlecolor tst r5, #0x0800 - beq .dtfc_TileNorm + bne .dtfc_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlip r12 - b .dtfc_loop - .dtfc_TileNorm: TileNorm r12 b .dtfc_loop +.dtfc_TileFlip: + TileFlip r12 + b .dtfc_loop + .dtfc_SingleColor: and r4, r2, #0xf orr r4, r3, r4 @@ -723,50 +859,86 @@ DrawTilesFromCache: beq .dtfc_SingleColor @ tileline singlecolor tst r5, #0x0800 - beq .dtfc_TileNormShHP + bne .dtfc_TileFlipShHP @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlipShHP - b .dtfc_loop - .dtfc_TileNormShHP: TileNormShHP b .dtfc_loop +.dtfc_TileFlipShHP: + TileFlipShHP + b .dtfc_loop + .dtfc_shadow_blank: - ldrb r4, [r1] @ 1ci - ldrb r12,[r1,#1] - tst r4, #0x80 - andeq r4, r4,#0x3f - streqb r4, [r1] - tst r12,#0x80 - ldrb r4, [r1,#2] - andeq r12,r12,#0x3f - streqb r12,[r1,#1] - tst r4, #0x80 - ldrb r12,[r1,#3] - andeq r4, r4,#0x3f - streqb r4, [r1,#2] - tst r12,#0x80 - ldrb r4, [r1,#4] - andeq r12,r12,#0x3f - streqb r12,[r1,#3] - tst r4, #0x80 - ldrb r12,[r1,#5] - andeq r4, r4,#0x3f - streqb r4, [r1,#4] - tst r12,#0x80 - ldrb r4, [r1,#6] - andeq r12,r12,#0x3f - streqb r12,[r1,#5] - tst r4, #0x80 - ldrb r12,[r1,#7] - andeq r4, r4,#0x3f - streqb r4, [r1,#6] - tst r12,#0x80 - andeq r12,r12,#0x3f - streqb r12,[r1,#7] - mov r12, #0xf + tst r1, #1 + ldrneb r4, [r1] + mov r6, #0x3f + and r4, r4, #0x3f + strneb r4, [r1], #1 + ldrh r4, [r1] + orr r6, r6, r6, lsl #8 + and r4, r4, r6 + strh r4, [r1], #2 + ldrh r4, [r1] + and r4, r4, r6 + strh r4, [r1], #2 + ldrh r4, [r1] + and r4, r4, r6 + strh r4, [r1], #2 + ldrh r4, [r1] + and r4, r4, r6 + streqh r4, [r1] + strneb r4, [r1] + b .dtfc_loop + +.dtfc_cut_tile: + add r4, r4, #7 @ 0-6 + mov r4, r4, lsl #2 + mov r12,#0xf<<28 + mov r12,r12,asr r4 + mov r2, r2, ror #16 + tst r5, #0x0800 @ flipped? + mvnne r12,r12 + and r2, r2, r12 + mov r2, r2, ror #16 + mov r12,#0xf + tst r8, #1 + bne .dtfc_shadow + tst r2, r2 + beq .dtfc_loop + tst r5, #0x0800 + beq .dtfc_TileNorm + b .dtfc_TileFlip + +@ check if we have detected layer covered with hi-prio tiles: +.dtfc_check_rendflags: + ldr r1, =rendstatus + ldr r2, [r1] + tst r2, #(PDRAW_PLANE_HI_PRIO|PDRAW_SHHI_DONE) + beq .dtfc_loop + bic r8, r8, #1 @ sh/hi mode off + tst r2, #PDRAW_SHHI_DONE + bne .dtfc_loop @ already processed + orr r2, r2, #PDRAW_SHHI_DONE + str r2, [r1] + + add r1, r11,#8 + mov r3, #320/4/4 + mov r6, #0x3f + orr r6, r6, r6, lsl #8 + orr r6, r6, r6, lsl #16 +.dtfc_loop_shprep: + ldmia r1, {r2,r4,r5,r7} + subs r3, r3, #1 + and r2, r2, r6 + and r4, r4, r6 + and r5, r5, r6 + and r7, r7, r6 + stmia r1!,{r2,r4,r5,r7} + bne .dtfc_loop_shprep + + mvn r5, #0 @ r5=prevcode=-1 b .dtfc_loop .pool @@ -774,141 +946,246 @@ DrawTilesFromCache: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawSpritesFromCache @ int *hc, int sh +.global DrawSpritesSHi @ unsigned char *sprited + +DrawSpritesSHi: + ldr r3, [r0] + mov r12,#0xff + ands r3, r3, #0x7f + bxeq lr -DrawSpritesFromCache: stmfd sp!, {r4-r11,lr} + strb r12,[r0,#2] @ set end marker + add r10,r0, #3 @ r10=HighLnSpr end + add r10,r10,r3 @ r10=HighLnSpr end - @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r12,#0xf + ldr r11,[r11] +.else ldr r11,=HighCol - ldr lr, =(Pico+0x10000) @ lr=Pico.vram - mov r6, r1, lsl #31 - orr r6, r6, #1<<30 mov r12,#0xf +.endif + ldr lr, =(Pico+0x10000) @ lr=Pico.vram - mov r10, r0 -.dsfc_loop: - ldr r9, [r10], #4 @ read code - bic r6, r6, #7 @ using pipeline - tst r9, r9 - ldmeqfd sp!, {r4-r11,pc} +DrawSpriteSHi: + @ draw next sprite + ldrb r0, [r10,#-1]! + ldr r1, =HighPreSpr +@ ldr r8, [sp, #-4] + cmp r0, #0xff + ldmeqfd sp!, {r4-r11,pc} @ end of list + and r0, r0, #0x7f + add r0, r1, r0, lsl #3 - mov r4, r9, lsl #28 - orr r6, r6, r4, lsr #30 - add r6, r6, #1 @ r6=s1cc???? ... ?????www (s=shadow/hilight, cc=pal, w=width) + ldr r9, [r0, #4] @ sprite[1] + mov r2, r9, asr #16 @ r2=sx - and r5, r9, #3 - add r5, r5, #1 @ r5=delta - tst r9, #0x10000 - rsbne r5, r5, #0 @ Flip X - mov r5, r5, lsl #4 + mov r9, r9, lsl #16 + mov r3, r9, lsr #31 @ priority + mov r9, r9, lsr #16 +@ orr r9, r9, r8, lsl #31 @ r9=code|sh[31] @@ sh is always on here now + and r4, r9, #0x6000 + orr r9, r9, r4, lsl #16 + orr r9, r9, #0x90000000 @ r9=scc1 ???? ... (s=shadow/hilight, cc=pal) + cmp r12,r9, lsr #28 @ sh/hi with pal3? + cmpne r3, #1 @ if not, is ir hi prio? + bne DrawSpriteSHi @ non-operator low sprite, already drawn - mov r2, r9, lsr #17 - mov r8, r2, lsl #1 @ tile=((unsigned int)code>>17)<<1; + ldr r3, [r0] @ sprite[0] + ldr r7, =DrawScanline + mov r6, r3, lsr #28 + sub r6, r6, #1 @ r6=width-1 (inc later) + mov r5, r3, lsr #24 + and r5, r5, #7 @ r5=height - and r3, r9, #0x30 @ r3=pal=(code&0x30); + mov r0, r3, lsl #16 @ r4=sy<<16 (tmp) - bic r6, r6, #3<<28 - orr r6, r6, r3, lsl #24 + ldr r7, [r7] + sub r7, r7, r0, asr #16 @ r7=row=DrawScanline-sy - mov r0, r9, lsl #16 - mov r0, r0, asr #22 @ sx=(code<<16)>>22 - adds r0, r0, #0 @ set ZV - b .dsfc_inloop_enter + tst r9, #0x1000 + movne r0, r5, lsl #3 + subne r0, r0, #1 + subne r7, r0, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y -@ scratch: r4, r7 -.dsfc_inloop: - sub r6, r6, #1 - tst r6, #7 - beq .dsfc_loop - adds r0, r0, #8 - add r8, r8, r5 - -.dsfc_inloop_enter: - ble .dsfc_inloop + add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down + tst r9, #0x0800 + mlane r8, r5, r6, r8 @ if (code&0x0800) { tile+=delta*(width-1); + rsbne r5, r5, #0 @ delta=-delta; } // r5=delta now + + mov r8, r8, lsl #21 + mov r8, r8, lsr #17 + and r7, r7, #7 + add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address + + mov r5, r5, lsl #4 @ delta<<=4; // Delta of address + mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); + + add r6, r6, #1 @ inc now + adds r0, r2, #0 @ mov sx to r0 and set ZV flags + b .dsprShi_loop_enter + +.dsprShi_loop: + subs r6, r6, #1 @ width-- + beq DrawSpriteSHi + adds r0, r0, #8 @ sx+=8 + add r8, r8, r5 @ tile+=delta + +.dsprShi_loop_enter: + ble .dsprShi_loop @ sx <= 0 cmp r0, #328 - bge .dsfc_loop + bge DrawSpriteSHi mov r8, r8, lsl #17 - mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address + mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address - ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+tile); // Get 8 pixels - add r1, r11, r0 @ r1=pdest + ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + add r1, r11, r0 @ r1=pdest tst r2, r2 - beq .dsfc_inloop + beq .dsprShi_loop - cmp r12, r6, lsr #28 - beq .dsfc_shadow + cmp r12, r9, lsr #28 + beq .dsprShi_shadow cmp r2, r2, ror #4 - beq .dsfc_SingleColor @ tileline singlecolor - - tst r9, #0x10000 - beq .dsfc_TileNorm + beq .dsprShi_SingleColor @ tileline singlecolor - @ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlip r12 - b .dsfc_inloop + tst r9, #0x0800 + bne .dsprShi_TileFlip -.dsfc_TileNorm: + @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern +@ scratch: r4, r7 +.dsprShi_TileNorm: TileNorm r12 - b .dsfc_inloop + b .dsprShi_loop -.dsfc_SingleColor: - tst r0, #1 @ not aligned? +.dsprShi_TileFlip: + TileFlip r12 + b .dsprShi_loop + +.dsprShi_SingleColor: and r4, r2, #0xf orr r4, r3, r4 orr r4, r4, r4, lsl #8 + tst r0, #1 @ not aligned? strneb r4, [r1], #1 streqh r4, [r1], #2 strh r4, [r1], #2 strh r4, [r1], #2 strh r4, [r1], #2 strneb r4, [r1], #1 - b .dsfc_inloop + b .dsprShi_loop + +.dsprShi_shadow: + tst r9, #0x8000 + beq .dsprShi_shadow_lowpri -.dsfc_shadow: cmp r2, r2, ror #4 - beq .dsfc_singlec_sh + beq .dsprShi_singlec_sh - tst r9, #0x10000 - beq .dsfc_TileNorm_sh + tst r9, #0x0800 + bne .dsprShi_TileFlip_sh @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlipSh - b .dsfc_inloop - -.dsfc_TileNorm_sh: +.dsprShi_TileNorm_sh: TileNormSh - b .dsfc_inloop + b .dsprShi_loop + +.dsprShi_TileFlip_sh: + TileFlipSh + b .dsprShi_loop -.dsfc_singlec_sh: +.dsprShi_singlec_sh: cmp r2, #0xe0000000 - bcc .dsfc_SingleColor @ normal singlecolor tileline (carry inverted in ARM) + bcc .dsprShi_SingleColor @ normal singlecolor tileline (carry inverted in ARM) tst r2, #0x10000000 - bne .dsfc_sh_sh + bne .dsprShi_sh_sh TileSingleHi - b .dsfc_inloop + b .dsprShi_loop -.dsfc_sh_sh: +.dsprShi_sh_sh: TileSingleSh - b .dsfc_inloop + b .dsprShi_loop + +.dsprShi_shadow_lowpri: + tst r9, #0x800 + bne .dsprShi_TileFlip_sh_lp + +.dsprShi_TileNorm_sh_lp: + TileNormSh_onlyop_lp + b .dsprShi_loop + +.dsprShi_TileFlip_sh_lp: + TileFlipSh_onlyop_lp + b .dsprShi_loop .pool @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -@ + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. size -@ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 +.global DrawAllSprites @ unsigned char *sprited, int prio, int sh + +DrawAllSprites: + ldr r3, =rendstatus + orr r1, r2, r1, lsl #1 + ldr r12,[r3] + tst r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED) + beq das_no_prep + stmfd sp!, {r0,r1,lr} + and r0, r12,#PDRAW_DIRTY_SPRITES + bic r12,r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED) + str r12,[r3] + bl PrepareSprites + ldmfd sp!, {r0,r1,lr} + +das_no_prep: + ldr r3, [r0] + ands r3, r3, #0x7f + bxeq lr + + @ time to do some real work + stmfd sp!, {r4-r11,lr} + mov r12,#0xff + strb r12,[r0,#2] @ set end marker + add r10,r0, #3 + add r10,r10,r3 @ r10=HighLnSpr end + + str r1, [sp, #-4] @ no calls after this point + +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r12,#0xf + ldr r11,[r11] +.else + ldr r11,=HighCol + mov r12,#0xf +.endif + ldr lr, =(Pico+0x10000) @ lr=Pico.vram -.global DrawSprite @ unsigned int *sprite, int **hc, int sh +@ + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: horiz. size +@ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -DrawSprite: - stmfd sp!, {r4-r9,r11,lr} +DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites + @ draw next sprite + ldrb r0, [r10,#-1]! + ldr r1, =HighPreSpr + ldr r8, [sp, #-4] + mov r2, r0, lsr #7 + cmp r0, #0xff + ldmeqfd sp!, {r4-r11,pc} @ end of list + cmp r2, r8, lsr #1 + bne DrawSprite @ wrong priority + and r0, r0, #0x7f + add r0, r1, r0, lsl #3 + +@ stmfd sp!, {r4-r9,r11,lr} +@ orr r8, r2, r1, lsl #4 ldr r3, [r0] @ sprite[0] - ldr r7, =Scanline + ldr r7, =DrawScanline mov r6, r3, lsr #28 sub r6, r6, #1 @ r6=width-1 (inc later) mov r5, r3, lsr #24 @@ -918,46 +1195,37 @@ DrawSprite: ldr r7, [r7] ldr r9, [r0, #4] - sub r7, r7, r4, asr #16 @ r7=row=Scanline-sy + sub r7, r7, r4, asr #16 @ r7=row=DrawScanline-sy - tst r2, r2 mov r2, r9, asr #16 @ r2=sx - bic r9, r9, #0xfe000000 - orrne r9, r9, #1<<31 @ r9=code|(sh<<31) + mov r9, r9, lsl #16 + mov r9, r9, lsr #16 + orr r9, r9, r8, lsl #31 @ r9=code|sh[31] tst r9, #0x1000 movne r4, r5, lsl #3 subne r4, r4, #1 subne r7, r4, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y - mov r8, r9, lsl #21 - mov r8, r8, lsr #21 - add r8, r8, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down - + add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down tst r9, #0x0800 mlane r8, r5, r6, r8 @ if (code&0x0800) { tile+=delta*(width-1); rsbne r5, r5, #0 @ delta=-delta; } // r5=delta now - mov r8, r8, lsl #4 + mov r8, r8, lsl #21 + mov r8, r8, lsr #17 and r7, r7, #7 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address - tst r9, #0x8000 - bne .dspr_cache @ if(code&0x8000) // high priority - cache it - +.dspr_continue: @ cache some stuff to avoid mem access - ldr r11,=HighCol - ldr lr, =(Pico+0x10000) @ lr=Pico.vram - mov r12,#0xf - mov r5, r5, lsl #4 @ delta<<=4; // Delta of address and r4, r9, #0x6000 orr r9, r9, r4, lsl #16 - orr r9, r9, #0x10000000 @ r9=scc1 ???? ... (s=shadow/hilight, cc=pal) + orrs r9, r9, #0x10000000 @ r9=scc1 ???? ... (s=shadow/hilight, cc=pal) - tst r9, #1<<31 mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); - orrne r3, r3, #0x40 @ shadow by default + orrmi r3, r3, #0x40 @ for sh/hi add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags @@ -965,14 +1233,14 @@ DrawSprite: .dspr_loop: subs r6, r6, #1 @ width-- - ldmeqfd sp!, {r4-r9,r11,pc}@ return + beq DrawSprite adds r0, r0, #8 @ sx+=8 add r8, r8, r5 @ tile+=delta .dspr_loop_enter: ble .dspr_loop @ sx <= 0 cmp r0, #328 - ldmgefd sp!, {r4-r9,r11,pc}@ return + bge DrawSprite mov r8, r8, lsl #17 mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address @@ -989,17 +1257,22 @@ DrawSprite: beq .dspr_SingleColor @ tileline singlecolor tst r9, #0x0800 - beq .dspr_TileNorm + bne .dspr_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlip r12 - b .dspr_loop - @ scratch: r4, r7 .dspr_TileNorm: TileNorm r12 b .dspr_loop +.dspr_TileFlip: + TileFlip r12 + b .dspr_loop + +.dspr_singlec_sh: + cmp r2, #0xe0000000 + bcs .dspr_loop @ operator tileline, ignore + .dspr_SingleColor: and r4, r2, #0xf orr r4, r3, r4 @@ -1018,48 +1291,18 @@ DrawSprite: beq .dspr_singlec_sh tst r9, #0x0800 - beq .dspr_TileNorm_sh + bne .dspr_TileFlip_sh @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern - TileFlipSh - b .dspr_loop - .dspr_TileNorm_sh: - TileNormSh - b .dspr_loop - -.dspr_singlec_sh: - cmp r2, #0xe0000000 - bcc .dspr_SingleColor @ normal tileline - tst r2, #0x10000000 - bne .dspr_sh_sh - TileSingleHi + TileNormSh_noop b .dspr_loop -.dspr_sh_sh: - TileSingleSh +.dspr_TileFlip_sh: + TileFlipSh_noop b .dspr_loop -.dspr_cache: - @ *(*hc)++ = (tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>24)&0xf); - mov r4, r8, lsl #16 @ tile - tst r9, #0x0800 - orrne r4, r4, #0x10000 @ code&0x0800 - mov r2, r2, lsl #22 - orr r4, r4, r2, lsr #16 @ (sx<<6)&0x0000ffc0 - and r2, r9, #0x6000 - orr r4, r4, r2, lsr #9 @ (code>>9)&0x30 - mov r3, r3, lsl #12 - ldr r2, [r1] - orr r4, r4, r3, lsr #28 @ (sprite[0]>>24)&0xf - - str r4, [r2], #4 - str r2, [r1] - - ldmfd sp!, {r4-r9,r11,lr} - bx lr - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .global DrawWindow @ int tstart, int tend, int prio, int sh // int *hcache @@ -1068,7 +1311,7 @@ DrawWindow: stmfd sp!, {r4-r11,lr} ldr r11, =(Pico+0x22228) @ Pico.video - ldr r10, =Scanline + ldr r10, =DrawScanline ldrb r12, [r11, #3] @ pvid->reg[3] ldr r10, [r10] @@ -1087,34 +1330,40 @@ DrawWindow: add r12, r12, r0, lsl #2 @ +starttile ldr r6, =rendstatus - ldr lr, =(Pico+0x10000) @ lr=Pico.vram - ldrb r6, [r6] + ldr lr, =(Pico+0x10000) @ lr=Pico.vram + ldr r6, [r6] @ fetch the first code now ldrh r7, [lr, r12] - ands r6, r6, #2 @ we care about bit 1 only + ands r6, r6, #PDRAW_WND_DIFF_PRIO orr r6, r6, r2 - bne .dw_no_sameprio - cmp r2, r7, lsr #15 - ldmnefd sp!, {r4-r11,pc} @ assume that whole window uses same priority + eoreq r8, r2, r7, lsr #15 @ do prio bits differ? + cmpeq r8, #1 + ldmeqfd sp!, {r4-r11,pc} @ yes, assume that whole window uses same priority -.dw_no_sameprio: orr r6, r6, r3, lsl #8 @ shadow mode sub r8, r1, r0 - mov r8, r8, lsl #1 @ cells - - mvn r9, #0 @ r9=prevcode=-1 @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r8, r8, lsl #1 @ cells + ldr r11,[r11] + mvn r9, #0 @ r9=prevcode=-1 + add r11,r11,#8 +.else ldr r11,=(HighCol+8) - add r1, r11, r0, lsl #4 @ r1=pdest + mov r8, r8, lsl #1 @ cells + mvn r9, #0 @ r9=prevcode=-1 +.endif + add r1, r11, r0, lsl #4 @ r1=pdest mov r0, #0xf b .dwloop_enter - @ r4,r5 & r7 are scratch in this loop + @ r4,r5 are scratch in this loop .dwloop: add r1, r1, #8 .dwloop_nor1: @@ -1155,16 +1404,17 @@ DrawWindow: beq .dw_SingleColor @ tileline singlecolor tst r9, #0x0800 - beq .dw_TileNorm + bne .dw_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern - TileFlip r0 - b .dwloop - .dw_TileNorm: TileNorm r0 b .dwloop +.dw_TileFlip: + TileFlip r0 + b .dwloop + .dw_SingleColor: and r4, r0, r2 @ #0x0000000f orr r4, r3, r4 @@ -1179,31 +1429,20 @@ DrawWindow: orreq r3, r3, #0x40 beq .dw_shadow_done ldr r4, [r1] - tst r4, #0x00000080 - biceq r4, r4, #0x000000c0 - tst r4, #0x00008000 - biceq r4, r4, #0x0000c000 - tst r4, #0x00800000 - biceq r4, r4, #0x00c00000 - tst r4, #0x80000000 - biceq r4, r4, #0xc0000000 + mov r5, #0x3f + orr r5, r5, r5, lsl #8 + orr r5, r5, r5, lsl #16 + and r4, r4, r5 str r4, [r1] ldr r4, [r1,#4] - tst r4, #0x00000080 - biceq r4, r4, #0x000000c0 - tst r4, #0x00008000 - biceq r4, r4, #0x0000c000 - tst r4, #0x00800000 - biceq r4, r4, #0x00c00000 - tst r4, #0x80000000 - biceq r4, r4, #0xc0000000 + and r4, r4, r5 str r4, [r1,#4] b .dw_shadow_done .dwloop_end: ldr r0, =rendstatus ldr r1, [r0] - and r6, r6, #2 + and r6, r6, #PDRAW_WND_DIFF_PRIO orr r1, r1, r6 str r1, [r0] @@ -1306,15 +1545,29 @@ FinalizeLineBGR444: bne .fl_loopcpBGR444_hi sub r3, r4, #0x40*3*2 + mov r6, #1 .fl_noshBGR444: - ldr r1, =(HighCol+8) + ldr r12,=rendstatus + eors r6, r6, #1 @ sh is 0 + ldr r12,[r12] mov lr, #0xff + tstne r12,#PDRAW_ACC_SPRITES + +.if OVERRIDE_HIGHCOL + ldr r1, =HighCol + movne lr, #0x3f + ldr r1, [r1] mov lr, lr, lsl #1 + add r1, r1, #8 +.else + ldr r1, =(HighCol+8) + movne lr, #0x3f + mov lr, lr, lsl #1 +.endif .fl_loopBGR444: - ldr r12, [r1], #4 subs r2, r2, #1 @@ -1324,11 +1577,10 @@ FinalizeLineBGR444: ldrh r5, [r3, r5] and r6, lr, r12, lsr #15 ldrh r6, [r3, r6] + and r12,lr, r12, lsr #23 + ldrh r12,[r3, r12] @ 1c.i. orr r4, r4, r5, lsl #16 - - and r5, lr, r12, lsr #23 - ldrh r5, [r3, r5] @ 2c.i. - orr r5, r6, r5, lsl #16 + orr r5, r6, r12,lsl #16 stmia r0!, {r4,r5} bne .fl_loopBGR444 @@ -1381,14 +1633,16 @@ FinalizeLineBGR444: orr \reg, \reg, r3 @ add blue back .endm +.global vidConvCpyRGB565 + vidConvCpyRGB565: @ void *to, void *from, int pixels stmfd sp!, {r4-r9,lr} - mov r12, r2, lsr #3 @ repeats + mov r12, r2, lsr #3 @ repeats mov lr, #0x001c0000 orr lr, lr, #0x01c @ lr == pattern 0x001c001c mov r8, #0x00030000 - orr r8, r8, #0x003 @ lr == pattern 0x001c001c + orr r8, r8, #0x003 .loopRGB565: ldmia r1!, {r4-r7} @@ -1461,14 +1715,29 @@ FinalizeLineRGB555: bne .fl_loopcpRGB555_hi sub r3, r3, #0x40*2 + mov r6, #1 .fl_noshRGB555: + ldr r12,=rendstatus + eors r6, r6, #1 @ sh is 0 + ldr r12,[r12] + mov lr, #0xff + tstne r12,#PDRAW_ACC_SPRITES + movne lr, #0x3f + +.if OVERRIDE_HIGHCOL + ldr r1, =HighCol + ldr r0, =DrawLineDest + ldr r1, [r1] + ldr r0, [r0] + add r1, r1, #8 +.else ldr r0, =DrawLineDest ldr r1, =(HighCol+8) ldr r0, [r0] +.endif ldrb r12, [r8, #12] - mov lr, #0xff mov lr, lr, lsl #1 tst r12, #1 @@ -1483,8 +1752,14 @@ FinalizeLineRGB555: addeq r0, r0, #32*2 .fl_no32colRGB555: -.fl_loopRGB555: +.if UNALIGNED_DRAWLINEDEST + @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer + tst r0, #2 + bne .fl_RGB555u +.endif + +.fl_loopRGB555: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1506,12 +1781,12 @@ FinalizeLineRGB555: ldrh r6, [r3, r6] and r12,lr, r7, lsr #15 ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] orr r8, r8, r6, lsl #16 - and r6, lr, r7, lsr #23 - ldrh r6, [r3, r6] @ 1 cycle interlock here (r6) subs r2, r2, #1 - orr r12,r12, r6, lsl #16 + orr r12,r12, r7, lsl #16 stmia r0!, {r4,r5,r8,r12} bne .fl_loopRGB555 @@ -1525,6 +1800,11 @@ FinalizeLineRGB555: mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 orr r9, r9, #0x00e7 +.if UNALIGNED_DRAWLINEDEST + tst r0, #2 + bne .fl_32scale_RGB555u +.endif + .fl_loop32scale_RGB555: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1581,6 +1861,121 @@ FinalizeLineRGB555: ldmfd sp!, {r4-r8,lr} bx lr +.if UNALIGNED_DRAWLINEDEST + @ unaligned versions of loops + @ warning: starts drawing 2bytes before dst + +.fl_RGB555u: + sub r0, r0, #2 @ initial adjustment + mov r8, #0 + +.fl_loopRGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r6, lr, r12,lsl #1 + ldrh r6, [r3, r6] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + orr r4, r8, r6, lsl #16 + + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r8, lr, r12,lsr #23 + ldrh r8, [r3, r8] + orr r5, r5, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r12,lr, r7, lsr #7 + ldrh r12,[r3, r12] + orr r6, r8, r6, lsl #16 + + and r8, lr, r7, lsr #15 + ldrh r8, [r3, r8] + and r7, lr, r7, lsr #23 + + subs r2, r2, #1 + orr r12,r12,r8, lsl #16 + ldrh r8, [r3, r7] + + stmia r0!, {r4,r5,r6,r12} + bne .fl_loopRGB555u + + strh r8, [r0], #2 + + ldmfd sp!, {r4-r8,lr} + bx lr + + +.fl_32scale_RGB555u: + sub r0, r0, #2 @ initial adjustment + mov r4, #0 + + @ r9 f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 +.fl_loop32scale_RGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r6, lr, r12,lsl #1 + ldrh r6, [r3, r6] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + and r6, r6, r9, lsl #2 + orr r4, r4, r6, lsl #16 @ r4 = pix_d -1, 0 + + and r5, r5, r9, lsl #2 + sub r8, r5, r5, lsr #2 @ r8 = 3/4 pix_s 1 + add r6, r8, r6, lsr #2 @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1) + orr r5, r6, r5, lsl #15 + + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r12,lr, r12,lsr #23 + ldrh r12,[r3, r12] + and r6, r6, r9, lsl #2 + add r5, r5, r6, lsl #15 @ r5 = pix_d 1, 2 + + and r8, lr, r7, lsl #1 + ldrh r8, [r3, r8] + and r10,lr, r7, lsr #7 + ldrh r10,[r3, r10] + and r12,r12,r9, lsl #2 + sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 + add r6, r6, r12,lsr #2 + orr r6, r6, r12,lsl #16 @ r6 = pix_d 3, 4 + + and r8, r8, r9, lsl #2 + and r10,r10,r9, lsl #2 + sub r12,r10,r10,lsr #2 @ r12 = 3/4 pix_s 5 + orr r8, r8, r8, lsl #14 + add r8, r8, r12,lsl #16 @ r8 = pix_d 5, 6 + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + and r12,r12,r9, lsl #2 + add r10,r10,r12 + mov r10,r10, lsr #1 + sub r12,r12,r12,lsr #2 @ r12 = 3/4 pix_s 6 + orr r10,r10,r12,lsl #16 + and r7, r7, r9, lsl #2 + add r10,r10,r7, lsl #14 @ r10 = pix_d 7, 8 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + mov r4, r7 + bne .fl_loop32scale_RGB555u + + strh r4, [r0], #2 + + ldmfd sp!, {r9,r10} + ldmfd sp!, {r4-r8,lr} + bx lr + +.endif @ UNALIGNED_DRAWLINEDEST + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@