From 740da8c60b40ec09256e438f849f766b150d29d9 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 2 Sep 2007 14:52:01 +0000 Subject: [PATCH] workaround for all-tiles-hi-priority performance issue git-svn-id: file:///home/notaz/opt/svn/PicoDrive@237 be3aeb3a-fb24-0410-a615-afba39da0efa --- Pico/Draw.c | 85 +++++++++++++++++++++++++++++++++++++--------- Pico/Draw.s | 98 ++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 147 insertions(+), 36 deletions(-) diff --git a/Pico/Draw.c b/Pico/Draw.c index 051b16d..c43f7f0 100644 --- a/Pico/Draw.c +++ b/Pico/Draw.c @@ -22,8 +22,8 @@ static int HighCacheS[80+1]; // and sprites static int HighPreSpr[80*2+1]; // slightly preprocessed sprites char HighSprZ[320+8+8]; // Z-buffer for accurate sprites and shadow/hilight mode // (if bit 7 == 0, sh caused by tile; if bit 6 == 0 pixel must be shadowed, else hilighted, if bit5 == 1) -// lsb->msb: moved sprites, all window tiles don't use same priority, accurate sprites (copied from PicoOpt), interlace -// dirty sprites, sonic mode +// lsb->msb: moved sprites, not all window tiles use same priority, accurate sprites (copied from PicoOpt), interlace +// dirty sprites, sonic mode, have layer with all hi prio tiles (mk3), layer sh/hi already processed int rendstatus; void *DrawLineDest=DefOutBuff; // pointer to dest buffer where to draw this line to int Scanline=0; // Scanline @@ -316,6 +316,8 @@ static void DrawStrip(struct TileStrip *ts, int sh) // terminate the cache list *ts->hc = 0; + // if oldcode wasn't changed, it means all layer is hi priority + if (oldcode == -1) rendstatus|=0x40; } // this is messy @@ -381,6 +383,7 @@ void DrawStripVSRam(struct TileStrip *ts, int plane) // terminate the cache list *ts->hc = 0; + if (oldcode == -1) rendstatus|=0x40; } #endif @@ -578,33 +581,72 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache static void DrawTilesFromCache(int *hc, int sh) { - int code, addr, zero, dx; + int code, addr, dx; int pal; - short blank=-1; // The tile we know is blank // *ts->hc++ = code | (dx<<16) | (ty<<25); // cache it - while((code=*hc++)) { - if(!sh && (short)code == blank) continue; + if (sh && (rendstatus&0xc0)) + { + if (!(rendstatus&0x80)) + { + // as some layer has covered whole line with hi priority tiles, + // we can process whole line and then act as if sh/hi mode was off. + rendstatus|=0x80; + int c = 320/4, *zb = (int *)(HighCol+8); + while (c--) + { + int tmp = *zb; + if (!(tmp & 0x80808080)) *zb=tmp&0x3f3f3f3f; + else { + if(!(tmp&0x00000080)) tmp&=~0x000000c0; if(!(tmp&0x00008000)) tmp&=~0x0000c000; + if(!(tmp&0x00800000)) tmp&=~0x00c00000; if(!(tmp&0x80000000)) tmp&=~0xc0000000; + *zb=tmp; + } + zb++; + } + } + sh = 0; + } - // Get tile address/2: - addr=(code&0x7ff)<<4; - addr+=(unsigned int)code>>25; // y offset into tile - dx=(code>>16)&0x1ff; - if(sh) { - unsigned char *zb = HighCol+dx; + if (sh) + { + while((code=*hc++)) { + unsigned char *zb; + // Get tile address/2: + addr=(code&0x7ff)<<4; + addr+=(unsigned int)code>>25; // y offset into tile + dx=(code>>16)&0x1ff; + zb = HighCol+dx; if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; + + pal=((code>>9)&0x30); + + if (code&0x0800) TileFlip(dx,addr,pal); + else TileNorm(dx,addr,pal); } + } + else + { + short blank=-1; // The tile we know is blank + while((code=*hc++)) { + int zero; + if((short)code == blank) continue; + // Get tile address/2: + addr=(code&0x7ff)<<4; + addr+=(unsigned int)code>>25; // y offset into tile + dx=(code>>16)&0x1ff; - pal=((code>>9)&0x30); + pal=((code>>9)&0x30); - if (code&0x0800) zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); + if (code&0x0800) zero=TileFlip(dx,addr,pal); + else zero=TileNorm(dx,addr,pal); - if(zero) blank=(short)code; + if(zero) blank=(short)code; + } } } @@ -1189,6 +1231,8 @@ static int DrawDisplay(int sh) int win=0,edge=0,hvwind=0; int maxw, maxcells; + rendstatus&=~0xc0; + if(pvid->reg[12]&1) { maxw = 328; maxcells = 40; } else { @@ -1236,6 +1280,15 @@ static int DrawDisplay(int sh) if(HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh); DrawAllSprites(HighCacheS, maxw, 1, sh); +#if 0 + { + int *c, a, b; + for (a = 0, c = HighCacheA; *c; c++, a++); + for (b = 0, c = HighCacheB; *c; c++, b++); + printf("%i:%03i: a=%i, b=%i\n", Pico.m.frame_count, Scanline, a, b); + } +#endif + return 0; } diff --git a/Pico/Draw.s b/Pico/Draw.s index cb1a6fa..91a647c 100644 --- a/Pico/Draw.s +++ b/Pico/Draw.s @@ -306,7 +306,7 @@ DrawLayer: tst r9, #1<<31 mov r3, #0 - orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|ty) + orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty) movne r3, #0x40 @ default to shadowed pal on sh mode mvn r9, #0 @ r9=prevcode=-1 @@ -342,6 +342,7 @@ DrawLayer: beq .DrawStrip_samecode @ we know stuff about this tile already mov r9, r7 @ remember code + orr r10, r10, #1<<21 @ seen non hi-prio tile movs r2, r9, lsl #20 @ if (code&0x1000) mov r2, r2, lsl #1 @@ -386,18 +387,6 @@ DrawLayer: strneb r4, [r1], #1 @ have a remaining unaligned pixel? b .dsloop_subr1 -.DrawStrip_hiprio: - tst r10, #0x00c00000 - beq .DrawStrip_hiprio_maybempt - sub r0, r1, r11 - orr r7, r7, r0, lsl #16 - orr r7, r7, r10, lsl #25 @ (ty<<25) - tst r7, #0x1000 - eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26; - str r7, [r6], #4 @ cache hi priority tile - mov r0, #0xf - b .dsloop - .DrawStrip_hiprio_maybempt: cmp r7, r9 beq .dsloop @ must've been empty, otherwise we wouldn't get here @@ -409,13 +398,29 @@ DrawLayer: ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels mov r9, r7 @ remember code tst r2, r2 - orrne r10, r10, #1<<22 - bne .DrawStrip_hiprio + beq .dsloop + orr r10, r10, #1<<22 + +.DrawStrip_hiprio: + tst r10, #0x00c00000 + beq .DrawStrip_hiprio_maybempt + sub r0, r1, r11 + orr r7, r7, r0, lsl #16 + orr r7, r7, r10, lsl #25 @ (ty<<25) + tst r7, #0x1000 + eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26; + str r7, [r6], #4 @ cache hi priority tile + mov r0, #0xf b .dsloop .dsloop_exit: + tst r10, #1<<21 @ seen non hi-prio tile + ldreq r1, =rendstatus mov r0, #0 + ldreq r2, [r1] str r0, [r6] @ terminate the cache list + orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles + streq r2, [r1] ldmfd sp!, {r4-r11,lr} bx lr @@ -426,7 +431,7 @@ DrawLayer: rsb r8, r3, #0 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3 bic r8, r8, #0xff000000 - orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|tilex[15:0]) + orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[15:0]) ldr r4, =Scanline orr r5, r1, r10, lsl #24 @@ -463,7 +468,7 @@ DrawLayer: add r10,r10, #0x01000000 and r4, r10, #0x003f0000 cmp r4, r10, asr #8 - ble .dsloop_exit + ble .dsloop_vs_exit @ calc offset and read tileline code to r7, also calc ty add r7, lr, #0x012000 @@ -500,6 +505,7 @@ DrawLayer: beq .DrawStrip_vs_samecode @ we know stuff about this tile already mov r9, r7 @ remember code + orr r8, r8, #1<<24 @ seen non hi-prio tile movs r2, r9, lsl #20 @ if (code&0x1000) mov r2, r2, lsl #1 @@ -571,6 +577,18 @@ DrawLayer: bne .DrawStrip_vs_hiprio b .dsloop_vs +.dsloop_vs_exit: + tst r8, #1<<24 @ seen non hi-prio tile + ldreq r1, =rendstatus + mov r0, #0 + ldreq r2, [r1] + str r0, [r6] @ terminate the cache list + orreq r2, r2, #0x40 @ had a layer with all hi-prio tiles + streq r2, [r1] + + ldmfd sp!, {r4-r11,lr} + bx lr + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -650,14 +668,15 @@ BackFill: DrawTilesFromCache: stmfd sp!, {r4-r8,r11,lr} - mvn r5, #0 @ r5=prevcode=-1 - mov r8, r1 - @ cache some stuff to avoid mem access ldr r11,=HighCol ldr lr, =(Pico+0x10000) @ lr=Pico.vram mov r12,#0xf + mvn r5, #0 @ r5=prevcode=-1 + movs r8, r1 + bne .dtfc_check_rendflags + @ scratch: r4, r7 .dtfc_loop: ldr r6, [r0], #4 @ read code @@ -769,6 +788,45 @@ DrawTilesFromCache: mov r12, #0xf b .dtfc_loop +@ check if we have detected layer covered with hi-prio tiles: +.dtfc_check_rendflags: + ldr r1, =rendstatus + ldr r2, [r1] + tst r2, #0xc0 + beq .dtfc_loop + mov r8, #0 @ sh/hi mode off + tst r2, #0x80 + bne .dtfc_loop @ already processed + orr r2, r2, #0x80 + str r2, [r1] + + add r1, r11,#8 + mov r3, #320/4 + mov r7, #0x80 + orr r7, r7, r7, lsl #8 + orr r7, r7, r7, lsl #16 + mov r6, #0x3f + orr r6, r6, r6, lsl #8 + orr r6, r6, r6, lsl #16 +.dtfc_loop_shprep: + subs r3, r3, #1 + bmi .dtfc_loop @ done + ldr r2, [r1] + tst r2, r7 + andeq r2, r2, r6 + streq r2, [r1], #4 + beq .dtfc_loop_shprep + tst r2, #0x80000000 + biceq r2, r2, #0xc0000000 + tst r2, #0x00800000 + biceq r2, r2, #0x00c00000 + tst r2, #0x00008000 + biceq r2, r2, #0x0000c000 + tst r2, #0x00000080 + biceq r2, r2, #0x000000c0 + str r2, [r1], #4 + b .dtfc_loop_shprep + .pool @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -- 2.39.2