From: kub Date: Sun, 26 Jan 2020 19:40:07 +0000 (+0100) Subject: sprite rendering improvements for masking and limit edge cases X-Git-Tag: v2.00~797 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5f0d224e1853b9ff5872999c9689e1bef53f632e;p=picodrive.git sprite rendering improvements for masking and limit edge cases --- diff --git a/pico/draw.c b/pico/draw.c index 7fd93f8e..652b9df7 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -53,7 +53,11 @@ static int HighPreSpr[80*2+1]; // slightly preprocessed sprites #define SPRL_HAVE_LO 0x40 // *lo* #define SPRL_MAY_HAVE_OP 0x20 // may have operator sprites on the line #define SPRL_LO_ABOVE_HI 0x10 // low priority sprites may be on top of hi -unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; // sprite_count, ^flags, tile_count, [spritep]... +#define SPRL_HAVE_X 0x08 // have sprites with x != 0 +#define SPRL_TILE_OVFL 0x04 // tile limit exceeded on previous line +#define SPRL_HAVE_MASK0 0x02 // have sprite with x == 0 in 1st slot +#define SPRL_MASKED 0x01 // lo prio masking by sprite with x == 0 active +unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; // sprite_count, ^flags, tile_count, sprites_total, [spritep]..., last_width int rendstatus_old; int rendlines; @@ -706,7 +710,7 @@ last_cut_tile: // Index + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. size // Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -static void DrawSprite(int *sprite, int sh) +static void DrawSprite(int *sprite, int sh, int w) { void (*fTileFunc)(unsigned char *pd, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; @@ -746,6 +750,7 @@ static void DrawSprite(int *sprite, int sh) else fTileFunc=TileNorm; } + if (w) width = w; // tile limit for (; width; width--,sx+=8,tile+=delta) { unsigned int pack; @@ -833,12 +838,13 @@ static NOINLINE void DrawAllSpritesInterlace(int pri, int sh) struct PicoVideo *pvid=&Pico.video; int i,u,table,link=0,sline=Pico.est.DrawScanline<<1; unsigned int *sprites[80]; // Sprite index + int max_sprites = Pico.video.reg[12]&1 ? 80 : 64; table=pvid->reg[5]&0x7f; if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode table<<=8; // Get sprite table address/2 - for (i=u=0; u < 80 && i < 21; u++) + for (i = u = 0; u < max_sprites && link < max_sprites; u++) { unsigned int *sprite; int code, sx, sy, height; @@ -888,15 +894,18 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) void (*fTileFunc)(unsigned char *pd, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; unsigned char *p; - int cnt; + int cnt, w; cnt = sprited[0] & 0x7f; if (cnt == 0) return; - p = &sprited[3]; + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites backwards: - for (cnt--; cnt >= 0; cnt--) + w = p[cnt]; // possibly clipped width of last sprite + for (cnt--; cnt >= 0; cnt--, w = 0) { int *sprite, code, pal, tile, sx, sy; int offs, delta, width, height, row; @@ -940,6 +949,7 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address delta<<=4; // Delta of address + if (w) width = w; // tile limit for (; width; width--,sx+=8,tile+=delta) { unsigned int pack; @@ -967,7 +977,9 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) if (cnt == 0) return; memset(mb, 0xff, sizeof(mb)); - p = &sprited[3]; + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites: for (entry = 0; entry < cnt; entry++) @@ -1019,6 +1031,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address delta<<=4; // Delta of address + if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? for (; width; width--,sx+=8,tile+=delta) { unsigned int pack; @@ -1065,10 +1078,10 @@ static NOINLINE void PrepareSprites(int full) { int pack; // updates: tilecode, sx - for (u=0; u < max_sprites && (pack = *pd); u++, pd+=2) + for (u=0; u < max_sprites && link < max_sprites && (pack = *pd); u++, pd+=2) { unsigned int *sprite; - int code2, sx, sy, height; + int code2, sx, sy, height, width; sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite @@ -1078,25 +1091,29 @@ static NOINLINE void PrepareSprites(int full) sx -= 0x78; // Get X coordinate + 8 sy = (pack << 16) >> 16; height = (pack >> 24) & 0xf; + width = (pack >> 28); if (sy < max_lines && - sy + (height<<3) > est->DrawScanline && // sprite onscreen (y)? - (sx > -24 || sx < max_width)) // onscreen x + sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)? { int y = (sy >= est->DrawScanline) ? sy : est->DrawScanline; int entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); for (; y < sy + (height<<3) && y < max_lines; y++) { int i, cnt; - cnt = HighLnSpr[y][0] & 0x7f; - if (cnt >= max_line_sprites) continue; // sprite limit? + cnt = HighLnSpr[y][0]; + if (HighLnSpr[y][3] >= max_line_sprites) continue; // sprite limit? for (i = 0; i < cnt; i++) - if (((HighLnSpr[y][3+i] ^ entry) & 0x7f) == 0) goto found; + if (((HighLnSpr[y][4+i] ^ entry) & 0x7f) == 0) goto found; // this sprite was previously missing - HighLnSpr[y][3+cnt] = entry; - HighLnSpr[y][0] = cnt + 1; + HighLnSpr[y][3] ++; + if (sx > -24 && sx < max_width) { // onscreen x + HighLnSpr[y][4+cnt] = entry; // XXX wrong sequence? + HighLnSpr[y][5+cnt] = width; // XXX should count tiles for limit + HighLnSpr[y][0] = cnt + 1; + } found:; if (entry & 0x80) HighLnSpr[y][1] |= SPRL_HAVE_HI; @@ -1118,7 +1135,7 @@ found:; for (u = 0; u < max_lines; u++) *((int *)&HighLnSpr[u][0]) = 0; - for (u = 0; u < max_sprites; u++) + for (u = 0; u < max_sprites && link < max_sprites; u++) { unsigned int *sprite; int code, code2, sx, sy, hv, height, width; @@ -1138,7 +1155,7 @@ found:; if (sy < max_lines && sy + (height<<3) > est->DrawScanline) // sprite onscreen (y)? { - int entry, y, sx_min, onscr_x, maybe_op = 0; + int entry, y, w, sx_min, onscr_x, maybe_op = 0; sx_min = 8-(width<<3); onscr_x = sx_min < sx && sx < max_width; @@ -1149,29 +1166,36 @@ found:; y = (sy >= est->DrawScanline) ? sy : est->DrawScanline; for (; y < sy + (height<<3) && y < max_lines; y++) { - unsigned char *p = &HighLnSpr[y][0]; + unsigned char *p = &HighLnSpr[y][0]; int cnt = p[0]; - if (cnt >= max_line_sprites) continue; // sprite limit? - - if (p[2] >= max_line_sprites*2) { // tile limit? - p[0] |= 0x80; - continue; + if (p[3] >= max_line_sprites) continue; // sprite limit? + if ((p[1] & SPRL_MASKED) && !(entry & 0x80)) continue; // masked? + + w = width; + if (p[2] + width > max_line_sprites*2) { // tile limit? + if (y+1 < 240) HighLnSpr[y+1][1] |= SPRL_TILE_OVFL; + if (p[2] >= max_line_sprites*2) continue; + w = max_line_sprites*2 - p[2]; } - p[2] += width; + p[2] += w; + p[3] ++; if (sx == -0x78) { - if (cnt > 0) - p[0] |= 0x80; // masked, no more sprites for this line - continue; - } - // must keep the first sprite even if it's offscreen, for masking - if (cnt > 0 && !onscr_x) continue; // offscreen x + if (p[1] & (SPRL_HAVE_X|SPRL_TILE_OVFL)) + p[1] |= SPRL_MASKED; // masked, no more low sprites for this line + if (!(p[1] & SPRL_HAVE_X) && cnt == 0) + p[1] |= SPRL_HAVE_MASK0; // 1st sprite is masking + } else + p[1] |= SPRL_HAVE_X; + + if (!onscr_x) continue; // offscreen x - p[3+cnt] = entry; + p[4+cnt] = entry; + p[5+cnt] = w; // width clipped by tile limit for sprite renderer p[0] = cnt + 1; p[1] |= (entry & 0x80) ? SPRL_HAVE_HI : SPRL_HAVE_LO; p[1] |= maybe_op; // there might be op sprites on this line - if (cnt > 0 && (code2 & 0x8000) && !(p[3+cnt-1]&0x80)) + if (cnt > 0 && (code2 & 0x8000) && !(p[4+cnt-1]&0x80)) p[1] |= SPRL_LO_ABOVE_HI; } } @@ -1189,9 +1213,10 @@ found:; for (u = 0; u < max_lines; u++) { int y; - printf("c%03i: %2i, %2i: ", u, HighLnSpr[u][0] & 0x7f, HighLnSpr[u][2]); - for (y = 0; y < HighLnSpr[u][0] & 0x7f; y++) - printf(" %i", HighLnSpr[u][y+3]); + printf("c%03i: f %x c %2i/%2i w %2i: ", u, HighLnSpr[u][1], + HighLnSpr[u][0], HighLnSpr[u][3], HighLnSpr[u][2]); + for (y = 0; y < HighLnSpr[u][0]; y++) + printf(" %i", HighLnSpr[u][y+4]); printf("\n"); } #endif @@ -1203,20 +1228,22 @@ static void DrawAllSprites(unsigned char *sprited, int prio, int sh, struct PicoEState *est) { unsigned char *p; - int cnt; + int cnt, w = sprited[2]; cnt = sprited[0] & 0x7f; if (cnt == 0) return; - p = &sprited[3]; + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites backwards: - for (cnt--; cnt >= 0; cnt--) + w = p[cnt]; // possibly clipped width of last sprite + for (cnt--; cnt >= 0; cnt--, w = 0) { - int offs; + int *sp = HighPreSpr + (p[cnt]&0x7f) * 2; if ((p[cnt] >> 7) != prio) continue; - offs = (p[cnt]&0x7f) * 2; - DrawSprite(HighPreSpr + offs, sh); + DrawSprite(sp, sh, w); } } diff --git a/pico/draw2.c b/pico/draw2.c index 38a90ef3..85e2b275 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -420,12 +420,13 @@ static void DrawAllSpritesFull(int prio, int maxwidth) int i,u,link=0; unsigned int *sprites[80]; // Sprites int y_min=START_ROW*8, y_max=END_ROW*8; // for a simple sprite masking + int max_sprites = Pico.video.reg[12]&1 ? 80 : 64; table=pvid->reg[5]&0x7f; if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode table<<=8; // Get sprite table address/2 - for (i=u=0; u < 80; u++) + for (i = u = 0; u < max_sprites && link < max_sprites; u++) { unsigned int *sprite=NULL; int code, code2, sx, sy, height; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 2efc804c..fb6d0950 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -942,17 +942,23 @@ DrawTilesFromCache: .global DrawSpritesSHi DrawSpritesSHi: - ldr r3, [r0] + ldrb r3, [r0] mov r12,#0xff ands r3, r3, #0x7f bxeq lr - stmfd sp!, {r1,r4-r11,lr} @ +est - strb r12,[r0,#2] @ set end marker - add r10,r0, #3 @ r10=HighLnSpr end + stmfd sp!, {r1,r3-r11,lr} @ +est + strb r12,[r0,#3] @ set end marker + ldrb r12,[r0,#1] + add r10,r0, #4 @ r10=HighLnSpr end + mvn r12,r12 + tst r12,#0x6 @ masking in slot 1 and tile ovfl? + ldmeqfd sp!, {r1,r3-r11,pc} add r10,r10,r3 @ r10=HighLnSpr end + ldrb r12,[r10,#0] @ width of last sprite ldr r11,[r1, #OFS_EST_HighCol] + str r12,[sp, #4] mov r12,#0xf ldr lr, [r1, #OFS_EST_PicoMem_vram] @@ -963,7 +969,7 @@ DrawSpriteSHi: ldr r7, [sp] @ est ldr r1, [r7, #OFS_EST_HighPreSpr] cmp r0, #0xff - ldmeqfd sp!, {r1,r4-r11,pc} @ end of list + ldmeqfd sp!, {r1,r3-r11,pc} @ end of list and r0, r0, #0x7f add r0, r1, r0, lsl #3 @@ -1007,10 +1013,16 @@ DrawSpriteSHi: and r7, r7, #7 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address + ldr r0, [sp, #4] + add r6, r6, #1 @ inc now + cmp r0, #0 @ check width of last sprite + movne r6, r0 + movne r0, #0 + strne r0, [sp, #4] + mov r5, r5, lsl #4 @ delta<<=4; // Delta of address mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); - add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags b .dsprShi_loop_enter @@ -1126,11 +1138,18 @@ DrawAllSprites: @ time to do some real work stmfd sp!, {r1,r3-r11,lr} @ +sh|prio<<1 +est mov r12,#0xff - strb r12,[r0,#2] @ set end marker - add r10,r0, #3 + strb r12,[r0,#3] @ set end marker + ldrb r12,[r0,#1] + add r10,r0 ,#4 + mvn r12,r12 + tst r12,#0x6 @ masking in slot 1 and tile ovfl? + ldmeqfd sp!, {r1,r3-r11,pc} add r10,r10,r2 @ r10=HighLnSpr end + ldrb r12,[r10,#0] @ width of last sprite ldr r11,[r3, #OFS_EST_HighCol] + orr r1 ,r1 ,r12,lsl #24 + str r1, [sp] mov r12,#0xf ldr lr, [r3, #OFS_EST_PicoMem_vram] @@ -1140,13 +1159,13 @@ DrawAllSprites: DrawSprite: @ draw next sprite ldrb r0, [r10,#-1]! - ldr r8, [sp] @ sh|prio<<1 + ldr r4, [sp] @ sh|prio<<1|lastw<<24 ldr r7, [sp, #4] @ est - mov r2, r0, lsr #7 + mov r2, r0, lsl #24 cmp r0, #0xff ldmeqfd sp!, {r1,r3-r11,pc} @ end of list - cmp r2, r8, lsr #1 - bne DrawSprite @ wrong priority + eor r2, r2, r4, lsl #30 + bmi DrawSprite @ wrong priority ldr r1, [r7, #OFS_EST_HighPreSpr] and r0, r0, #0x7f add r0, r1, r0, lsl #3 @@ -1158,20 +1177,20 @@ DrawSprite: mov r5, r3, lsr #24 and r5, r5, #7 @ r5=height - mov r4, r3, lsl #16 @ r4=sy<<16 (tmp) + mov r8, r3, lsl #16 @ r8=sy<<16 (tmp) ldr r9, [r0, #4] - sub r7, r7, r4, asr #16 @ r7=row=DrawScanline-sy + sub r7, r7, r8, asr #16 @ r7=row=DrawScanline-sy mov r2, r9, asr #16 @ r2=sx mov r9, r9, lsl #16 mov r9, r9, lsr #16 - orr r9, r9, r8, lsl #31 @ r9=code|sh[31] + orr r9, r9, r4, lsl #31 @ r9=code|sh[31] tst r9, #0x1000 - movne r4, r5, lsl #3 - subne r4, r4, #1 - subne r7, r4, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y + movne r8, r5, lsl #3 + subne r8, r8, #1 + subne r7, r8, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down tst r9, #0x0800 @@ -1183,7 +1202,12 @@ DrawSprite: and r7, r7, #7 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address -.dspr_continue: + add r6, r6, #1 @ inc now + cmp r4, #0x1000000 @ check width of last sprite + movhs r6, r4, lsr #24 + bichs r4, r4, #0xff000000 + strhs r4, [sp] + @ cache some stuff to avoid mem access mov r5, r5, lsl #4 @ delta<<=4; // Delta of address and r4, r9, #0x6000 @@ -1193,7 +1217,6 @@ DrawSprite: mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); orrmi r3, r3, #0x40 @ for sh/hi - add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags b .dspr_loop_enter diff --git a/pico/pico_int.h b/pico/pico_int.h index 70bfa710..a24fc6f6 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -667,8 +667,8 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est); void PicoDrawSetOutBufMD(void *dest, int increment); extern int (*PicoScanBegin)(unsigned int num); extern int (*PicoScanEnd)(unsigned int num); -#define MAX_LINE_SPRITES 29 -extern unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; +#define MAX_LINE_SPRITES 27 // +1 last sprite width, +4 hdr; total 32 +extern unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; extern void *DrawLineDestBase; extern int DrawLineDestIncrement; diff --git a/pico/videoport.c b/pico/videoport.c index d196ee4f..c2fbd0ca 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -200,6 +200,7 @@ static void DmaSlow(int len, unsigned int source) a = (a + inc) & 0x1ffff; } Pico.video.addr_u = a >> 16; + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; default: @@ -266,6 +267,7 @@ static NOINLINE void DmaFill(int data) // Increment address register a = (u16)(a + inc); } + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: // cram case 5: { // vsram @@ -289,7 +291,6 @@ static NOINLINE void DmaFill(int data) Pico.video.reg[0x15] = source; Pico.video.reg[0x16] = source >> 8; - Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; } static NOINLINE void CommandDma(void)