From e5fa9817777032758511868c8aaa9ff780786c3f Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 1 Jun 2008 13:45:34 +0000 Subject: [PATCH] accurate_sprites performance improvement, PSP is untested git-svn-id: file:///home/notaz/opt/svn/PicoDrive@478 be3aeb3a-fb24-0410-a615-afba39da0efa --- Pico/Draw.c | 350 +++++++++++++++++++++----------------------- Pico/Draw.s | 67 +++++---- Pico/Draw_amips.s | 37 +++++ Pico/Pico.c | 4 +- platform/gp2x/emu.c | 33 +++-- platform/psp/emu.c | 29 +++- 6 files changed, 291 insertions(+), 229 deletions(-) diff --git a/Pico/Draw.c b/Pico/Draw.c index 0d7f89b..7c092de 100644 --- a/Pico/Draw.c +++ b/Pico/Draw.c @@ -6,6 +6,17 @@ // For commercial use, separate licencing terms must be obtained. +/* + * The renderer has 4 modes now: + * - normal + * - shadow/hilight (s/h) + * - "sonic mode" for midline palette changes + * - accurate sprites (AS) + * + * AS and s/h both use upper bits for both priority and shadow/hilight flags. + * "sonic mode" is autodetected, shadow/hilight is enabled by emulated game. + * AS is enabled by user and takes priority over "sonic mode". 
+ */ #include "PicoInt.h" @@ -26,8 +37,6 @@ static int HighCacheB[41+1]; static int HighCacheS[80+1]; // and sprites static int HighPreSpr[80*2+1]; // slightly preprocessed sprites -static unsigned char HighSprZ[320+8+8]; // Z-buffer for accurate sprites - int rendstatus = 0; int Scanline = 0; // Scanline @@ -36,6 +45,9 @@ static int skip_next_line=0; //unsigned short ppt[] = { 0x0f11, 0x0ff1, 0x01f1, 0x011f, 0x01ff, 0x0f1f, 0x0f0e, 0x0e7c }; +static void (*DrawAllSpritesLoPri)(int *hcache, int maxwidth, int prio, int sh) = NULL; +static void (*DrawAllSpritesHiPri)(int *hcache, int maxwidth, int prio, int sh) = NULL; + struct TileStrip { int nametab; // Position in VRAM of name table (for this tile line) @@ -50,9 +62,9 @@ struct TileStrip #ifdef _ASM_DRAW_C void DrawWindow(int tstart, int tend, int prio, int sh); void BackFill(int reg7, int sh); -void DrawSprite(int *sprite, int **hc, int sh); +void DrawSprite(int *sprite, int **hc, int sh, int as); void DrawTilesFromCache(int *hc, int sh, int rlim); -void DrawSpritesFromCache(int *hc, int sh); +void DrawSpritesFromCache(int *hc, int maxwidth, int prio, int sh); void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); void FinalizeLineBGR444(int sh); void FinalizeLineRGB555(int sh); @@ -122,7 +134,7 @@ static int TileFlip(int sx,int addr,int pal) #define sh_pix(x) \ if(!t); \ else if(t==0xe) pd[x]=(unsigned char)((pd[x]&0x3f)|0x80); /* hilight */ \ - else if(t==0xf) pd[x]=(unsigned char)((pd[x]&0x3f)|0xc0); /* shadow */ \ + else if(t==0xf) pd[x]=(unsigned char)( pd[x] |0xc0); /* shadow */ \ else pd[x]=(unsigned char)(pal|t) #ifndef _ASM_DRAW_C @@ -170,115 +182,104 @@ static int TileFlipSH(int sx,int addr,int pal) } #endif -static int TileNormZ(int sx,int addr,int pal,int zval) +#define tilepixelAS(mask,index,shift) \ + if (!(pd[index]&0xc0)) { t=pack&mask; if (t) pd[index]=(pal|(t>>shift)); } + +static int TileNormAS(int sx,int addr,int pal) { - unsigned int pack=0; unsigned int t=0; unsigned char *pd 
= HighCol+sx; - unsigned char *zb = HighSprZ+sx; - int collision = 0; + unsigned int pack=0; unsigned int t=0; pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels if (pack) { - t=pack&0x0000f000; if(t) { if(zb[0]) collision=1; if(zval>zb[0]) { pd[0]=(unsigned char)(pal|(t>>12)); zb[0]=(char)zval; } } - t=pack&0x00000f00; if(t) { if(zb[1]) collision=1; if(zval>zb[1]) { pd[1]=(unsigned char)(pal|(t>> 8)); zb[1]=(char)zval; } } - t=pack&0x000000f0; if(t) { if(zb[2]) collision=1; if(zval>zb[2]) { pd[2]=(unsigned char)(pal|(t>> 4)); zb[2]=(char)zval; } } - t=pack&0x0000000f; if(t) { if(zb[3]) collision=1; if(zval>zb[3]) { pd[3]=(unsigned char)(pal|(t )); zb[3]=(char)zval; } } - t=pack&0xf0000000; if(t) { if(zb[4]) collision=1; if(zval>zb[4]) { pd[4]=(unsigned char)(pal|(t>>28)); zb[4]=(char)zval; } } - t=pack&0x0f000000; if(t) { if(zb[5]) collision=1; if(zval>zb[5]) { pd[5]=(unsigned char)(pal|(t>>24)); zb[5]=(char)zval; } } - t=pack&0x00f00000; if(t) { if(zb[6]) collision=1; if(zval>zb[6]) { pd[6]=(unsigned char)(pal|(t>>20)); zb[6]=(char)zval; } } - t=pack&0x000f0000; if(t) { if(zb[7]) collision=1; if(zval>zb[7]) { pd[7]=(unsigned char)(pal|(t>>16)); zb[7]=(char)zval; } } - if (collision) Pico.video.status|=0x20; + tilepixelAS(0x0000f000, 0, 12); + tilepixelAS(0x00000f00, 1, 8); + tilepixelAS(0x000000f0, 2, 4); + tilepixelAS(0x0000000f, 3, 0); + tilepixelAS(0xf0000000, 4, 28); + tilepixelAS(0x0f000000, 5, 24); + tilepixelAS(0x00f00000, 6, 20); + tilepixelAS(0x000f0000, 7, 16); return 0; } return 1; // Tile blank } -static int TileFlipZ(int sx,int addr,int pal,int zval) +static int TileFlipAS(int sx,int addr,int pal) { - unsigned int pack=0; unsigned int t=0; unsigned char *pd = HighCol+sx; - unsigned char *zb = HighSprZ+sx; - int collision = 0; + unsigned int pack=0; unsigned int t=0; pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels if (pack) { - t=pack&0x000f0000; if(t) { if(zb[0]) collision=1; if(zval>zb[0]) { pd[0]=(unsigned char)(pal|(t>>16)); 
zb[0]=(char)zval; } } - t=pack&0x00f00000; if(t) { if(zb[1]) collision=1; if(zval>zb[1]) { pd[1]=(unsigned char)(pal|(t>>20)); zb[1]=(char)zval; } } - t=pack&0x0f000000; if(t) { if(zb[2]) collision=1; if(zval>zb[2]) { pd[2]=(unsigned char)(pal|(t>>24)); zb[2]=(char)zval; } } - t=pack&0xf0000000; if(t) { if(zb[3]) collision=1; if(zval>zb[3]) { pd[3]=(unsigned char)(pal|(t>>28)); zb[3]=(char)zval; } } - t=pack&0x0000000f; if(t) { if(zb[4]) collision=1; if(zval>zb[4]) { pd[4]=(unsigned char)(pal|(t )); zb[4]=(char)zval; } } - t=pack&0x000000f0; if(t) { if(zb[5]) collision=1; if(zval>zb[5]) { pd[5]=(unsigned char)(pal|(t>> 4)); zb[5]=(char)zval; } } - t=pack&0x00000f00; if(t) { if(zb[6]) collision=1; if(zval>zb[6]) { pd[6]=(unsigned char)(pal|(t>> 8)); zb[6]=(char)zval; } } - t=pack&0x0000f000; if(t) { if(zb[7]) collision=1; if(zval>zb[7]) { pd[7]=(unsigned char)(pal|(t>>12)); zb[7]=(char)zval; } } - if (collision) Pico.video.status|=0x20; + tilepixelAS(0x000f0000, 0, 16); + tilepixelAS(0x00f00000, 1, 20); + tilepixelAS(0x0f000000, 2, 24); + tilepixelAS(0xf0000000, 3, 28); + tilepixelAS(0x0000000f, 4, 0); + tilepixelAS(0x000000f0, 5, 4); + tilepixelAS(0x00000f00, 6, 8); + tilepixelAS(0x0000f000, 7, 12); return 0; } return 1; // Tile blank } +// there is a problem with transparent hi pri tiles (on layer), it will clear high bits +// and sprite tiles will be drawn needlessly. Hopefully that won't happen much.. 
+#define sh_pixAS(x) \ + if(!t); \ + else if(t==0xe) pd[x]=(unsigned char)((pd[x]&0x3f)|0x80); /* hilight */ \ + else if(t==0xf) pd[x]=(unsigned char)( pd[x] |0xc0); /* shadow */ \ + else if(!(pd[x]&0xc0)) pd[x]=(unsigned char)(pal|t) -#define sh_pixZ(x) \ - if(t) { \ - if(zb[x]) collision=1; \ - if(zval>zb[x]) { \ - if (t==0xe) { pd[x]=(unsigned char)((pd[x]&0x3f)|0x80); /* hilight */ } \ - else if(t==0xf) { pd[x]=(unsigned char)((pd[x]&0x3f)|0xc0); /* shadow */ } \ - else { zb[x]=(char)zval; pd[x]=(unsigned char)(pal|t); } \ - } \ - } - -static int TileNormZSH(int sx,int addr,int pal,int zval) +static int TileNormSHAS(int sx,int addr,int pal) { unsigned int pack=0; unsigned int t=0; unsigned char *pd = HighCol+sx; - unsigned char *zb = HighSprZ+sx; - int collision = 0; pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels if (pack) { - t=(pack&0x0000f000)>>12; sh_pixZ(0); - t=(pack&0x00000f00)>> 8; sh_pixZ(1); - t=(pack&0x000000f0)>> 4; sh_pixZ(2); - t=(pack&0x0000000f) ; sh_pixZ(3); - t=(pack&0xf0000000)>>28; sh_pixZ(4); - t=(pack&0x0f000000)>>24; sh_pixZ(5); - t=(pack&0x00f00000)>>20; sh_pixZ(6); - t=(pack&0x000f0000)>>16; sh_pixZ(7); - if(collision) Pico.video.status|=0x20; + t=(pack&0x0000f000)>>12; sh_pixAS(0); + t=(pack&0x00000f00)>> 8; sh_pixAS(1); + t=(pack&0x000000f0)>> 4; sh_pixAS(2); + t=(pack&0x0000000f) ; sh_pixAS(3); + t=(pack&0xf0000000)>>28; sh_pixAS(4); + t=(pack&0x0f000000)>>24; sh_pixAS(5); + t=(pack&0x00f00000)>>20; sh_pixAS(6); + t=(pack&0x000f0000)>>16; sh_pixAS(7); return 0; } return 1; // Tile blank } -static int TileFlipZSH(int sx,int addr,int pal,int zval) +static int TileFlipSHAS(int sx,int addr,int pal) { unsigned int pack=0; unsigned int t=0; unsigned char *pd = HighCol+sx; - unsigned char *zb = HighSprZ+sx; - int collision = 0; pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels if (pack) { - t=(pack&0x000f0000)>>16; sh_pixZ(0); - t=(pack&0x00f00000)>>20; sh_pixZ(1); - t=(pack&0x0f000000)>>24; sh_pixZ(2); - 
t=(pack&0xf0000000)>>28; sh_pixZ(3); - t=(pack&0x0000000f) ; sh_pixZ(4); - t=(pack&0x000000f0)>> 4; sh_pixZ(5); - t=(pack&0x00000f00)>> 8; sh_pixZ(6); - t=(pack&0x0000f000)>>12; sh_pixZ(7); - if(collision) Pico.video.status|=0x20; + t=(pack&0x000f0000)>>16; sh_pixAS(0); + t=(pack&0x00f00000)>>20; sh_pixAS(1); + t=(pack&0x0f000000)>>24; sh_pixAS(2); + t=(pack&0xf0000000)>>28; sh_pixAS(3); + t=(pack&0x0000000f) ; sh_pixAS(4); + t=(pack&0x000000f0)>> 4; sh_pixAS(5); + t=(pack&0x00000f00)>> 8; sh_pixAS(6); + t=(pack&0x0000f000)>>12; sh_pixAS(7); return 0; } return 1; // Tile blank } + // -------------------------------------------- #ifndef _ASM_DRAW_C @@ -725,7 +726,7 @@ last_cut_tile: // Index + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. size // Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -static void DrawSprite(int *sprite, int **hc, int sh) +static void DrawSprite(int *sprite, int **hc, int sh, int as) { int width=0,height=0; int row=0,code=0; @@ -753,74 +754,21 @@ static void DrawSprite(int *sprite, int **hc, int sh) tile<<=4; tile+=(row&7)<<1; // Tile address - if(code&0x8000) { // high priority - cache it + if (code&0x8000) { // high priority - cache it *(*hc)++ = (tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>16)&0xf); - } else { - delta<<=4; // Delta of address - pal=((code>>9)&0x30)|(sh<<6); - - if(sh && (code&0x6000) == 0x6000) { - if(code&0x0800) fTileFunc=TileFlipSH; - else fTileFunc=TileNormSH; - } else { - if(code&0x0800) fTileFunc=TileFlip; - else fTileFunc=TileNorm; - } - - for (; width; width--,sx+=8,tile+=delta) - { - if(sx<=0) continue; - if(sx>=328) break; // Offscreen - - tile&=0x7fff; // Clip tile address - fTileFunc(sx,tile,pal); - } + // we need all for accurate sprites, cached will be used to recover ones overwritten by high layer + if (!as) return; } -} -#endif - - -// Index + 0 : hhhhvvvv s---hhvv yyyyyyyy yyyyyyyy // s: skip flag, h: horiz. 
size -// Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 - -static void DrawSpriteZ(int pack, int pack2, int shpri, int sprio) -{ - int width=0,height=0; - int row=0; - int pal; - int tile=0,delta=0; - int sx, sy; - int (*fTileFunc)(int sx,int addr,int pal,int zval); - - // parse the sprite data - sx=pack2>>16; // X - sy=(pack <<16)>>16; // Y - width=pack>>28; - height=(pack>>24)&7; // Width and height in tiles - row=Scanline-sy; // Row of the sprite we are on - - if (pack2&0x1000) row=(height<<3)-1-row; // Flip Y - - tile=pack2&0x7ff; // Tile number - tile+=row>>3; // Tile number increases going down - delta=height; // Delta to increase tile by going right - if (pack2&0x0800) { tile+=delta*(width-1); delta=-delta; } // Flip X - - tile<<=4; tile+=(row&7)<<1; // Tile address delta<<=4; // Delta of address - pal=((pack2>>9)&0x30); - if((shpri&1)&&!(shpri&2)) pal|=0x40; - - shpri&=1; - if((pack2&0x6000) != 0x6000) shpri = 0; - shpri |= (pack2&0x0800)>>10; - switch(shpri) { - default: - case 0: fTileFunc=TileNormZ; break; - case 1: fTileFunc=TileNormZSH; break; - case 2: fTileFunc=TileFlipZ; break; - case 3: fTileFunc=TileFlipZSH; break; + pal=((code>>9)&0x30)|((sh|as)<<6); + + if (sh && (code&0x6000) == 0x6000) { + if(code&0x0800) fTileFunc=TileFlipSH; + else fTileFunc=TileNormSH; + } else { + if(code&0x0800) fTileFunc=TileFlip; + else fTileFunc=TileNorm; } for (; width; width--,sx+=8,tile+=delta) @@ -829,9 +777,10 @@ static void DrawSpriteZ(int pack, int pack2, int shpri, int sprio) if(sx>=328) break; // Offscreen tile&=0x7fff; // Clip tile address - fTileFunc(sx,tile,pal,sprio); + fTileFunc(sx,tile,pal); } } +#endif static void DrawSpriteInterlace(unsigned int *sprite) { @@ -877,7 +826,7 @@ static void DrawSpriteInterlace(unsigned int *sprite) } -static void DrawAllSpritesInterlace(int pri, int maxwidth) +static void DrawAllSpritesInterlace(int *hcache, int maxwidth, int pri, int sh) { struct PicoVideo *pvid=&Pico.video; int 
i,u,table,link=0,sline=Scanline<<1; @@ -925,7 +874,7 @@ static void DrawAllSpritesInterlace(int pri, int maxwidth) #ifndef _ASM_DRAW_C -static void DrawSpritesFromCache(int *hc, int sh) +static void DrawSpritesFromCache(int *hc, int maxwidth, int prio, int sh) { int code, tile, sx, delta, width; int pal; @@ -963,6 +912,43 @@ static void DrawSpritesFromCache(int *hc, int sh) } #endif +static void DrawSpritesFromCacheAS(int *hc, int maxwidth, int prio, int sh) +{ + int code, tile, sx, delta, width; + int pal; + int (*fTileFunc)(int sx,int addr,int pal); + + // *(*hc)++ = (tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>16)&0xf); + + while((code=*hc++)) { + pal=(code&0x30); + delta=code&0xf; + width=delta>>2; delta&=3; + width++; delta++; // Width and height in tiles + if (code&0x10000) delta=-delta; // Flip X + delta<<=4; + tile=((unsigned int)code>>17)<<1; + sx=(code<<16)>>22; // sx can be negative (start offscreen), so sign extend + + if(sh && pal == 0x30) { // + if(code&0x10000) fTileFunc=TileFlipSHAS; + else fTileFunc=TileNormSHAS; + } else { + if(code&0x10000) fTileFunc=TileFlipAS; + else fTileFunc=TileNormAS; + } + + for (; width; width--,sx+=8,tile+=delta) + { + if(sx<=0) continue; + if(sx>=328) break; // Offscreen + + tile&=0x7fff; // Clip tile address + fTileFunc(sx,tile,pal); + } + } +} + // Index + 0 : ----hhvv -lllllll -------y yyyyyyyy // Index + 4 : -------x xxxxxxxx pccvhnnn nnnnnnnn @@ -1066,23 +1052,12 @@ static void DrawAllSprites(int *hcache, int maxwidth, int prio, int sh) int max_line_sprites = 20; // 20 sprites, 40 tiles int *ps, pack, rs = rendstatus, scan = Scanline; - if(rs&8) { - DrawAllSpritesInterlace(prio, maxwidth); - return; - } if (rs & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)) { //dprintf("PrepareSprites(%i) [%i]", (rs>>4)&1, scan); PrepareSprites(rs & PDRAW_DIRTY_SPRITES); rendstatus = rs & ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); } - if (!(SpriteBlocks & (1<<(scan>>3)))) return; - - if
(((rs&PDRAW_ACC_SPRITES)||sh) && prio==0) - memset(HighSprZ, 0, 328); - if (!(rs&PDRAW_ACC_SPRITES)&&prio) { - if(hcache[0]) DrawSpritesFromCache(hcache, sh); - return; - } + if (!(SpriteBlocks & (1<<(scan>>3)))) { *hcache = 0; return; } if (PicoOpt & POPT_DIS_SPRITE_LIM) max_line_sprites = 80; @@ -1125,27 +1100,18 @@ static void DrawAllSprites(int *hcache, int maxwidth, int prio, int sh) if (pack & 0x00800000) continue; - // accurate sprites - //dprintf("P:%i",((sx>>15)&1)); - if (rs & PDRAW_ACC_SPRITES) { - // might need to skip this sprite - if ((pack2&0x8000) ^ (prio<<15)) continue; - DrawSpriteZ(pack, pack2, sh|(prio<<1), n^0xff); - continue; - } - // sprite is good, save it's pointer sprites[i++]=ps; } + n = (rs & PDRAW_ACC_SPRITES) ? 1 : 0; + // Go through sprites backwards: - if (!(rs & PDRAW_ACC_SPRITES)) { - for (i--; i>=0; i--) - DrawSprite(sprites[i],&hcache,sh); + for (i--; i>=0; i--) + DrawSprite(sprites[i],&hcache,sh,n); - // terminate cache list - *hcache = 0; - } + // terminate cache list + *hcache = 0; } @@ -1176,7 +1142,7 @@ static void FinalizeLineBGR444(int sh) unsigned short *pd=DrawLineDest; unsigned char *ps=HighCol+8; unsigned short *pal=Pico.cram; - int len, i, t; + int len, i, t, mask=0xff; if (Pico.video.reg[12]&1) { len = 320; @@ -1201,8 +1167,11 @@ static void FinalizeLineBGR444(int sh) } } + if (!sh && (rendstatus & PDRAW_ACC_SPRITES)) + mask=0x3f; // accurate sprites + for(i = 0; i < len; i++) - pd[i] = pal[ps[i]]; + pd[i] = pal[ps[i] & mask]; } @@ -1247,15 +1216,22 @@ static void FinalizeLineRGB555(int sh) len = 256; } + { #ifndef PSP - for (i = 0; i < len; i++) - pd[i] = pal[ps[i]]; + int mask=0xff; + if (!sh && (rendstatus & PDRAW_ACC_SPRITES)) + mask=0x3f; // accurate sprites, upper bits are priority stuff + + for (i = 0; i < len; i++) + pd[i] = pal[ps[i] & mask]; #else - { extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); - amips_clut(pd, ps, pal, len); - } + extern void 
amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); + if (!sh && (rendstatus & PDRAW_ACC_SPRITES)) + amips_clut_6bit(pd, ps, pal, len); + else amips_clut(pd, ps, pal, len); #endif + } } #endif @@ -1265,7 +1241,7 @@ static void FinalizeLine8bit(int sh) int len, rs = rendstatus; static int dirty_count; - if (!sh && Pico.m.dirtyPal == 1 && Scanline < 222) + if (!sh && !(rs & PDRAW_ACC_SPRITES) && Pico.m.dirtyPal == 1 && Scanline < 222) { // a hack for mid-frame palette changes if (!(rs & PDRAW_SONIC_MODE)) @@ -1320,7 +1296,7 @@ static void handle_early_blank(int scanline, int sh) // -------------------------------------------- -static int DrawDisplay(int sh) +static int DrawDisplay(int sh, int as) { struct PicoVideo *pvid=&Pico.video; int win=0,edge=0,hvwind=0; @@ -1354,16 +1330,16 @@ static int DrawDisplay(int sh) } } - DrawLayer(1|(sh<<1), HighCacheB, 0, maxcells); + DrawLayer(1|((sh|as)<<1), HighCacheB, 0, maxcells); if (hvwind == 1) - DrawWindow(0, maxcells>>1, 0, sh); + DrawWindow(0, maxcells>>1, 0, sh|as); else if (hvwind == 2) { // ahh, we have vertical window - DrawLayer(0|(sh<<1), HighCacheA, (win&0x80) ? 0 : edge<<1, (win&0x80) ? edge<<1 : maxcells); - DrawWindow( (win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 0, sh); + DrawLayer(0|((sh|as)<<1), HighCacheA, (win&0x80) ? 0 : edge<<1, (win&0x80) ? edge<<1 : maxcells); + DrawWindow( (win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 0, sh|as); } else - DrawLayer(0|(sh<<1), HighCacheA, 0, maxcells); - DrawAllSprites(HighCacheS, maxw, 0, sh); + DrawLayer(0|((sh|as)<<1), HighCacheA, 0, maxcells); + DrawAllSpritesLoPri(HighCacheS, maxw, 0, sh); if (HighCacheB[0]) DrawTilesFromCache(HighCacheB, sh, 328); if (hvwind == 1) @@ -1373,7 +1349,7 @@ static int DrawDisplay(int sh) DrawWindow((win&0x80) ? edge : 0, (win&0x80) ? 
maxcells>>1 : edge, 1, sh); } else if (HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh, 328); - DrawAllSprites(HighCacheS, maxw, 1, sh); + DrawAllSpritesHiPri(HighCacheS, maxw, 1, sh); #if 0 { @@ -1392,10 +1368,17 @@ PICO_INTERNAL void PicoFrameStart(void) { // prepare to do this frame rendstatus = (PicoOpt&0x80)>>5; // accurate sprites, clear everything else - if (rendstatus) - Pico.video.status &= ~0x0020; - else Pico.video.status |= 0x0020; // sprite collision - if ((Pico.video.reg[12]&6) == 6) rendstatus |= PDRAW_INTERLACE; // interlace mode + if ((Pico.video.reg[12]&6) == 6) { + rendstatus |= PDRAW_INTERLACE; // interlace mode + DrawAllSpritesLoPri = DrawAllSpritesInterlace; + DrawAllSpritesHiPri = DrawAllSpritesInterlace; + } + else + { + DrawAllSpritesLoPri = DrawAllSprites; + DrawAllSpritesHiPri = rendstatus ? DrawSpritesFromCacheAS : DrawSpritesFromCache; + } + if (Pico.m.dirtyPal) Pico.m.dirtyPal = 2; // reset dirty if needed PrepareSprites(1); @@ -1404,11 +1387,12 @@ PICO_INTERNAL void PicoFrameStart(void) PICO_INTERNAL int PicoLine(int scan) { - int sh; + int sh, as = 0; if (skip_next_line>0) { skip_next_line--; return 0; } // skip_next_line rendering lines Scanline=scan; sh=(Pico.video.reg[0xC]&8)>>3; // shadow/hilight? 
+ if (rendstatus & PDRAW_ACC_SPRITES) as|=1; // accurate sprites if (rendstatus & PDRAW_EARLY_BLANK) { if (scan > 0) handle_early_blank(scan, sh); @@ -1419,9 +1403,9 @@ PICO_INTERNAL int PicoLine(int scan) skip_next_line = PicoScanBegin(scan); // Draw screen: - BackFill(Pico.video.reg[7], sh); + BackFill(Pico.video.reg[7], sh|as); if (Pico.video.reg[1]&0x40) - DrawDisplay(sh); + DrawDisplay(sh, as); if (FinalizeLine != NULL) FinalizeLine(sh); diff --git a/Pico/Draw.s b/Pico/Draw.s index 5ddd5e6..3cafde9 100644 --- a/Pico/Draw.s +++ b/Pico/Draw.s @@ -896,7 +896,7 @@ DrawTilesFromCache: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawSpritesFromCache @ int *hc, int sh +.global DrawSpritesFromCache @ int *hc, int maxwidth, int prio, int sh DrawSpritesFromCache: stmfd sp!, {r4-r11,lr} @@ -911,7 +911,7 @@ DrawSpritesFromCache: mov r12,#0xf .endif ldr lr, =(Pico+0x10000) @ lr=Pico.vram - mov r6, r1, lsl #31 + mov r6, r3, lsl #31 orr r6, r6, #1<<30 mov r10, r0 @@ -1032,11 +1032,12 @@ DrawSpritesFromCache: @ + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. 
size @ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -.global DrawSprite @ unsigned int *sprite, int **hc, int sh +.global DrawSprite @ unsigned int *sprite, int **hc, int sh, int acc_sprites DrawSprite: stmfd sp!, {r4-r9,r11,lr} + orr r8, r3, r2, lsl #4 ldr r3, [r0] @ sprite[0] ldr r7, =Scanline mov r6, r3, lsr #28 @@ -1050,10 +1051,10 @@ DrawSprite: ldr r9, [r0, #4] sub r7, r7, r4, asr #16 @ r7=row=Scanline-sy - tst r2, r2 mov r2, r9, asr #16 @ r2=sx - bic r9, r9, #0xfe000000 - orrne r9, r9, #1<<31 @ r9=code|(sh<<31) + mov r9, r9, lsl #16 + mov r9, r9, lsr #16 + orr r9, r9, r8, lsl #27 @ r9=code|sh[31]|as[27] tst r9, #0x1000 movne r4, r5, lsl #3 @@ -1075,6 +1076,7 @@ DrawSprite: tst r9, #0x8000 bne .dspr_cache @ if(code&0x8000) // high priority - cache it +.dspr_continue: @ cache some stuff to avoid mem access .if OVERRIDE_HIGHCOL ldr r11,=HighCol @@ -1089,11 +1091,10 @@ DrawSprite: mov r5, r5, lsl #4 @ delta<<=4; // Delta of address and r4, r9, #0x6000 orr r9, r9, r4, lsl #16 - orr r9, r9, #0x10000000 @ r9=scc1 ???? ... (s=shadow/hilight, cc=pal) + orrs r9, r9, #0x10000000 @ r9=scc1 a??? ... 
(s=shadow/hilight, cc=pal, a=acc_spr) - tst r9, #1<<31 mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); - orrne r3, r3, #0x40 @ shadow by default + orrmi r3, r3, #0x40 @ shadow by default add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags @@ -1184,19 +1185,21 @@ DrawSprite: mov r4, r8, lsl #16 @ tile tst r9, #0x0800 orrne r4, r4, #0x10000 @ code&0x0800 - mov r2, r2, lsl #22 - orr r4, r4, r2, lsr #16 @ (sx<<6)&0x0000ffc0 - and r2, r9, #0x6000 - orr r4, r4, r2, lsr #9 @ (code>>9)&0x30 + mov r0, r2, lsl #22 + orr r4, r4, r0, lsr #16 @ (sx<<6)&0x0000ffc0 + and r0, r9, #0x6000 + orr r4, r4, r0, lsr #9 @ (code>>9)&0x30 mov r3, r3, lsl #12 - ldr r2, [r1] + ldr r0, [r1] orr r4, r4, r3, lsr #28 @ (sprite[0]>>24)&0xf - str r4, [r2], #4 - str r2, [r1] + str r4, [r0], #4 + str r0, [r1] - ldmfd sp!, {r4-r9,r11,lr} - bx lr + tst r9, #(1<<27) + ldmeqfd sp!, {r4-r9,r11,lr} + bne .dspr_continue @ draw anyway if accurate sprites enabled + bxeq lr @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -1452,23 +1455,29 @@ FinalizeLineBGR444: bne .fl_loopcpBGR444_hi sub r3, r4, #0x40*3*2 + mov r6, #1 .fl_noshBGR444: + ldr r12,=rendstatus + eors r6, r6, #1 @ sh is 0 + ldr r12,[r12] + mov lr, #0xff + tstne r12,#(1<<2) @ and PDRAW_ACC_SPRITES + .if OVERRIDE_HIGHCOL ldr r1, =HighCol - mov lr, #0xff + movne lr, #0x3f ldr r1, [r1] mov lr, lr, lsl #1 add r1, r1, #8 .else ldr r1, =(HighCol+8) - mov lr, #0xff + movne lr, #0x3f mov lr, lr, lsl #1 .endif .fl_loopBGR444: - ldr r12, [r1], #4 subs r2, r2, #1 @@ -1478,11 +1487,10 @@ FinalizeLineBGR444: ldrh r5, [r3, r5] and r6, lr, r12, lsr #15 ldrh r6, [r3, r6] + and r12,lr, r12, lsr #23 + ldrh r12,[r3, r12] @ 1c.i. orr r4, r4, r5, lsl #16 - - and r5, lr, r12, lsr #23 - ldrh r5, [r3, r5] @ 2c.i. 
- orr r5, r6, r5, lsl #16 + orr r5, r6, r12,lsl #16 stmia r0!, {r4,r5} bne .fl_loopBGR444 @@ -1617,8 +1625,16 @@ FinalizeLineRGB555: bne .fl_loopcpRGB555_hi sub r3, r3, #0x40*2 + mov r6, #1 .fl_noshRGB555: + ldr r12,=rendstatus + eors r6, r6, #1 @ sh is 0 + ldr r12,[r12] + mov lr, #0xff + tstne r12,#(1<<2) @ and PDRAW_ACC_SPRITES + movne lr, #0x3f + .if OVERRIDE_HIGHCOL ldr r1, =HighCol ldr r0, =DrawLineDest @@ -1632,7 +1648,6 @@ FinalizeLineRGB555: .endif ldrb r12, [r8, #12] - mov lr, #0xff mov lr, lr, lsl #1 tst r12, #1 diff --git a/Pico/Draw_amips.s b/Pico/Draw_amips.s index c05391c..29aa7db 100644 --- a/Pico/Draw_amips.s +++ b/Pico/Draw_amips.s @@ -45,6 +45,43 @@ amips_clut_loop: nop +.global amips_clut_6bit + +amips_clut_6bit: + srl $a3, 2 + li $t4, 0 + li $t5, 0 + li $t6, 0 + li $t7, 0 +amips_clut_loop6: + lbu $t0, 0($a1) # tried lw here, no improvement noticed + lbu $t1, 1($a1) + lbu $t2, 2($a1) + lbu $t3, 3($a1) + ins $t4, $t0, 1, 6 + ins $t5, $t1, 1, 6 + ins $t6, $t2, 1, 6 + ins $t7, $t3, 1, 6 + addu $t0, $t4, $a2 + addu $t1, $t5, $a2 + addu $t2, $t6, $a2 + addu $t3, $t7, $a2 + lhu $t0, 0($t0) + lhu $t1, 0($t1) + lhu $t2, 0($t2) + lhu $t3, 0($t3) + ins $t0, $t1, 16, 16 # ins rt, rs, pos, size - Insert size bits starting + ins $t2, $t3, 16, 16 # from the LSB of rs into rt starting at position pos + sw $t0, 0($a0) + sw $t2, 4($a0) + addiu $a0, 8 + addiu $a3, -1 + bnez $a3, amips_clut_loop6 + addiu $a1, 4 + jr $ra + nop + + # $a0 - pd, $a1 - tile word, $a2 - pal # ext rt, rs, pos, size // Extract size bits from position pos in rs and store in rt diff --git a/Pico/Pico.c b/Pico/Pico.c index 93d04e7..7146e2c 100644 --- a/Pico/Pico.c +++ b/Pico/Pico.c @@ -157,7 +157,7 @@ int PicoReset(void) Pico.m.dirtyPal = 1; PicoDetectRegion(); - Pico.video.status = 0x3408 | Pico.m.pal; // 'always set' bits | vblank | pal + Pico.video.status = 0x3428 | Pico.m.pal; // 'always set' bits | vblank | collision | pal PsndReset(); // pal must be known here @@ -320,7 +320,7 @@ 
PICO_INTERNAL void PicoSyncZ80(int m68k_cycles_done) z80_cycle_aim = cycles_68k_to_z80(m68k_cycles_done); cnt = z80_cycle_aim - z80_cycle_cnt; - elprintf(EL_ANOMALY, "z80 sync %i (%i|%i -> %i|%i)", cnt, z80_cycle_cnt, z80_cycle_cnt / 228, + elprintf(EL_BUSREQ, "z80 sync %i (%i|%i -> %i|%i)", cnt, z80_cycle_cnt, z80_cycle_cnt / 228, z80_cycle_aim, z80_cycle_aim / 228); if (cnt > 0) diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index f580f84..5f4a0e2 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -291,26 +291,39 @@ static void blit(const char *fps, const char *notice) // 8bit accurate renderer if (Pico.m.dirtyPal) { + int pallen = 0x40; Pico.m.dirtyPal = 0; - if(Pico.video.reg[0xC]&8) { // shadow/hilight mode + if (Pico.video.reg[0xC]&8) // shadow/hilight mode + { vidConvCpyRGB32(localPal, Pico.cram, 0x40); vidConvCpyRGB32sh(localPal+0x40, Pico.cram, 0x40); vidConvCpyRGB32hi(localPal+0x80, Pico.cram, 0x40); memcpy32(localPal+0xc0, localPal+0x40, 0x40); - localPal[0xc0] = 0x0000c000; - localPal[0xd0] = 0x00c00000; - localPal[0xe0] = 0x00000000; // reserved pixels for OSD - localPal[0xf0] = 0x00ffffff; - gp2x_video_setpalette(localPal, 0x100); - } else if (rendstatus & PDRAW_SONIC_MODE) { // mid-frame palette changes + pallen = 0x100; + } + else if (rendstatus & PDRAW_ACC_SPRITES) { + vidConvCpyRGB32(localPal, Pico.cram, 0x40); + memcpy32(localPal+0x40, localPal, 0x40); + memcpy32(localPal+0x80, localPal, 0x40); + memcpy32(localPal+0xc0, localPal, 0x40); + pallen = 0x100; + } + else if (rendstatus & PDRAW_SONIC_MODE) { // mid-frame palette changes vidConvCpyRGB32(localPal, Pico.cram, 0x40); vidConvCpyRGB32(localPal+0x40, HighPal, 0x40); vidConvCpyRGB32(localPal+0x80, HighPal+0x40, 0x40); - gp2x_video_setpalette(localPal, 0xc0); - } else { + pallen = 0xc0; + } + else { vidConvCpyRGB32(localPal, Pico.cram, 0x40); - gp2x_video_setpalette(localPal, 0x40); } + if (pallen > 0xc0) { + localPal[0xc0] = 0x0000c000; + localPal[0xd0] = 0x00c00000; + 
localPal[0xe0] = 0x00000000; // reserved pixels for OSD + localPal[0xf0] = 0x00ffffff; + } + gp2x_video_setpalette(localPal, pallen); } } diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 46de80c..4857371 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -162,6 +162,9 @@ void emu_setDefaultConfig(void) extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); +extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); + +extern void (*amips_clut_f)(unsigned short *dst, unsigned char *src, unsigned short *pal, int count) = NULL; struct Vertex { @@ -235,7 +238,7 @@ static void set_scaling_params(void) */ } -static void do_pal_update(int allow_sh) +static void do_pal_update(int allow_sh, int allow_as) { unsigned int *dpal=(void *)localPal; int i; @@ -244,6 +247,9 @@ static void do_pal_update(int allow_sh) // dpal[i] = ((spal[i]&0x000f000f)<< 1)|((spal[i]&0x00f000f0)<<3)|((spal[i]&0x0f000f00)<<4); do_pal_convert(localPal, Pico.cram, currentConfig.gamma, currentConfig.gamma2); + Pico.m.dirtyPal = 0; + need_pal_upload = 1; + if (allow_sh && (Pico.video.reg[0xC]&8)) // shadow/hilight? 
{ // shadowed pixels @@ -260,8 +266,12 @@ static void do_pal_update(int allow_sh) } localPal[0xe0] = 0; } - Pico.m.dirtyPal = 0; - need_pal_upload = 1; + else if (allow_as && (rendstatus & PDRAW_ACC_SPRITES)) + { + memcpy32(localPal+0x40, localPal, 0x40); + memcpy32(localPal+0x80, localPal, 0x40); + memcpy32(localPal+0xc0, localPal, 0x40); + } } static void do_slowmode_lines(int line_to) @@ -272,7 +282,7 @@ static void do_slowmode_lines(int line_to) if (!(Pico.video.reg[1]&8)) { line = 8; dst += 512*8; src += 512*8; } for (; line < line_to; line++, dst+=512, src+=512) - amips_clut(dst, src, localPal, line_len); + amips_clut_f(dst, src, localPal, line_len); } static void EmuScanPrepare(void) @@ -282,7 +292,10 @@ static void EmuScanPrepare(void) dynamic_palette = 0; if (Pico.m.dirtyPal) - do_pal_update(1); + do_pal_update(1, 1); + if ((rendstatus & PDRAW_ACC_SPRITES) && !(Pico.video.reg[0xC]&8)) + amips_clut_f = amips_clut_6bit; + else amips_clut_f = amips_clut; } static int EmuScanSlowBegin(unsigned int num) @@ -304,13 +317,13 @@ static int EmuScanSlowEnd(unsigned int num) do_slowmode_lines(num); dynamic_palette = 1; } - do_pal_update(1); + do_pal_update(1, 0); } if (dynamic_palette) { int line_len = (Pico.video.reg[12]&1) ? 320 : 256; void *dst = (char *)VRAM_STUFF + 512*240 + 512*2*num; - amips_clut(dst, HighCol + 8, localPal, line_len); + amips_clut_f(dst, HighCol + 8, localPal, line_len); } return 0; @@ -344,7 +357,7 @@ static void blitscreen_clut(void) } if ((PicoOpt&0x10) && Pico.m.dirtyPal) - do_pal_update(0); + do_pal_update(0, 0); sceKernelDcacheWritebackAll(); -- 2.39.2