From 07abbab17a9baab5eeabe30767b0336326049994 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Jun 2008 19:48:15 +0000 Subject: [PATCH] new faster sprite priority and sh/hi handling git-svn-id: file:///home/notaz/opt/svn/PicoDrive@486 be3aeb3a-fb24-0410-a615-afba39da0efa --- Pico/Draw.c | 427 ++++++++++++++++++++----------------------- Pico/Draw.s | 294 ++++++++++++++++------------- platform/gp2x/gp2x.c | 4 +- 3 files changed, 369 insertions(+), 356 deletions(-) diff --git a/Pico/Draw.c b/Pico/Draw.c index 7c092de..019fdc0 100644 --- a/Pico/Draw.c +++ b/Pico/Draw.c @@ -36,6 +36,7 @@ static int HighCacheA[41+1]; // caches for high layers static int HighCacheB[41+1]; static int HighCacheS[80+1]; // and sprites static int HighPreSpr[80*2+1]; // slightly preprocessed sprites +int *HighCacheS_ptr; int rendstatus = 0; int Scanline = 0; // Scanline @@ -62,7 +63,7 @@ struct TileStrip #ifdef _ASM_DRAW_C void DrawWindow(int tstart, int tend, int prio, int sh); void BackFill(int reg7, int sh); -void DrawSprite(int *sprite, int **hc, int sh, int as); +void DrawSprite(int *sprite, int sh, int as); void DrawTilesFromCache(int *hc, int sh, int rlim); void DrawSpritesFromCache(int *hc, int maxwidth, int prio, int sh); void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); @@ -82,202 +83,115 @@ void blockcpy_or(void *dst, void *src, size_t n, int pat) #endif -#ifdef _ASM_DRAW_C_AMIPS -int TileNorm(int sx,int addr,int pal); -int TileFlip(int sx,int addr,int pal); -#else -static int TileNorm(int sx,int addr,int pal) -{ - unsigned char *pd = HighCol+sx; - unsigned int pack=0; unsigned int t=0; - - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - if (pack) - { - t=pack&0x0000f000; if (t) pd[0]=(unsigned char)(pal|(t>>12)); - t=pack&0x00000f00; if (t) pd[1]=(unsigned char)(pal|(t>> 8)); - t=pack&0x000000f0; if (t) pd[2]=(unsigned char)(pal|(t>> 4)); - t=pack&0x0000000f; if (t) pd[3]=(unsigned char)(pal|(t )); - t=pack&0xf0000000; if (t) pd[4]=(unsigned 
char)(pal|(t>>28)); - t=pack&0x0f000000; if (t) pd[5]=(unsigned char)(pal|(t>>24)); - t=pack&0x00f00000; if (t) pd[6]=(unsigned char)(pal|(t>>20)); - t=pack&0x000f0000; if (t) pd[7]=(unsigned char)(pal|(t>>16)); - return 0; - } - - return 1; // Tile blank +#define TileNormMaker(funcname,pix_func) \ +static int funcname(int sx,int addr,int pal) \ +{ \ + unsigned char *pd = HighCol+sx; \ + unsigned int pack=0; unsigned int t=0; \ + \ + pack=*(unsigned int *)(Pico.vram+addr); /* Get 8 pixels */ \ + if (pack) \ + { \ + t=(pack&0x0000f000)>>12; pix_func(0); \ + t=(pack&0x00000f00)>> 8; pix_func(1); \ + t=(pack&0x000000f0)>> 4; pix_func(2); \ + t=(pack&0x0000000f) ; pix_func(3); \ + t=(pack&0xf0000000)>>28; pix_func(4); \ + t=(pack&0x0f000000)>>24; pix_func(5); \ + t=(pack&0x00f00000)>>20; pix_func(6); \ + t=(pack&0x000f0000)>>16; pix_func(7); \ + return 0; \ + } \ + \ + return 1; /* Tile blank */ \ } -static int TileFlip(int sx,int addr,int pal) -{ - unsigned char *pd = HighCol+sx; - unsigned int pack=0; unsigned int t=0; - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - if (pack) - { - t=pack&0x000f0000; if (t) pd[0]=(unsigned char)(pal|(t>>16)); - t=pack&0x00f00000; if (t) pd[1]=(unsigned char)(pal|(t>>20)); - t=pack&0x0f000000; if (t) pd[2]=(unsigned char)(pal|(t>>24)); - t=pack&0xf0000000; if (t) pd[3]=(unsigned char)(pal|(t>>28)); - t=pack&0x0000000f; if (t) pd[4]=(unsigned char)(pal|(t )); - t=pack&0x000000f0; if (t) pd[5]=(unsigned char)(pal|(t>> 4)); - t=pack&0x00000f00; if (t) pd[6]=(unsigned char)(pal|(t>> 8)); - t=pack&0x0000f000; if (t) pd[7]=(unsigned char)(pal|(t>>12)); - return 0; - } - return 1; // Tile blank +#define TileFlipMaker(funcname,pix_func) \ +static int funcname(int sx,int addr,int pal) \ +{ \ + unsigned char *pd = HighCol+sx; \ + unsigned int pack=0; unsigned int t=0; \ + \ + pack=*(unsigned int *)(Pico.vram+addr); /* Get 8 pixels */ \ + if (pack) \ + { \ + t=(pack&0x000f0000)>>16; pix_func(0); \ + t=(pack&0x00f00000)>>20; 
pix_func(1); \ + t=(pack&0x0f000000)>>24; pix_func(2); \ + t=(pack&0xf0000000)>>28; pix_func(3); \ + t=(pack&0x0000000f) ; pix_func(4); \ + t=(pack&0x000000f0)>> 4; pix_func(5); \ + t=(pack&0x00000f00)>> 8; pix_func(6); \ + t=(pack&0x0000f000)>>12; pix_func(7); \ + return 0; \ + } \ + \ + return 1; /* Tile blank */ \ } -#endif -// tile renderers for hacky operator sprite support -#define sh_pix(x) \ - if(!t); \ - else if(t==0xe) pd[x]=(unsigned char)((pd[x]&0x3f)|0x80); /* hilight */ \ - else if(t==0xf) pd[x]=(unsigned char)( pd[x] |0xc0); /* shadow */ \ - else pd[x]=(unsigned char)(pal|t) -#ifndef _ASM_DRAW_C -static int TileNormSH(int sx,int addr,int pal) -{ - unsigned int pack=0; unsigned int t=0; - unsigned char *pd = HighCol+sx; - - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - if (pack) - { - t=(pack&0x0000f000)>>12; sh_pix(0); - t=(pack&0x00000f00)>> 8; sh_pix(1); - t=(pack&0x000000f0)>> 4; sh_pix(2); - t=(pack&0x0000000f) ; sh_pix(3); - t=(pack&0xf0000000)>>28; sh_pix(4); - t=(pack&0x0f000000)>>24; sh_pix(5); - t=(pack&0x00f00000)>>20; sh_pix(6); - t=(pack&0x000f0000)>>16; sh_pix(7); - return 0; - } +#ifdef _ASM_DRAW_C_AMIPS +int TileNorm(int sx,int addr,int pal); +int TileFlip(int sx,int addr,int pal); +#else - return 1; // Tile blank -} +#define pix_just_write(x) \ + if (t) pd[x]=pal|t -static int TileFlipSH(int sx,int addr,int pal) -{ - unsigned int pack=0; unsigned int t=0; - unsigned char *pd = HighCol+sx; +TileNormMaker(TileNorm,pix_just_write) +TileFlipMaker(TileFlip,pix_just_write) - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - if (pack) - { - t=(pack&0x000f0000)>>16; sh_pix(0); - t=(pack&0x00f00000)>>20; sh_pix(1); - t=(pack&0x0f000000)>>24; sh_pix(2); - t=(pack&0xf0000000)>>28; sh_pix(3); - t=(pack&0x0000000f) ; sh_pix(4); - t=(pack&0x000000f0)>> 4; sh_pix(5); - t=(pack&0x00000f00)>> 8; sh_pix(6); - t=(pack&0x0000f000)>>12; sh_pix(7); - return 0; - } - return 1; // Tile blank -} #endif -#define 
tilepixelAS(mask,index,shift) \ - if (!(pd[index]&0xc0)) { t=pack&mask; if (t) pd[index]=(pal|(t>>shift)); } +// draw a sprite pixel, process operator colors +#define pix_sh(x) \ + if (!t); \ + else if (t==0xe) pd[x]=(pd[x]&0x3f)|0x80; /* hilight */ \ + else if (t==0xf) pd[x]= pd[x] |0xc0; /* shadow */ \ + else pd[x]=pal|t -static int TileNormAS(int sx,int addr,int pal) -{ - unsigned char *pd = HighCol+sx; - unsigned int pack=0; unsigned int t=0; +TileNormMaker(TileNormSH, pix_sh) +TileFlipMaker(TileFlipSH, pix_sh) - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - if (pack) - { - tilepixelAS(0x0000f000, 0, 12); - tilepixelAS(0x00000f00, 1, 8); - tilepixelAS(0x000000f0, 2, 4); - tilepixelAS(0x0000000f, 3, 0); - tilepixelAS(0xf0000000, 4, 28); - tilepixelAS(0x0f000000, 5, 24); - tilepixelAS(0x00f00000, 6, 20); - tilepixelAS(0x000f0000, 7, 16); - return 0; - } +#ifndef _ASM_DRAW_C +// draw a sprite pixel ignoring operator colors +#define pix_sh_noop(x) \ + if (t && t < 0xe) \ + pd[x]=pal|t - return 1; // Tile blank -} +TileNormMaker(TileNormSH_noop, pix_sh_noop) +TileFlipMaker(TileFlipSH_noop, pix_sh_noop) +#endif -static int TileFlipAS(int sx,int addr,int pal) -{ - unsigned char *pd = HighCol+sx; - unsigned int pack=0; unsigned int t=0; +// process operator pixels only, apply only on low pri tiles +#define pix_sh_onlyop(x) \ + if (t==0xe && (pd[x]&0x40)) pd[x]=(pd[x]&0x3f)|0x80; /* hilight */ \ + else if (t==0xf && (pd[x]&0x40)) pd[x]= pd[x] |0xc0; /* shadow */ - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - if (pack) - { - tilepixelAS(0x000f0000, 0, 16); - tilepixelAS(0x00f00000, 1, 20); - tilepixelAS(0x0f000000, 2, 24); - tilepixelAS(0xf0000000, 3, 28); - tilepixelAS(0x0000000f, 4, 0); - tilepixelAS(0x000000f0, 5, 4); - tilepixelAS(0x00000f00, 6, 8); - tilepixelAS(0x0000f000, 7, 12); - return 0; - } - return 1; // Tile blank -} +TileNormMaker(TileNormSH_onlyop_lp, pix_sh_onlyop) +TileFlipMaker(TileFlipSH_onlyop_lp, pix_sh_onlyop) -// there is 
a problem with transparent hi pri tiles (on layer), it will clear high bits -// and sprite tiles will be drawn needlessly. Hopefully that won't happen much.. -#define sh_pixAS(x) \ - if(!t); \ - else if(t==0xe) pd[x]=(unsigned char)((pd[x]&0x3f)|0x80); /* hilight */ \ - else if(t==0xf) pd[x]=(unsigned char)( pd[x] |0xc0); /* shadow */ \ - else if(!(pd[x]&0xc0)) pd[x]=(unsigned char)(pal|t) +// draw a sprite pixel (AS) +#define pix_as(x) \ + if (t && !(pd[x]&0x80)) pd[x]=pal|t -static int TileNormSHAS(int sx,int addr,int pal) -{ - unsigned int pack=0; unsigned int t=0; - unsigned char *pd = HighCol+sx; +TileNormMaker(TileNormAS, pix_as) +TileFlipMaker(TileFlipAS, pix_as) - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - if (pack) - { - t=(pack&0x0000f000)>>12; sh_pixAS(0); - t=(pack&0x00000f00)>> 8; sh_pixAS(1); - t=(pack&0x000000f0)>> 4; sh_pixAS(2); - t=(pack&0x0000000f) ; sh_pixAS(3); - t=(pack&0xf0000000)>>28; sh_pixAS(4); - t=(pack&0x0f000000)>>24; sh_pixAS(5); - t=(pack&0x00f00000)>>20; sh_pixAS(6); - t=(pack&0x000f0000)>>16; sh_pixAS(7); - return 0; - } +// draw a sprite pixel, skip operator colors (AS) +#define pix_sh_as_noop(x) \ + if (t && t < 0xe && !(pd[x]&0x80)) pd[x]=pal|t - return 1; // Tile blank -} +TileNormMaker(TileNormAS_noop, pix_sh_as_noop) +TileFlipMaker(TileFlipAS_noop, pix_sh_as_noop) -static int TileFlipSHAS(int sx,int addr,int pal) -{ - unsigned int pack=0; unsigned int t=0; - unsigned char *pd = HighCol+sx; +// mark pixel as sprite pixel (AS) +#define pix_sh_as_onlymark(x) \ + if (t) pd[x]|=0x80 - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - if (pack) - { - t=(pack&0x000f0000)>>16; sh_pixAS(0); - t=(pack&0x00f00000)>>20; sh_pixAS(1); - t=(pack&0x0f000000)>>24; sh_pixAS(2); - t=(pack&0xf0000000)>>28; sh_pixAS(3); - t=(pack&0x0000000f) ; sh_pixAS(4); - t=(pack&0x000000f0)>> 4; sh_pixAS(5); - t=(pack&0x00000f00)>> 8; sh_pixAS(6); - t=(pack&0x0000f000)>>12; sh_pixAS(7); - return 0; - } - return 1; // Tile blank -} 
+TileNormMaker(TileNormAS_onlymark, pix_sh_as_onlymark) +TileFlipMaker(TileFlipAS_onlymark, pix_sh_as_onlymark) // -------------------------------------------- @@ -512,7 +426,7 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells) static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache { struct PicoVideo *pvid=&Pico.video; - int tilex=0,ty=0,nametab,code=0; + int tilex,ty,nametab,code=0; int blank=-1; // The tile we know is blank // Find name table line: @@ -528,9 +442,6 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache } tilex=tstart<<1; - tend<<=1; - - ty=(Scanline&7)<<1; // Y-Offset into tile if (!(rendstatus & PDRAW_WND_DIFF_PRIO)) { // check the first tile code @@ -539,6 +450,9 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache if ((code>>15) != prio) return; } + tend<<=1; + ty=(Scanline&7)<<1; // Y-Offset into tile + // Draw tiles across screen: if (!sh) { @@ -571,7 +485,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache for (; tilex < tend; tilex++) { int addr=0,zero=0; - int pal, tmp, *zb; + int pal; code=Pico.vram[nametab+tilex]; if(code==blank) continue; @@ -582,15 +496,10 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache pal=((code>>9)&0x30); - zb = (int *)(HighCol+8+(tilex<<3)); - if(prio) { - tmp = *zb; - if(!(tmp&0x00000080)) tmp&=~0x000000c0; if(!(tmp&0x00008000)) tmp&=~0x0000c000; - if(!(tmp&0x00800000)) tmp&=~0x00c00000; if(!(tmp&0x80000000)) tmp&=~0xc0000000; - *zb++=tmp; tmp = *zb; - if(!(tmp&0x00000080)) tmp&=~0x000000c0; if(!(tmp&0x00008000)) tmp&=~0x0000c000; - if(!(tmp&0x00800000)) tmp&=~0x00c00000; if(!(tmp&0x80000000)) tmp&=~0xc0000000; - *zb++=tmp; + if (prio) { + int *zb = (int *)(HighCol+8+(tilex<<3)); + *zb++ &= 0x3f3f3f3f; + *zb &= 0x3f3f3f3f; } else { pal |= 0x40; } @@ -617,14 +526,7 @@ static void DrawTilesFromCacheShPrep(void) rendstatus |= PDRAW_SHHI_DONE; while 
(c--) { - int tmp = *zb; - if (!(tmp & 0x80808080)) *zb=tmp&0x3f3f3f3f; - else { - if(!(tmp&0x00000080)) tmp&=~0x000000c0; if(!(tmp&0x00008000)) tmp&=~0x0000c000; - if(!(tmp&0x00800000)) tmp&=~0x00c00000; if(!(tmp&0x80000000)) tmp&=~0xc0000000; - *zb=tmp; - } - zb++; + *zb++ &= 0x3f3f3f3f; } } @@ -671,10 +573,8 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim) addr+=(unsigned int)code>>25; // y offset into tile dx=(code>>16)&0x1ff; zb = HighCol+dx; - if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; - if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; - if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; - if(!(*zb&0x80)) *zb&=0x3f; zb++; if(!(*zb&0x80)) *zb&=0x3f; zb++; + *zb++ &= 0x3f; *zb++ &= 0x3f; *zb++ &= 0x3f; *zb++ &= 0x3f; + *zb++ &= 0x3f; *zb++ &= 0x3f; *zb++ &= 0x3f; *zb++ &= 0x3f; pal=((code>>9)&0x30); if (rlim-dx < 0) goto last_cut_tile; @@ -726,7 +626,7 @@ last_cut_tile: // Index + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. 
size // Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -static void DrawSprite(int *sprite, int **hc, int sh, int as) +static void DrawSprite(int *sprite, int sh, int as) { int width=0,height=0; int row=0,code=0; @@ -747,25 +647,27 @@ static void DrawSprite(int *sprite, int **hc, int sh, int as) if (code&0x1000) row=(height<<3)-1-row; // Flip Y - tile=code&0x7ff; // Tile number - tile+=row>>3; // Tile number increases going down + tile=code + (row>>3); // Tile number increases going down delta=height; // Delta to increase tile by going right if (code&0x0800) { tile+=delta*(width-1); delta=-delta; } // Flip X - tile<<=4; tile+=(row&7)<<1; // Tile address + tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address + + pal=(code>>9)&0x30; - if (code&0x8000) { // high priority - cache it - *(*hc)++ = (tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>16)&0xf); - // we need all for accurate sprites, cached will be used to recover ones overwritten by high layer - if (!as) return; + // assume there will be no sprites with both normal and operator pixels.. 
+ if ((code&0x8000) || (sh && pal == 0x30) || as) { + *HighCacheS_ptr++ = ((code&0x8000)<<16)|(tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|pal|((sprite[0]>>16)&0xf); + // we need all for accurate sprites, cached will be used to do proper priorities + if (!as && (code&0x8000)) return; } delta<<=4; // Delta of address - pal=((code>>9)&0x30)|((sh|as)<<6); + pal|=((sh|as)<<6); if (sh && (code&0x6000) == 0x6000) { - if(code&0x0800) fTileFunc=TileFlipSH; - else fTileFunc=TileNormSH; + if(code&0x0800) fTileFunc=TileFlipSH_noop; + else fTileFunc=TileNormSH_noop; } else { if(code&0x0800) fTileFunc=TileFlip; else fTileFunc=TileNorm; @@ -880,21 +782,29 @@ static void DrawSpritesFromCache(int *hc, int maxwidth, int prio, int sh) int pal; int (*fTileFunc)(int sx,int addr,int pal); - // *(*hc)++ = (tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>24)&0xf); + // prio[31]:tile[30:17]:flipx[16]:sx[15:6]:pal[5:4]:delta_width[3:0] - while((code=*hc++)) { + while ((code=*hc++)) + { pal=(code&0x30); delta=code&0xf; width=delta>>2; delta&=3; width++; delta++; // Width and height in tiles if (code&0x10000) delta=-delta; // Flip X delta<<=4; - tile=((unsigned int)code>>17)<<1; + tile=((unsigned int)code>>17)<<1; // also has prio sx=(code<<16)>>22; // sx can be negative (start offscreen), so sign extend - if(sh && pal == 0x30) { // - if(code&0x10000) fTileFunc=TileFlipSH; - else fTileFunc=TileNormSH; + if (sh && pal == 0x30) + { + if (code & 0x80000000) // hi priority + { + if(code&0x10000) fTileFunc=TileFlipSH; + else fTileFunc=TileNormSH; + } else { + if(code&0x10000) fTileFunc=TileFlipSH_onlyop_lp; + else fTileFunc=TileNormSH_onlyop_lp; + } } else { if(code&0x10000) fTileFunc=TileFlip; else fTileFunc=TileNorm; @@ -915,12 +825,16 @@ static void DrawSpritesFromCache(int *hc, int maxwidth, int prio, int sh) static void DrawSpritesFromCacheAS(int *hc, int maxwidth, int prio, int sh) { int code, tile, sx, delta, width; - int pal; + int pal, *hce, *hco; 
int (*fTileFunc)(int sx,int addr,int pal); - // *(*hc)++ = (tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>24)&0xf); + // prio[31]:tile[30:17]:flipx[16]:sx[15:6]:pal[5:4]:delta_width[3:0] - while((code=*hc++)) { + /* walk the sprite cache backwards.. */ + hco = hce = HighCacheS_ptr; + while (hce > hc) + { + code=*(--hce); pal=(code&0x30); delta=code&0xf; width=delta>>2; delta&=3; @@ -930,14 +844,67 @@ static void DrawSpritesFromCacheAS(int *hc, int maxwidth, int prio, int sh) tile=((unsigned int)code>>17)<<1; sx=(code<<16)>>22; // sx can be negative (start offscreen), so sign extend - if(sh && pal == 0x30) { // - if(code&0x10000) fTileFunc=TileFlipSHAS; - else fTileFunc=TileNormSHAS; + if (code & 0x80000000) // hi priority + { + if (sh && pal == 0x30) + { + if(code&0x10000) fTileFunc=TileFlipAS_noop; + else fTileFunc=TileNormAS_noop; + *(--hco) = code; /* save for later */ + } else { + if(code&0x10000) fTileFunc=TileFlipAS; + else fTileFunc=TileNormAS; + } } else { - if(code&0x10000) fTileFunc=TileFlipAS; - else fTileFunc=TileNormAS; + if(code&0x10000) fTileFunc=TileFlipAS_onlymark; + else fTileFunc=TileNormAS_onlymark; } + pal |= 0x80; + for (; width; width--,sx+=8,tile+=delta) + { + if(sx<=0) continue; + if(sx>=328) break; // Offscreen + + tile&=0x7fff; // Clip tile address + fTileFunc(sx,tile,pal); + } + } + + if (!sh) return; + + /* nasty 1: remove 'sprite' flags */ + { + int c = 320/4, *zb = (int *)(HighCol+8); + while (c--) + { + *zb++ &= 0x7f7f7f7f; + } + } + + /* nasty 2: loop once more and do operator colors */ + while ((code=*hco++)) + { + pal=(code&0x30); + if (pal != 0x30) continue; + delta=code&0xf; + width=delta>>2; delta&=3; + width++; delta++; + if (code&0x10000) delta=-delta; // Flip X + delta<<=4; + tile=((unsigned int)code>>17)<<1; + sx=(code<<16)>>22; + + if (code & 0x80000000) + { + if(code&0x10000) fTileFunc=TileFlipSH; + else fTileFunc=TileNormSH; + } else { + if(code&0x10000) fTileFunc=TileFlipSH_onlyop_lp; + 
else fTileFunc=TileNormSH_onlyop_lp; + } + + pal |= 0x80; for (; width; width--,sx+=8,tile+=delta) { if(sx<=0) continue; @@ -950,6 +917,7 @@ static void DrawSpritesFromCacheAS(int *hc, int maxwidth, int prio, int sh) } + // Index + 0 : ----hhvv -lllllll -------y yyyyyyyy // Index + 4 : -------x xxxxxxxx pccvhnnn nnnnnnnn // v @@ -1062,6 +1030,7 @@ static void DrawAllSprites(int *hcache, int maxwidth, int prio, int sh) if (PicoOpt & POPT_DIS_SPRITE_LIM) max_line_sprites = 80; + HighCacheS_ptr = hcache; ps = HighPreSpr; // Index + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. size @@ -1108,10 +1077,10 @@ static void DrawAllSprites(int *hcache, int maxwidth, int prio, int sh) // Go through sprites backwards: for (i--; i>=0; i--) - DrawSprite(sprites[i],&hcache,sh,n); + DrawSprite(sprites[i],sh,n); // terminate cache list - *hcache = 0; + *HighCacheS_ptr = 0; } @@ -1349,7 +1318,7 @@ static int DrawDisplay(int sh, int as) DrawWindow((win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 1, sh); } else if (HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh, 328); - DrawAllSpritesHiPri(HighCacheS, maxw, 1, sh); + if (HighCacheS[0]) DrawAllSpritesHiPri(HighCacheS, maxw, 1, sh); #if 0 { diff --git a/Pico/Draw.s b/Pico/Draw.s index 3cafde9..f310253 100644 --- a/Pico/Draw.s +++ b/Pico/Draw.s @@ -1,9 +1,9 @@ @ vim:filetype=armasm -@ assembly "optimized" version of some funtions from draw.c +@ ARM assembly versions of some functions from draw.c @ this is highly specialized, be careful if changing related C code! 
-@ (c) Copyright 2007, Grazvydas "notaz" Ignotas +@ (c) Copyright 2007-2008, Grazvydas "notaz" Ignotas @ All Rights Reserved .include "port_config.s" @@ -16,6 +16,7 @@ .extern rendstatus .extern DrawLineDest .extern DrawStripInterlace +.extern HighCacheS_ptr @ helper @@ -64,10 +65,8 @@ .endif ldreqb r4, [r1,#\offs] orrne r4, r3, r4 - strneb r4, [r1,#\offs] - tsteq r4, #0x80 andeq r4, r4, #0x3f - streqb r4, [r1,#\offs] + strb r4, [r1,#\offs] .endm @ TileNormShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf, touches r3 high bits @@ -155,24 +154,17 @@ .else ands r4, r12, r2 .endif - beq 3f + beq 0f cmp r4, #0xe - beq 2f - bgt 1f - orr r4, r3, r4 - strb r4, [r1,#\ofs] - b 3f -1: - ldrb r4, [r1,#\ofs] @ 2ci - orr r4, r4, #0xc0 - strb r4, [r1,#\ofs] - b 3f -2: - ldrb r4, [r1,#\ofs] @ 2ci - bic r4, r4, #0xc0 - orr r4, r4, #0x80 + ldrgeb r4, [r1,#\ofs] + orrlt r4, r3, r4 @ normal + + biceq r4, r4, #0xc0 @ hilight + orreq r4, r4, #0x80 + orrgt r4, r4, #0xc0 @ shadow + strb r4, [r1,#\ofs] -3: +0: .endm @ TileFlipSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf @@ -199,6 +191,80 @@ TileDoShGenPixel 16, 7 @ #0x000f0000 .endm +.macro TileDoShGenPixel_noop shift ofs +.if \shift + and r4, r12, r2, lsr #\shift +.else + and r4, r12, r2 +.endif + sub r7, r4, #1 + cmp r7, #0xd + orrcc r4, r3, r4 @ 0-0xc (was 1-0xd) + strccb r4, [r1,#\ofs] +.endm + +.macro TileFlipSh_noop + TileDoShGenPixel_noop 16, 0 @ #0x000f0000 + TileDoShGenPixel_noop 20, 1 @ #0x00f00000 + TileDoShGenPixel_noop 24, 2 @ #0x0f000000 + TileDoShGenPixel_noop 28, 3 @ #0xf0000000 + TileDoShGenPixel_noop 0, 4 @ #0x0000000f + TileDoShGenPixel_noop 4, 5 @ #0x000000f0 + TileDoShGenPixel_noop 8, 6 @ #0x00000f00 + TileDoShGenPixel_noop 12, 7 @ #0x0000f000 +.endm + +.macro TileNormSh_noop + TileDoShGenPixel_noop 12, 0 @ #0x0000f000 + TileDoShGenPixel_noop 8, 1 @ #0x00000f00 + TileDoShGenPixel_noop 4, 2 @ #0x000000f0 + TileDoShGenPixel_noop 0, 3 
@ #0x0000000f + TileDoShGenPixel_noop 28, 4 @ #0xf0000000 + TileDoShGenPixel_noop 24, 5 @ #0x0f000000 + TileDoShGenPixel_noop 20, 6 @ #0x00f00000 + TileDoShGenPixel_noop 16, 7 @ #0x000f0000 +.endm + +.macro TileDoShGenPixel_onlyop_lp shift ofs +.if \shift + ands r7, r12, r2, lsr #\shift +.else + ands r7, r12, r2 +.endif + ldrneb r4, [r1,#\ofs] + tstne r4, #0x40 + beq 0f + + cmp r7, #0xe + biceq r4, r4, #0xc0 @ hilight + orreq r4, r4, #0x80 + orrgt r4, r4, #0xc0 @ shadow + strgeb r4, [r1,#\ofs] +0: +.endm + +.macro TileFlipSh_onlyop_lp + TileDoShGenPixel_onlyop_lp 16, 0 @ #0x000f0000 + TileDoShGenPixel_onlyop_lp 20, 1 @ #0x00f00000 + TileDoShGenPixel_onlyop_lp 24, 2 @ #0x0f000000 + TileDoShGenPixel_onlyop_lp 28, 3 @ #0xf0000000 + TileDoShGenPixel_onlyop_lp 0, 4 @ #0x0000000f + TileDoShGenPixel_onlyop_lp 4, 5 @ #0x000000f0 + TileDoShGenPixel_onlyop_lp 8, 6 @ #0x00000f00 + TileDoShGenPixel_onlyop_lp 12, 7 @ #0x0000f000 +.endm + +.macro TileNormSh_onlyop_lp + TileDoShGenPixel_onlyop_lp 12, 0 @ #0x0000f000 + TileDoShGenPixel_onlyop_lp 8, 1 @ #0x00000f00 + TileDoShGenPixel_onlyop_lp 4, 2 @ #0x000000f0 + TileDoShGenPixel_onlyop_lp 0, 3 @ #0x0000000f + TileDoShGenPixel_onlyop_lp 28, 4 @ #0xf0000000 + TileDoShGenPixel_onlyop_lp 24, 5 @ #0x0f000000 + TileDoShGenPixel_onlyop_lp 20, 6 @ #0x00f00000 + TileDoShGenPixel_onlyop_lp 16, 7 @ #0x000f0000 +.endm + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -798,39 +864,25 @@ DrawTilesFromCache: b .dtfc_loop .dtfc_shadow_blank: - ldrb r4, [r1] @ 1ci - ldrb r12,[r1,#1] - tst r4, #0x80 - andeq r4, r4,#0x3f - streqb r4, [r1] - tst r12,#0x80 - ldrb r4, [r1,#2] - andeq r12,r12,#0x3f - streqb r12,[r1,#1] - tst r4, #0x80 - ldrb r12,[r1,#3] - andeq r4, r4,#0x3f - streqb r4, [r1,#2] - tst r12,#0x80 - ldrb r4, [r1,#4] - andeq r12,r12,#0x3f - streqb r12,[r1,#3] - tst r4, #0x80 - ldrb r12,[r1,#5] - andeq r4, r4,#0x3f - streqb r4, [r1,#4] - tst r12,#0x80 - ldrb r4, [r1,#6] - andeq r12,r12,#0x3f - streqb 
r12,[r1,#5] - tst r4, #0x80 - ldrb r12,[r1,#7] - andeq r4, r4,#0x3f - streqb r4, [r1,#6] - tst r12,#0x80 - andeq r12,r12,#0x3f - streqb r12,[r1,#7] - mov r12, #0xf + tst r1, #1 + ldrneb r4, [r1] + mov r6, #0x3f + and r4, r4, #0x3f + strneb r4, [r1], #1 + ldrh r4, [r1] + orr r6, r6, r6, lsl #8 + and r4, r4, r6 + strh r4, [r1], #2 + ldrh r4, [r1] + and r4, r4, r6 + strh r4, [r1], #2 + ldrh r4, [r1] + and r4, r4, r6 + strh r4, [r1], #2 + ldrh r4, [r1] + and r4, r4, r6 + streqh r4, [r1] + strneb r4, [r1] b .dtfc_loop .dtfc_cut_tile: @@ -865,31 +917,22 @@ DrawTilesFromCache: str r2, [r1] add r1, r11,#8 - mov r3, #320/4 - mov r7, #0x80 - orr r7, r7, r7, lsl #8 - orr r7, r7, r7, lsl #16 + mov r3, #320/4/4 mov r6, #0x3f orr r6, r6, r6, lsl #8 orr r6, r6, r6, lsl #16 .dtfc_loop_shprep: + ldmia r1, {r2,r4,r5,r7} subs r3, r3, #1 - bmi .dtfc_loop @ done - ldr r2, [r1] - tst r2, r7 - andeq r2, r2, r6 - streq r2, [r1], #4 - beq .dtfc_loop_shprep - tst r2, #0x80000000 - biceq r2, r2, #0xc0000000 - tst r2, #0x00800000 - biceq r2, r2, #0x00c00000 - tst r2, #0x00008000 - biceq r2, r2, #0x0000c000 - tst r2, #0x00000080 - biceq r2, r2, #0x000000c0 - str r2, [r1], #4 - b .dtfc_loop_shprep + and r2, r2, r6 + and r4, r4, r6 + and r5, r5, r6 + and r7, r7, r6 + stmia r1!,{r2,r4,r5,r7} + bne .dtfc_loop_shprep + + mvn r5, #0 @ r5=prevcode=-1 + b .dtfc_loop .pool @@ -998,6 +1041,9 @@ DrawSpritesFromCache: b .dsfc_inloop .dsfc_shadow: + tst r9, #0x80000000 + beq .dsfc_shadow_lowpri + cmp r2, r2, ror #4 beq .dsfc_singlec_sh @@ -1025,6 +1071,18 @@ DrawSpritesFromCache: TileSingleSh b .dsfc_inloop +.dsfc_shadow_lowpri: + tst r9, #0x10000 + bne .dsfc_TileFlip_sh_lp + +.dsfc_TileNorm_sh_lp: + TileNormSh_onlyop_lp + b .dsfc_inloop + +.dsfc_TileFlip_sh_lp: + TileFlipSh_onlyop_lp + b .dsfc_inloop + .pool @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -1032,12 +1090,12 @@ DrawSpritesFromCache: @ + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. 
v, h: horiz. size @ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -.global DrawSprite @ unsigned int *sprite, int **hc, int sh, int acc_sprites +.global DrawSprite @ unsigned int *sprite, int sh, int acc_sprites DrawSprite: stmfd sp!, {r4-r9,r11,lr} - orr r8, r3, r2, lsl #4 + orr r8, r2, r1, lsl #4 ldr r3, [r0] @ sprite[0] ldr r7, =Scanline mov r6, r3, lsr #28 @@ -1061,20 +1119,23 @@ DrawSprite: subne r4, r4, #1 subne r7, r4, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y - mov r8, r9, lsl #21 - mov r8, r8, lsr #21 - add r8, r8, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down - + add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down tst r9, #0x0800 mlane r8, r5, r6, r8 @ if (code&0x0800) { tile+=delta*(width-1); rsbne r5, r5, #0 @ delta=-delta; } // r5=delta now - mov r8, r8, lsl #4 + mov r8, r8, lsl #21 + mov r8, r8, lsr #17 and r7, r7, #7 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address tst r9, #0x8000 - bne .dspr_cache @ if(code&0x8000) // high priority - cache it + tsteq r9, #(1<<27) + bne .dspr_cache @ if(code&0x8000) || as + tst r6, #0x4000 + tstne r6, #0x2000 + tstne r9, #(1<<31) + bne .dspr_cache @ (sh && pal == 0x30) .dspr_continue: @ cache some stuff to avoid mem access @@ -1138,6 +1199,10 @@ DrawSprite: TileFlip r12 b .dspr_loop +.dspr_singlec_sh: + cmp r2, #0xe0000000 + bcs .dspr_loop @ operator tileline, ignore + .dspr_SingleColor: and r4, r2, #0xf orr r4, r3, r4 @@ -1160,28 +1225,17 @@ DrawSprite: @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern .dspr_TileNorm_sh: - TileNormSh + TileNormSh_noop b .dspr_loop .dspr_TileFlip_sh: - TileFlipSh - b .dspr_loop - -.dspr_singlec_sh: - cmp r2, #0xe0000000 - bcc .dspr_SingleColor @ normal tileline - tst r2, #0x10000000 - bne .dspr_sh_sh - TileSingleHi - b .dspr_loop - -.dspr_sh_sh: - TileSingleSh + TileFlipSh_noop b .dspr_loop .dspr_cache: - @ *(*hc)++ = 
(tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|((code>>9)&0x30)|((sprite[0]>>24)&0xf); + @ *HighCacheS_ptr++ = ((code&0x8000)<<16)|(tile<<16)|((code&0x0800)<<5)|((sx<<6)&0x0000ffc0)|pal|((sprite[0]>>16)&0xf); + ldr r1, =HighCacheS_ptr mov r4, r8, lsl #16 @ tile tst r9, #0x0800 orrne r4, r4, #0x10000 @ code&0x0800 @@ -1190,16 +1244,19 @@ DrawSprite: and r0, r9, #0x6000 orr r4, r4, r0, lsr #9 @ (code>>9)&0x30 mov r3, r3, lsl #12 - ldr r0, [r1] orr r4, r4, r3, lsr #28 @ (sprite[0]>>24)&0xf + ldr r0, [r1] + tst r9, #0x8000 + orrne r4, r4, #0x80000000 @ prio + str r4, [r0], #4 str r0, [r1] - tst r9, #(1<<27) - ldmeqfd sp!, {r4-r9,r11,lr} - bne .dspr_continue @ draw anyway if accurate sprites enabled - bxeq lr + and r0, r9, #(1<<27) @ as + teqne r0, #(1<<27) @ (code&0x8000) && !as + ldmnefd sp!, {r4-r9,r11,pc} + b .dspr_continue @ draw anyway if accurate sprites enabled @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -1229,19 +1286,17 @@ DrawWindow: ldr r6, =rendstatus ldr lr, =(Pico+0x10000) @ lr=Pico.vram - ldrb r6, [r6] + ldr r6, [r6] @ fetch the first code now ldrh r7, [lr, r12] ands r6, r6, #2 @ we care about bit 1 only orr r6, r6, r2 - bne .dw_no_sameprio - cmp r2, r7, lsr #15 - ldmnefd sp!, {r4-r11,pc} @ assume that whole window uses same priority + teqne r2, r7, lsr #15 @ do prio bits differ? 
+ ldmnefd sp!, {r4-r11,pc} @ yes, assume that whole window uses same priority -.dw_no_sameprio: orr r6, r6, r3, lsl #8 @ shadow mode sub r8, r1, r0 @@ -1258,11 +1313,11 @@ DrawWindow: mov r8, r8, lsl #1 @ cells mvn r9, #0 @ r9=prevcode=-1 .endif - add r1, r11, r0, lsl #4 @ r1=pdest + add r1, r11, r0, lsl #4 @ r1=pdest mov r0, #0xf b .dwloop_enter - @ r4,r5 & r7 are scratch in this loop + @ r4,r5 are scratch in this loop .dwloop: add r1, r1, #8 .dwloop_nor1: @@ -1328,24 +1383,13 @@ DrawWindow: orreq r3, r3, #0x40 beq .dw_shadow_done ldr r4, [r1] - tst r4, #0x00000080 - biceq r4, r4, #0x000000c0 - tst r4, #0x00008000 - biceq r4, r4, #0x0000c000 - tst r4, #0x00800000 - biceq r4, r4, #0x00c00000 - tst r4, #0x80000000 - biceq r4, r4, #0xc0000000 + mov r5, #0x3f + orr r5, r5, r5, lsl #8 + orr r5, r5, r5, lsl #16 + and r4, r4, r5 str r4, [r1] ldr r4, [r1,#4] - tst r4, #0x00000080 - biceq r4, r4, #0x000000c0 - tst r4, #0x00008000 - biceq r4, r4, #0x0000c000 - tst r4, #0x00800000 - biceq r4, r4, #0x00c00000 - tst r4, #0x80000000 - biceq r4, r4, #0xc0000000 + and r4, r4, r5 str r4, [r1,#4] b .dw_shadow_done diff --git a/platform/gp2x/gp2x.c b/platform/gp2x/gp2x.c index 6db1d41..5f38314 100644 --- a/platform/gp2x/gp2x.c +++ b/platform/gp2x/gp2x.c @@ -201,12 +201,12 @@ void gp2x_pd_clone_buffer2(void) unsigned long gp2x_joystick_read(int allow_usb_joy) { int i; - unsigned long value=(gp2x_memregs[0x1198>>1] & 0x00FF); + unsigned long value=(gp2x_memregs[0x1198>>1] & 0x00FF); // GPIO M if(value==0xFD) value=0xFA; if(value==0xF7) value=0xEB; if(value==0xDF) value=0xAF; if(value==0x7F) value=0xBE; - value = ~((gp2x_memregs[0x1184>>1] & 0xFF00) | value | (gp2x_memregs[0x1186>>1] << 16)); + value = ~((gp2x_memregs[0x1184>>1] & 0xFF00) | value | (gp2x_memregs[0x1186>>1] << 16)); // C D if (allow_usb_joy && num_of_joys > 0) { // check the usb joy as well.. -- 2.39.2