From 6bfa97ff7816e7c3bb41c230c34666f48fac48f2 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 18 Sep 2020 00:18:59 +0200 Subject: [PATCH] vdp rendering, fix highlight op on shadow --- pico/draw.c | 175 +++++++++++++++++++++++++------------------ pico/draw_arm.S | 194 ++++++++++++++++++------------------------------ 2 files changed, 174 insertions(+), 195 deletions(-) diff --git a/pico/draw.c b/pico/draw.c index ed818546..ae110461 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -19,14 +19,27 @@ * AS is enabled by user and takes priority over "sonic mode". * * since renderer always draws line in 8bit mode, there are 2 spare bits: - * b \ mode: s/h sonic - * 00 normal pal index - * 01 shadow pal index - * 10 hilight+op spr pal index - * 11 shadow +op spr pal index + * b \ mode: s/h sonic + * 00 normal pal index + * 01 hilight pal index + * 10 shadow pal index + * 11 hilight|shadow=normal pal index + * + * sprite ops can only be correctly done after the plane rendering s/h state is + * known since the op result changes if there's at least one high prio plane. 
+ * sprite op rendering is deferred until this is known, and hilight is used as + * mark since it can't occur before sprite ops: + * x1 op marker pal index + * + * low prio s/h rendering: + * - plane and non-op sprite pixels have shadow + * - sprite op pixel rendering is marked with hilight (deferred) + * high prio s/h rendering: + * - plane and non-op sprite pixels are normal + * - all op sprite pixels (marked low, or high prio) are rendered * * not handled properly: - * - hilight op on shadow tile + * - high prio s/h ops overlapping low prio sprite shows low sprite, not A,B,G */ #include "pico_int.h" @@ -49,10 +62,15 @@ static int HighPreSpr[80*2+1]; // slightly preprocessed sprites unsigned int VdpSATCache[128]; // VDP sprite cache (1st 32 sprite attr bits) -#define LF_PLANE_1 (1 << 0) +// NB don't change any defines without checking their usage in ASM + +#define LF_PLANE (1 << 0) // must be = 1 #define LF_SH (1 << 1) // must be = 2 #define LF_FORCE (1 << 2) +#define LF_PLANE_A 0 +#define LF_PLANE_B 1 + #define SPRL_HAVE_HI 0x80 // have hi priority sprites #define SPRL_HAVE_LO 0x40 // *lo* #define SPRL_MAY_HAVE_OP 0x20 // may have operator sprites on the line @@ -61,6 +79,7 @@ unsigned int VdpSATCache[128]; // VDP sprite cache (1st 32 sprite attr bits) #define SPRL_TILE_OVFL 0x04 // tile limit exceeded on previous line #define SPRL_HAVE_MASK0 0x02 // have sprite with x == 0 in 1st slot #define SPRL_MASKED 0x01 // lo prio masking by sprite with x == 0 active + unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; // sprite_count, ^flags, tile_count, sprites_total, [spritep]..., last_width int rendstatus_old; @@ -141,34 +160,35 @@ static void funcname(unsigned char *pd, unsigned int pack, int pal) \ TileFlipMaker_(pix_func,) #define TileNormMakerAS(funcname, pix_func) \ -static unsigned funcname(unsigned char *pd, unsigned m, unsigned int pack, int pal) \ +static unsigned funcname(unsigned m, unsigned char *pd, unsigned int pack, int pal) \ 
TileNormMaker_(pix_func,m) #define TileFlipMakerAS(funcname, pix_func) \ -static unsigned funcname(unsigned char *pd, unsigned m, unsigned int pack, int pal) \ +static unsigned funcname(unsigned m, unsigned char *pd, unsigned int pack, int pal) \ TileFlipMaker_(pix_func,m) +// draw layer or non-s/h sprite pixels (no operator colors) #define pix_just_write(x) \ if (t) pd[x]=pal|t -TileNormMaker(TileNorm,pix_just_write) -TileFlipMaker(TileFlip,pix_just_write) +TileNormMaker(TileNorm, pix_just_write) +TileFlipMaker(TileFlip, pix_just_write) #ifndef _ASM_DRAW_C -// draw a sprite pixel, process operator colors +// draw sprite pixels, process operator colors #define pix_sh(x) \ if (!t); \ - else if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ + else if (t>=0xe) pd[x]|=(t-1)<<6; /* 80 shadow, 40 hilight */ \ else pd[x]=pal|t TileNormMaker(TileNormSH, pix_sh) TileFlipMaker(TileFlipSH, pix_sh) -// draw a sprite pixel, mark operator colors +// draw sprite pixels, mark but don't process operator colors #define pix_sh_markop(x) \ if (!t); \ - else if (t>=0xe) pd[x]|=0x80; \ + else if (t>=0xe) pd[x]|=0x40; \ else pd[x]=pal|t TileNormMaker(TileNormSH_markop, pix_sh_markop) @@ -176,10 +196,10 @@ TileFlipMaker(TileFlipSH_markop, pix_sh_markop) #endif -// process operator pixels only, apply only on low pri tiles and other op pixels +// draw low prio sprite operator pixels if visible (i.e. 
marked) #define pix_sh_onlyop(x) \ - if (t>=0xe && (pd[x]&0xc0)) \ - pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ + if (t>=0xe && (pd[x]&0x40)) \ + pd[x]=(pd[x]&0xbf)|((t-1)<<6) #ifndef _ASM_DRAW_C @@ -190,24 +210,26 @@ TileFlipMaker(TileFlipSH_onlyop_lp, pix_sh_onlyop) // AS: sprite mask bits in m shifted to bits 8-15, see DrawSpritesHiAS -// draw a sprite pixel (AS) +// draw high prio sprite pixels (AS) #define pix_as(x) \ if (t && (m & (1<<(x+8)))) m &= ~(1<<(x+8)), pd[x] = pal | t TileNormMakerAS(TileNormAS, pix_as) TileFlipMakerAS(TileFlipAS, pix_as) -// draw a sprite pixel, process operator colors (AS) +// draw high prio sprite pixels, process operator colors (AS) +// sprite+planes: h+s->n, h+[nh]->h, s+[nhs]->s, hence mask h before op #define pix_sh_as(x) \ if (t && (m & (1<<(x+8)))) { \ m &= ~(1<<(x+8)); \ - if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ + if (t>=0xe) pd[x]=(pd[x]&0xbf)|((t-1)<<6); \ else pd[x] = pal | t; \ } TileNormMakerAS(TileNormSH_AS, pix_sh_as) TileFlipMakerAS(TileFlipSH_AS, pix_sh_as) +// draw only sprite operator pixels (AS) #define pix_sh_as_onlyop(x) \ if (t && (m & (1<<(x+8)))) { \ m &= ~(1<<(x+8)); \ @@ -217,7 +239,7 @@ TileFlipMakerAS(TileFlipSH_AS, pix_sh_as) TileNormMakerAS(TileNormSH_AS_onlyop_lp, pix_sh_as_onlyop) TileFlipMakerAS(TileFlipSH_AS_onlyop_lp, pix_sh_as_onlyop) -// mark pixel as sprite pixel (AS) +// mark low prio sprite pixels (AS) #define pix_sh_as_onlymark(x) \ if (t) m &= ~(1<<(x+8)) @@ -236,7 +258,7 @@ TileFlipMaker(TileFlip_and, pix_and) #define pix_sh_as_and(x) /* XXX is there S/H with forced draw? 
*/ \ if (m & (1<<(x+8))) { \ m &= ~(1<<(x+8)); \ - if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ + if (t>=0xe) pd[x]=(pd[x]&0xbf)|((t-1)<<6); \ else pd[x] = (pd[x] & 0xc0) | (pd[x] & (pal | t)); \ } @@ -252,10 +274,9 @@ static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) unsigned char *pd = Pico.est.HighCol; int tilex,dx,ty,code=0,addr=0,cells; int oldcode=-1,blank=-1; // The tile we know is blank - int pal=0,sh; + int pal=0; // Draw tiles across screen: - sh = (lflags & LF_SH) << 5; // 0x40 tilex=((-ts->hscroll)>>3)+cellskip; ty=(ts->line&7)<<1; // Y-Offset into tile dx=((ts->hscroll-1)&7)+1; @@ -268,14 +289,14 @@ static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) unsigned int pack; code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; - if (code == blank) - continue; if ((code >> 15) | (lflags & LF_FORCE)) { // high priority tile int cval = code | (dx<<16) | (ty<<25); if(code&0x1000) cval^=7<<26; *ts->hc++ = cval; // cache it continue; } + if (code == blank) + continue; if (code!=oldcode) { oldcode = code; @@ -284,7 +305,7 @@ static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) addr+=ty; if (code&0x1000) addr^=0xe; // Y-flip - pal=((code>>9)&0x30)|sh; + pal=((code>>9)&0x30)|((lflags&LF_SH)<<6); // shadow } pack = *(unsigned int *)(PicoMem.vram + addr); @@ -341,20 +362,20 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) } code=PicoMem.vram[ts->nametab+nametabadd+(tilex&ts->xmask)]; - if ((code<<16|ty)==blank) continue; if (code>>15) { // high priority tile int cval = code | (dx<<16) | (ty<<25); if(code&0x1000) cval^=7<<26; *ts->hc++ = cval; // cache it continue; } + if ((code<<16|ty)==blank) continue; if (code!=oldcode) { oldcode = code; // Get tile address/2: addr=(code&0x7ff)<<4; - pal=((code>>9)&0x30)|((plane_sh<<5)&0x40); + pal=((code>>9)&0x30)|((plane_sh&LF_SH)<<6); // shadow } if (code & 0x1000) ty ^= 0xe; // Y-flip @@ -378,7 +399,7 @@ static void 
DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) #ifndef _ASM_DRAW_C static #endif -void DrawStripInterlace(struct TileStrip *ts) +void DrawStripInterlace(struct TileStrip *ts, int plane_sh) { unsigned char *pd = Pico.est.HighCol; int tilex=0,dx=0,ty=0,code=0,addr=0,cells; @@ -412,8 +433,7 @@ void DrawStripInterlace(struct TileStrip *ts) addr=(code&0x3ff)<<5; if (code&0x1000) addr+=30-ty; else addr+=ty; // Y-flip -// pal=Pico.cram+((code>>9)&0x30); - pal=((code>>9)&0x30); + pal=((code>>9)&0x30)|((plane_sh&LF_SH)<<6); // shadow } pack = *(unsigned int *)(PicoMem.vram + addr); @@ -460,8 +480,8 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, } // Find name table: - if (plane_sh&1) ts.nametab=(pvid->reg[4]&0x07)<<12; // B - else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A + if (plane_sh&LF_PLANE) ts.nametab=(pvid->reg[4]&0x07)<<12; // B + else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A htab=pvid->reg[13]<<9; // Horizontal scroll table address switch (pvid->reg[11]&3) { @@ -469,20 +489,20 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, case 2: htab += (est->DrawScanline<<1) & ~0x0f; break; // Offset by tile case 3: htab += (est->DrawScanline<<1); break; // Offset by line } - htab+=plane_sh&1; // A or B + htab+=plane_sh&LF_PLANE; // A or B // Get horizontal scroll value, will be masked later ts.hscroll = PicoMem.vram[htab & 0x7fff]; if((pvid->reg[12]&6) == 6) { // interlace mode 2 - vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value + vscroll = PicoMem.vsram[plane_sh&LF_PLANE]; // Get vertical scroll value // Find the line in the name table ts.line=(vscroll+(est->DrawScanline<<1))&((ymask<<1)|1); ts.nametab+=(ts.line>>4)<<shift[width]; - DrawStripInterlace(&ts); + DrawStripInterlace(&ts, plane_sh); } else if( pvid->reg[11]&4) { // shit, we have 2-cell column based vscroll // luckily this doesn't happen too often @@ -490,10 +510,10 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells, // vscroll value for leftmost cells in case of hscroll not on 16px
boundary // XXX it's unclear what exactly the hw is doing. Continue reading where it // stopped last seems to work best (H40: 0x50 (wrap->0x00), H32 0x40). - plane_sh |= PicoMem.vsram[(pvid->reg[12]&1?0x00:0x20) + (plane_sh&1)] << 16; + plane_sh |= PicoMem.vsram[(pvid->reg[12]&1?0x00:0x20) + (plane_sh&LF_PLANE)] << 16; DrawStripVSRam(&ts, plane_sh, cellskip); } else { - vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value + vscroll = PicoMem.vsram[plane_sh&LF_PLANE]; // Get vertical scroll value // Find the line in the name table ts.line=(vscroll+est->DrawScanline)&ymask; @@ -591,10 +611,10 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, if (prio) { int *zb = (int *)(est->HighCol+8+(tilex<<3)); - *zb++ &= 0xbfbfbfbf; - *zb &= 0xbfbfbfbf; + *zb++ &= 0x7f7f7f7f; + *zb &= 0x7f7f7f7f; } else { - pal |= 0x40; + pal |= 0x80; } // Get tile address/2: @@ -626,7 +646,7 @@ static void DrawTilesFromCacheShPrep(void) Pico.est.rendstatus |= PDRAW_SHHI_DONE; while (c--) { - *zb++ &= 0xbfbfbfbf; + *zb++ &= 0x7f7f7f7f; } } @@ -651,7 +671,7 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est while ((code=*hc++)) { // Get tile address/2: addr = (code & 0x7ff) << 4; - addr += code >> 25; // y offset into tile + addr += (unsigned int)code >> 25; // y offset into tile pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) @@ -676,8 +696,8 @@ static void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est addr+=(unsigned int)code>>25; // y offset into tile dx=(code>>16)&0x1ff; zb = est->HighCol+dx; - *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; - *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; + *zb++ &= 0x7f; *zb++ &= 0x7f; *zb++ &= 0x7f; *zb++ &= 0x7f; + *zb++ &= 0x7f; *zb++ &= 0x7f; *zb++ &= 0x7f; *zb++ &= 0x7f; pack = *(unsigned int *)(PicoMem.vram + addr); if (!pack) @@ -765,7 +785,7 @@ static void DrawSprite(int *sprite, int sh, int w) delta<<=4; // Delta of address 
pal=(code>>9)&0x30; - pal|=sh<<6; + pal|=sh<<7; // shadow if (sh && (code&0x6000) == 0x6000) { if(code&0x0800) fTileFunc=TileFlipSH_markop; @@ -960,13 +980,13 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) static void DrawSpritesHiAS(unsigned char *sprited, int sh) { - static unsigned (*tilefuncs[2][2][2])(unsigned char *, unsigned, unsigned, int) = { + static unsigned (*tilefuncs[2][2][2])(unsigned, unsigned char *, unsigned, int) = { { {TileNormAS_onlymark, TileFlipAS_onlymark}, {TileNormAS, TileFlipAS} }, { {TileNormSH_AS_onlyop_lp, TileFlipSH_AS_onlyop_lp}, {TileNormSH_AS, TileFlipSH_AS} } }; // [sh?][hi?][flip?] - unsigned (*fTileFunc)(unsigned char *pd, unsigned m, unsigned int pack, int pal); + unsigned (*fTileFunc)(unsigned m, unsigned char *pd, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; - unsigned char mb[1+320/8+1]; + unsigned char mb[sizeof(DefHighCol)/8]; unsigned char *p, *mp; unsigned m; int entry, cnt; @@ -1023,7 +1043,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) m |= mp[1] << 8; // next mask byte // shift mask bits to bits 8-15 for easier load/store handling - m = fTileFunc(pd + sx, m << (8-(sx&0x7)), pack, pal) >> (8-(sx&0x7)); + m = fTileFunc(m << (8-(sx&0x7)), pd + sx, pack, pal) >> (8-(sx&0x7)); } *mp = m; // write last mask byte } @@ -1035,10 +1055,9 @@ static void DrawStripForced(struct TileStrip *ts, int lflags, int cellskip) unsigned char *pd = Pico.est.HighCol; int tilex,dx,ty,code=0,addr=0,cells; int oldcode=-1; - int pal=0,sh; + int pal=0; // Draw tiles across screen: - sh = (lflags & LF_SH) << 5; // 0x40 tilex=((-ts->hscroll)>>3)+cellskip; ty=(ts->line&7)<<1; // Y-Offset into tile dx=((ts->hscroll-1)&7)+1; @@ -1059,7 +1078,7 @@ static void DrawStripForced(struct TileStrip *ts, int lflags, int cellskip) addr+=ty; if (code&0x1000) addr^=0xe; // Y-flip - pal=((code>>9)&0x30)|sh; + pal=((code>>9)&0x30)|((lflags & LF_SH) << 6); } pack = *(unsigned int 
*)(PicoMem.vram + addr); @@ -1113,7 +1132,7 @@ static void DrawStripVSRamForced(struct TileStrip *ts, int plane_sh, int cellski // Get tile address/2: addr=(code&0x7ff)<<4; - pal=((code>>9)&0x30)|((plane_sh<<5)&0x40); + pal=((code>>9)&0x30)|((plane_sh&LF_SH)<<6); // shadow } if (code & 0x1000) ty ^= 0xe; // Y-flip @@ -1172,7 +1191,7 @@ static void DrawLayerForced(int plane_sh, int cellskip, int maxcells, ts.line=(vscroll+(est->DrawScanline<<1))&((ymask<<1)|1); ts.nametab+=(ts.line>>4)<<shift[width]; - DrawStripInterlace(&ts); + DrawStripInterlace(&ts, plane_sh); } else if( pvid->reg[11]&4) { // shit, we have 2-cell column based vscroll // luckily this doesn't happen too often @@ -1196,9 +1215,9 @@ static void DrawLayerForced(int plane_sh, int cellskip, int maxcells, // rather messy (XXX revisit layer compositing) static void DrawSpritesForced(unsigned char *sprited) { - unsigned (*fTileFunc)(unsigned char *pd, unsigned m, unsigned int pack, int pal); + unsigned (*fTileFunc)(unsigned m, unsigned char *pd, unsigned int pack, int pal); unsigned char *pd = Pico.est.HighCol; - unsigned char mb[1+320/8+1]; + unsigned char mb[sizeof(DefHighCol)/8]; unsigned char *p, *mp; unsigned m; int entry, cnt; @@ -1256,7 +1275,7 @@ static void DrawSpritesForced(unsigned char *sprited) m |= mp[1] << 8; // next mask byte // shift mask bits to bits 8-15 for easier load/store handling - m = fTileFunc(pd + sx, m << (8-(sx&0x7)), pack, pal) >> (8-(sx&0x7)); + m = fTileFunc(m << (8-(sx&0x7)), pd + sx, pack, pal) >> (8-(sx&0x7)); } *mp = m; // write last mask byte } @@ -1425,7 +1444,7 @@ void BackFill(int reg7, int sh, struct PicoEState *est) // Start with a blank scanline (background colour): back=reg7&0x3f; - back|=sh<<6; + back|=sh<<7; // shadow back|=back<<8; back|=back<<16; @@ -1460,7 +1479,7 @@ void PicoDoHighPal555_8bit(int sh, int line, struct PicoEState *est) // treat it like it was 4-bit per channel, since in s/h mode it somewhat is that.
// otherwise intensity difference between this and s/h will be wrong t |= (t >> 4) & 0x08610861; // 0x18e318e3 - dpal[i] = t; + dpal[i] = dpal[0xc0/2 + i] = t; } // norm: xxx0, sh: 0xxx, hi: 0xxx + 7 @@ -1468,13 +1487,18 @@ void PicoDoHighPal555_8bit(int sh, int line, struct PicoEState *est) { // shadowed pixels for (i = 0; i < 0x40 / 2; i++) - dpal[0x40/2 | i] = dpal[0xc0/2 | i] = (dpal[i] >> 1) & 0x738e738e; + dpal[0x80/2 + i] = (dpal[i] >> 1) & 0x738e738e; // hilighted pixels for (i = 0; i < 0x40 / 2; i++) { t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e; // 0x7bef7bef; t |= (t >> 4) & 0x08610861; - dpal[0x80/2 | i] = t; + dpal[0x40/2 + i] = t; } + // pixels in color 14 always appear normal (hw bug?) + unsigned short *hpal = est->HighPal; + hpal[0x80 + 0x0e] = hpal[0x40 + 0x0e] = hpal[0x0e]; + hpal[0x80 + 0x1e] = hpal[0x40 + 0x1e] = hpal[0x1e]; + hpal[0x80 + 0x2e] = hpal[0x40 + 0x2e] = hpal[0x2e]; } } @@ -1492,14 +1516,14 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est) for (i = 0; i < 0x40 / 2; i++) { t = spal[i]; #ifdef USE_BGR555 - t = ((t & 0x000e000e)<< 1) | ((t & 0x00e000e0)<<3) | ((t & 0x0e000e00)<<4); + t = ((t & 0x0e000e00)<< 3) | ((t & 0x00e000e0)<<2) | ((t & 0x000e000e)<<1); #else t = ((t & 0x000e000e)<<12) | ((t & 0x00e000e0)<<3) | ((t & 0x0e000e00)>>7); #endif // treat it like it was 4-bit per channel, since in s/h mode it somewhat is that. 
// otherwise intensity difference between this and s/h will be wrong t |= (t >> 4) & 0x08610861; // 0x18e318e3 - dpal[i] = t; + dpal[i] = dpal[0xc0/2 + i] = t; } // norm: xxx0, sh: 0xxx, hi: 0xxx + 7 @@ -1507,13 +1531,18 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est) { // shadowed pixels for (i = 0; i < 0x40 / 2; i++) - dpal[0x40/2 | i] = dpal[0xc0/2 | i] = (dpal[i] >> 1) & 0x738e738e; + dpal[0x80/2 + i] = (dpal[i] >> 1) & 0x738e738e; // hilighted pixels for (i = 0; i < 0x40 / 2; i++) { t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e; // 0x7bef7bef; t |= (t >> 4) & 0x08610861; - dpal[0x80/2 | i] = t; + dpal[0x40/2 + i] = t; } + // pixels in color 14 always appear normal (hw bug?) + unsigned short *hpal = est->HighPal; + hpal[0x80 + 0x0e] = hpal[0x40 + 0x0e] = hpal[0x0e]; + hpal[0x80 + 0x1e] = hpal[0x40 + 0x1e] = hpal[0x1e]; + hpal[0x80 + 0x2e] = hpal[0x40 + 0x2e] = hpal[0x2e]; } } @@ -1636,11 +1665,11 @@ static int DrawDisplay(int sh) /* - layer B low - */ if (!(pvid->debug_p & PVD_KILL_B)) { - lflags = LF_PLANE_1 | (sh << 1); + lflags = LF_PLANE_B | (sh<<1); DrawLayer(lflags, HighCacheB, 0, maxcells, est); } /* - layer A low - */ - lflags = 0 | (sh << 1); + lflags = LF_PLANE_A | (sh<<1); if (pvid->debug_p & PVD_KILL_A) ; else if (hvwind == 1) @@ -1680,7 +1709,7 @@ static int DrawDisplay(int sh) else if (est->rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(1, sh); // have sprites without layer pri bit ontop of sprites with that bit - else if ((sprited[1] & 0xd0) == 0xd0 && (PicoIn.opt & POPT_ACC_SPRITES)) + else if ((sprited[1] & SPRL_LO_ABOVE_HI) && (PicoIn.opt & POPT_ACC_SPRITES)) DrawSpritesHiAS(sprited, sh); else if (sh && (sprited[1] & SPRL_MAY_HAVE_OP)) DrawSpritesSHi(sprited, est); @@ -1689,10 +1718,10 @@ static int DrawDisplay(int sh) #ifdef FORCE if (pvid->debug_p & PVD_FORCE_B) { - lflags = LF_PLANE_1 | (sh << 1); + lflags = LF_PLANE_B | (sh<<1); DrawLayerForced(lflags, 0, maxcells, est); } else if (pvid->debug_p & PVD_FORCE_A) { 
- lflags = (sh << 1); + lflags = LF_PLANE_A | (sh<<1); DrawLayerForced(lflags, 0, maxcells, est); } else if (pvid->debug_p & PVD_FORCE_S) DrawSpritesForced(sprited); diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 0579006c..54d02277 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -76,7 +76,7 @@ .endif ldreqb r4, [r1,#\offs] orrne r4, r3, r4 - andeq r4, r4, #0xbf + andeq r4, r4, #0x7f strb r4, [r1,#\offs] .endm @@ -108,52 +108,48 @@ @ TileSingleSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx; r12: helper pattern 0xf .macro TileSingleSh tst r0, #1 @ not aligned? - mov r7, #0x00c000 - orr r7, r7, #0xc0 - ldrneb r4, [r1] - ldreqh r4, [r1] - orr r4, r4, r7 - strneb r4, [r1], #1 - streqh r4, [r1], #2 - ldrh r4, [r1] - orr r4, r4, r7 - strh r4, [r1], #2 - ldrh r4, [r1] - orr r4, r4, r7 - strh r4, [r1], #2 - ldrh r4, [r1] - orr r4, r4, r7 - strh r4, [r1], #2 - ldrneb r4, [r1] - orr r4, r4, r7 - strneb r4, [r1], #1 + mov r7, #0x008000 + orr r7, r7, #0x80 + ldrneb r4, [r1], #1 + ldreqh r4, [r1], #2 @ 1ci + ldrh r12, [r1], #2 + orr r4, r4, r7 + strneb r4, [r1, #-3] + streqh r4, [r1, #-4] + ldrh r4, [r1], #2 + orr r12, r12, r7 + strh r12, [r1, #-4] + ldrh r12, [r1], #2 + orr r4, r4, r7 + strh r4, [r1, #-4] + ldrneb r4, [r1] + orr r12, r12, r7 + strh r12, [r1, #-2] + orrne r4, r4, r7 + strneb r4, [r1], #1 + mov r12, #0xf .endm @ TileSingleHi (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf .macro TileSingleHi tst r1, #1 @ not aligned? 
- mov r7, #0x008000 - orr r7, r7, #0x80 + mov r7, #0x004000 + orr r7, r7, #0x40 ldrneb r4, [r1], #1 ldreqh r4, [r1], #2 @ 1ci ldrh r12, [r1], #2 - bic r4, r4, r7, lsr #1 orr r4, r4, r7 strneb r4, [r1, #-3] streqh r4, [r1, #-4] ldrh r4, [r1], #2 - bic r12, r12, r7, lsr #1 orr r12, r12, r7 strh r12, [r1, #-4] ldrh r12, [r1], #2 - bic r4, r4, r7, lsr #1 orr r4, r4, r7 strh r4, [r1, #-4] ldrneb r4, [r1] - bic r12, r12, r7, lsr #1 orr r12, r12, r7 strh r12, [r1, #-2] - bicne r4, r4, r7, lsr #1 orrne r4, r4, r7 strneb r4, [r1], #1 mov r12, #0xf @@ -170,7 +166,7 @@ ldrgeb r7, [r1,#\ofs] orrlt r7, r3, r4 @ normal - bicge r7, r7, #0xc0 + subge r4, r4, #1 orrge r7, r7, r4, lsl #6 strb r7, [r1,#\ofs] 0: @@ -210,7 +206,7 @@ cmp r4, #0xe ldrgeb r4, [r1,#\ofs] orrlt r4, r3, r4 - orrge r4, r4, #0x80 + orrge r4, r4, #0x40 strb r4, [r1,#\ofs] 0: .endm @@ -247,8 +243,9 @@ cmp r7, #0xe blt 0f - tst r4, #0xc0 - bicne r4, r4, #0xc0 + tst r4, #0x40 + bicne r4, r4, #0x40 + subne r7, r7, #1 orrne r4, r4, r7, lsl #6 strneb r4, [r1,#\ofs] 0: @@ -395,7 +392,7 @@ DrawLayer: mov r3, #0 orrmi r10,r10, #1<<23 @ r10=cells[31:24]|sh[23]|hi_not_empty[22] orrcs r10,r10, #1<<20 @ |had_output[21]|force[20]|ty[15:0] - movmi r3, #0x40 @ default to shadowed pal on sh mode + movmi r3, #0x80 @ default to shadowed pal on sh mode cmp r7, #8 addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll @@ -447,7 +444,7 @@ DrawLayer: ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - bic r7, r3, #0x3f + bic r7, r3, #0x7f and r3, r9, #0x6000 add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); @@ -483,23 +480,7 @@ DrawLayer: strneb r4, [r1], #1 @ have a remaining unaligned pixel? 
b .dsloop_subr1 -.DrawStrip_hiprio_maybempt: - cmp r7, r9 - beq .dsloop @ must've been empty, otherwise we wouldn't get here - movs r2, r7, lsl #20 @ if (code&0x1000) - mov r2, r2, lsl #1 - add r2, r2, r10, lsl #17 - mov r2, r2, lsr #17 - eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - mov r9, r7 @ remember code - tst r2, r2 - beq .dsloop - orr r10, r10, #1<<22 - .DrawStrip_hiprio: - tst r10, #0x00d00000 @ sh[23]|hi_not_empty[22]|force[20] - beq .DrawStrip_hiprio_maybempt sub r0, r1, r11 orr r7, r7, r0, lsl #16 orr r7, r7, r10, lsl #25 @ (ty<<25) @@ -558,10 +539,10 @@ DrawLayer: ldreq r3, [r1, #0x40] @ r3=vsram[0x20..0x21] str r3, [r1, #0x7c] @ vsram[0x3e..0x3f]=r3 0: - tst r9, #1<<31 + tst r9, #1<<30 mov r3, #0 orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) - movne r3, #0x40 @ default to shadowed pal on sh mode + movne r3, #0x80 @ default to shadowed pal on sh mode and r9, r9, #0xff00 add r8, r8, r9, lsr #8 @ tilex+=cellskip @@ -630,7 +611,7 @@ DrawLayer: ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels - bic r7, r3, #0x3f + bic r7, r3, #0x7f and r3, r9, #0x6000 add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); @@ -667,8 +648,6 @@ DrawLayer: b .dsloop_vs_subr1 .DrawStrip_vs_hiprio: - tst r10, #0x00c00000 - beq .DrawStrip_vs_hiprio_maybempt sub r0, r1, r11 orr r7, r7, r0, lsl #16 orr r7, r7, r10, lsl #25 @ (ty<<25) @@ -678,21 +657,6 @@ DrawLayer: mov r0, #0xf b .dsloop_vs -.DrawStrip_vs_hiprio_maybempt: - cmp r7, r9 - beq .dsloop_vs @ must've been empty, otherwise we wouldn't get here - movs r2, r7, lsl #20 @ if (code&0x1000) - mov r2, r2, lsl #1 - add r2, r2, r10, lsl #17 - mov r2, r2, lsr #17 - eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels - mov r9, r7 @ remember code - tst r2, r2 - orrne r10, 
r10, #1<<22 - bne .DrawStrip_vs_hiprio - b .dsloop_vs - .dsloop_vs_exit: tst r8, #(1<<24) @ seen non hi-prio tile ldr r1, [sp, #9*4] @ est @@ -728,7 +692,8 @@ DrawLayer: stmia sp, {r0,r2,r3,r5,r6,r9} mov r0, sp - bl DrawStripInterlace @ struct TileStrip *ts + mov r1, r9, lsr #29 + bl DrawStripInterlace @ struct TileStrip *ts, int plane_sh add sp, sp, #6*4 ldmfd sp!, {r4-r11,lr} @@ -750,7 +715,7 @@ BackFill: mov r0, r0, lsr #26 add lr, lr, #8 - orr r0, r0, r1, lsl #6 + orr r0, r0, r1, lsl #7 orr r0, r0, r0, lsl #8 orr r0, r0, r0, lsl #16 @@ -881,8 +846,8 @@ DrawTilesFromCache: .dtfc_shadow_blank: tst r1, #1 ldrneb r4, [r1] - mov r6, #0xbf - and r4, r4, #0xbf + mov r6, #0x7f + and r4, r4, r6 strneb r4, [r1], #1 ldrh r4, [r1] orr r6, r6, r6, lsl #8 @@ -932,7 +897,7 @@ DrawTilesFromCache: add r1, r11,#8 mov r3, #320/4/4 - mov r6, #0xbf + mov r6, #0x7f orr r6, r6, r6, lsl #8 orr r6, r6, r6, lsl #16 .dtfc_loop_shprep: @@ -1231,7 +1196,7 @@ DrawSprite: orrs r9, r9, #0x10000000 @ r9=scc1 ???? ... (s=shadow/hilight, cc=pal) mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); - orrmi r3, r3, #0x40 @ for sh/hi + orrmi r3, r3, #0x80 @ for sh/hi adds r0, r2, #0 @ mov sx to r0 and set ZV flags b .dspr_loop_enter @@ -1427,10 +1392,10 @@ DrawWindow: .dw_shadow: tst r6, #1 @ hi pri? 
- orreq r3, r3, #0x40 + orreq r3, r3, #0x80 beq .dw_shadow_done ldr r4, [r1] - mov r5, #0x3f + mov r5, #0x7f orr r5, r5, r5, lsl #8 orr r5, r5, r5, lsl #16 and r4, r4, r5 @@ -1454,33 +1419,6 @@ DrawWindow: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -@ hilights 2 pixels in RGB444/BGR444 format -.macro TileDoShHi2Pixels444 reg - mov \reg, \reg, ror #12 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #28 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #28 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #24 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #28 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #28 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #12 -.endm - - -@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - - @ Convert 0000bbb0 ggg0rrr0 @ to rrrrrggg gggbbbbb @@ -1558,28 +1496,15 @@ PicoDoHighPal555: beq PicoDoHighPal555_end add r3, r10, #OFS_EST_HighPal - - @ shadowed pixels: - mov r12, #0x008e add r4, r3, #0x40*2 - orr r12,r12,#0x7300 - add r5, r3, #0xc0*2 - orr r12,r12,r12,lsl #16 - mov lr, #0x40/4 -.fl_loopcpRGB555_sh: - ldmia r3!, {r1,r6} - subs lr, lr, #1 - and r1, r12, r1, lsr #1 - and r6, r12, r6, lsr #1 - stmia r4!, {r1,r6} - stmia r5!, {r1,r6} - bne .fl_loopcpRGB555_sh - @ hilighted pixels: + @ hilighted pixels (0x40-0x7f): @ t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e; @ t |= (t >> 4) & 0x08610861; @ r8=0x08610861 - sub r3, r3, #0x40*2 + mov r12, #0x008e + orr r12,r12,#0x7300 + orr r12,r12,r12,lsl #16 mov lr, #0x40/4 .fl_loopcpRGB555_hi: ldmia r3!, {r1,r6} @@ -1594,8 +1519,33 @@ PicoDoHighPal555: stmia r4!, {r1,r6} subs lr, lr, #1 bne .fl_loopcpRGB555_hi - mov r0, #1 + sub r3, r3, #0x40*2 + @ shadowed (0x80-0xbf), shadow|hilight (aka normal, 0xc0-0xff) 
pixels: + add r5, r3, #0xc0*2 + mov lr, #0x40/4 +.fl_loopcpRGB555_sh: + ldmia r3!, {r1,r6} + subs lr, lr, #1 + stmia r5!, {r1,r6} @ 0xc0, normal + and r1, r12, r1, lsr #1 + and r6, r12, r6, lsr #1 + stmia r4!, {r1,r6} + bne .fl_loopcpRGB555_sh + + @ fixup color 14 in palette 0,1,2 (always normal) + sub r4, r3, #0x40*2 + ldrh r1, [r4, #0x0e*2] @ 0x0e, 0x1e, 0x2e + ldrh r5, [r4, #0x1e*2] + ldrh r6, [r4, #0x2e*2] + strh r1, [r3, #0x0e*2] @ 0x4e, 0x5e, 0x6e + strh r5, [r3, #0x1e*2] + strh r6, [r3, #0x2e*2] + strh r1, [r3, #0x4e*2] @ 0x8e, 0x9e, 0xae + strh r5, [r3, #0x5e*2] + strh r6, [r3, #0x6e*2] + + mov r0, #1 PicoDoHighPal555_end: ldmfd sp!, {r4-r10,pc} -- 2.39.5