From: kub Date: Sat, 14 Mar 2020 18:52:27 +0000 (+0100) Subject: vdp fifo speed optimization X-Git-Tag: v2.00~765 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c55a44a88c217900cd4f56f164f14cb680f7597a;p=picodrive.git vdp fifo speed optimization --- diff --git a/pico/misc.c b/pico/misc.c index 4837fd3e..74d4d8a8 100644 --- a/pico/misc.c +++ b/pico/misc.c @@ -48,6 +48,135 @@ const unsigned char hcounts_32[] = { 0x82,0x83,0x83,0x84,0x85,0x85,0x86,0x87,0x87,0x88,0x89,0x8a,0x8a,0x8b,0x8c,0x8c, }; +// VDP transfer slots for blanked and active display in 32col and 40col mode. +// 1 slot is 488/171 = 2.8538 68k cycles in h32, and 488/210 = 2.3238 in h40 +// In blanked display, all slots but 5(h32) / 6(h40) are usable for transfers, +// in active display only 16(h32) / 18(h40) slots can be used. + +// XXX inactive tables by slot#=cycles*maxslot#/488. should be through hv tables +// VDP transfer slots in inactive (blanked) display 32col mode. +// refresh slots: 250, 26, 58, 90, 122 -> 32, 64, 96, 128, 160 +const unsigned char vdpcyc2sl_32_bl[] = { // 68k cycles/2 to slot # +// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 + 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, + 10, 11, 12, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, + 21, 22, 23, 23, 24, 25, 25, 26, 27, 27, 28, 29, 29, 30, 31, 31, + 32, 33, 34, 34, 35, 36, 36, 37, 38, 38, 39, 40, 40, 41, 42, 42, + 43, 44, 44, 45, 46, 46, 47, 48, 48, 49, 50, 51, 51, 52, 53, 53, + 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 61, 61, 62, 63, 63, 64, + 65, 65, 66, 67, 68, 68, 69, 70, 70, 71, 72, 72, 73, 74, 74, 75, + 76, 76, 77, 78, 78, 79, 80, 80, 81, 82, 83, 83, 84, 85, 85, 86, + 87, 87, 88, 89, 89, 90, 91, 91, 92, 93, 93, 94, 95, 95, 96, 97, + 97, 98, 99,100,100,101,102,102,103,104,104,105,106,106,107,108, + 108,109,110,110,111,112,112,113,114,114,115,116,117,117,118,119, + 119,120,121,121,122,123,123,124,125,125,126,127,127,128,129,129, + 130,131,131,132,133,134,134,135,136,136,137,138,138,139,140,140, 
+ 141,142,142,143,144,144,145,146,146,147,148,148,149,150,151,151, + 152,153,153,154,155,155,156,157,157,158,159,159,160,161,161,162, + 163,163,164,165,166,166,167,168,168,169,170,170,171,172,172,173, +}; +// VDP transfer slots in inactive (blanked) display 40col mode. +// refresh slots: 250, 26, 58, 90, 122, 154 -> 40, 72, 104, 136, 168, 200 +const unsigned char vdpcyc2sl_40_bl[] = { // 68k cycles/2 to slot # +// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 + 0, 0, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10, 10, 11, 12, + 13, 14, 15, 15, 16, 17, 18, 19, 20, 20, 21, 22, 23, 24, 25, 25, + 26, 27, 28, 29, 30, 30, 31, 32, 33, 34, 35, 35, 36, 37, 38, 39, + 40, 40, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 51, 52, + 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 61, 62, 63, 64, 65, 66, + 66, 67, 68, 69, 70, 71, 71, 72, 73, 74, 75, 76, 76, 77, 78, 79, + 80, 81, 81, 82, 83, 84, 85, 86, 86, 87, 88, 89, 90, 91, 91, 92, + 93, 94, 95, 96, 96, 97, 98, 99,100,101,102,102,103,104,105,106, + 107,107,108,109,110,111,112,112,113,114,115,116,117,117,118,119, + 120,121,122,122,123,124,125,126,127,127,128,129,130,131,132,132, + 133,134,135,136,137,137,138,139,140,141,142,142,143,144,145,146, + 147,147,148,149,150,151,152,153,153,154,155,156,157,158,158,159, + 160,161,162,163,163,164,165,166,167,168,168,169,170,171,172,173, + 173,174,175,176,177,178,178,179,180,181,182,183,183,184,185,186, + 187,188,188,189,190,191,192,193,193,194,195,196,197,198,198,199, + 200,201,202,203,204,204,205,206,207,208,209,209,210,211,212,213, +}; +// VDP transfer slots in active display 32col mode. 
Transfer slots (Hint=0): +// 11,25,40,48,56,72,80,88,104,112,120,136,144,152,167,168 +const unsigned char vdpcyc2sl_32[] = { // 68k cycles/2 to slot # +// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, +}; +// VDP transfer slots in active display 40col mode. 
Transfer slots (Hint=0): +// 21,47,55,63,79,87,95,111,119,127,143,151,159,175,183,191,206,207 +const unsigned char vdpcyc2sl_40[] = { // 68k cycles/2 to slot # +// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 32 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64 + 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 96 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, // 128 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, // 160 + 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, // 192 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 224 + 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, // 256 + 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 288 + 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, // 320 + 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, // 352 + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, // 384 + 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, // 416 + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, // 448 + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, // 480 +}; + +// XXX inactive tables by cyc=slot#*488/maxslot#. 
should be through hv tables +const unsigned short vdpsl2cyc_32_bl[] = { // slot # to 68k cycles/2 + 0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, + 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46, + 48, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, + 71, 73, 74, 75, 77, 78, 80, 81, 83, 84, 86, 87, 89, 90, 92, 93, + 95, 96, 98, 99,100,102,103,105,106,108,109,111,112,114,115,117, + 118,120,121,122,124,125,127,128,130,131,133,134,136,137,139,140, + 142,143,145,146,147,149,150,152,153,155,156,158,159,161,162,164, + 165,167,168,170,171,172,174,175,177,178,180,181,183,184,186,187, + 189,190,192,193,195,196,197,199,200,202,203,205,206,208,209,211, + 212,214,215,217,218,220,221,222,224,225,227,228,230,231,233,234, + 236,237,239,240,242,243,244,246, +}; +const unsigned short vdpsl2cyc_40_bl[] = { // slot # to 68k cycles/2 + 0, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, + 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 38, + 39, 40, 41, 42, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 56, 57, + 58, 59, 60, 61, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 75, 76, + 77, 78, 79, 81, 82, 83, 84, 85, 87, 88, 89, 90, 91, 93, 94, 95, + 96, 97, 99,100,101,102,103,105,106,107,108,109,111,112,113,114, + 115,117,118,119,120,121,122,124,125,126,127,128,130,131,132,133, + 134,136,137,138,139,140,142,143,144,145,146,148,149,150,151,152, + 154,155,156,157,158,160,161,162,163,164,166,167,168,169,170,172, + 173,174,175,176,178,179,180,181,182,183,185,186,187,188,189,191, + 192,193,194,195,197,198,199,200,201,203,204,205,206,207,209,210, + 211,212,213,215,216,217,218,219,221,222,223,224,225,227,228,229, + 230,231,233,234,235,236,237,239,240,241,242,243,244,246, +}; +const unsigned short vdpsl2cyc_32[] = { // slot # to 68k cycles/2 + 0, 16, 36, 56, 67, 79,102,113,125,148,159,171,194,205,217,239, + 240,260 +}; +const unsigned short vdpsl2cyc_40[] = { // slot # to 68k cycles/2 + 0, 24, 55, 64, 73, 92,101,110,129,138,147,166,175,184,203,212, + 
221,239,240,268 +}; + #ifndef _ASM_MISC_C PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count) { diff --git a/pico/pico.c b/pico/pico.c index 9db2fc64..87e22e59 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -79,6 +79,7 @@ void PicoPower(void) Pico.video.reg[0] = Pico.video.reg[1] = 0x04; Pico.video.reg[0xc] = 0x81; Pico.video.reg[0xf] = 0x02; + PicoVideoFIFOMode(0, 1); if (PicoIn.AHW & PAHW_MCD) PicoPowerMCD(); diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 50a632ca..017c404b 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -179,6 +179,7 @@ static int PicoFrameHints(void) } pv->status |= SR_VB | PVS_VB2; // go into vblank + PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); // the following SekRun is there for several reasons: // there must be a delay after vblank bit is set and irq is asserted (Mazin Saga) @@ -270,6 +271,7 @@ static int PicoFrameHints(void) pv->status &= ~(SR_VB | PVS_VB2); pv->status |= ((pv->reg[1] >> 3) ^ SR_VB) & SR_VB; // forced blanking + PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); // last scanline Pico.m.scanline = y++; diff --git a/pico/pico_int.h b/pico/pico_int.h index 65b56f1d..c0f2c343 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -299,6 +299,8 @@ extern SH2 sh2s[2]; #define PVS_CPUWR (1 << 18) // CPU write blocked by FIFO full #define PVS_CPURD (1 << 19) // CPU read blocked by FIFO not empty #define PVS_DMAFILL (1 << 20) // DMA fill is waiting for fill data +#define PVS_DMABG (1 << 21) // background DMA operation is running +#define PVS_FIFORUN (1 << 22) // FIFO is processing struct PicoVideo { @@ -858,6 +860,7 @@ unsigned char PicoVideoRead8HV_L(void); extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask); void PicoVideoFIFOSync(int cycles); int PicoVideoFIFOHint(void); +void PicoVideoFIFOMode(int active, int h40); int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask, unsigned sr_flags); void PicoVideoSave(void); void 
PicoVideoLoad(void); diff --git a/pico/videoport.c b/pico/videoport.c index cbcea796..3ed7f5b4 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -12,8 +12,11 @@ #define NEED_DMA_SOURCE #include "memory.h" -extern const unsigned char hcounts_32[]; -extern const unsigned char hcounts_40[]; +extern const unsigned char hcounts_32[], hcounts_40[]; +extern const unsigned char vdpcyc2sl_32_bl[], vdpcyc2sl_40_bl[]; +extern const unsigned char vdpcyc2sl_32[], vdpcyc2sl_40[]; +extern const unsigned short vdpsl2cyc_32_bl[], vdpsl2cyc_40_bl[]; +extern const unsigned short vdpsl2cyc_32[], vdpsl2cyc_40[]; static int blankline; // display disabled for this line static unsigned sat; // VRAM addr of sprite attribute table @@ -53,48 +56,6 @@ int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned * FIFORead executes a 68k read. 68k is blocked until the next transfer slot. */ -// FIFO transfer slots per line: [active][h40] -static const short vdpslots[2][2] = {{ 166, 204 },{ 16, 18 }}; -// mapping between slot# and 68k cycles in a blanked scanline [H32, H40] -static const int vdpcyc2sl_bl[] = { (166<<16)/488, (204<<16)/488 }; -static const int vdpsl2cyc_bl[] = { (488<<16)/166, (488<<16)/204 }; - -// VDP transfer slots in active display 32col mode. 1 slot is 488/171 = 2.8538 -// 68k cycles. 
Only 16 of the 171 slots in a scanline can be used by CPU/DMA: -// (HINT=slot 0): 11,25,40,48,56,72,80,88,104,112,120,136,144,152,167,168 -static const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 to slot # -// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, - 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9,10, -10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11, -11,12,12,12,12,12,13,13,13,13,13,13,14,14,14,14, -14,14,14,14,14,14,14,15,16,16,16,16,16,16,16,16, -}; -static const unsigned char vdpsl2cyc_32[] = { // slot # to 68k cycles/4 - 0, 8, 18, 28, 33, 39, 51, 56, 62, 74, 79, 85, 97,102,108,119,120,130 -}; - -// VDP transfer slots in active display 40col mode. 1 slot is 488/210 = 2.3238 -// 68k cycles. Only 18 of the 210 slots in a scanline can be used by CPU/DMA: -// (HINT=0): 21,47,55,63,79,87,95,111,119,127,143,151,159,175,183,191,206,207, -static const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 to slot # -// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, - 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10,10, -10,10,10,11,11,11,11,12,12,12,12,12,13,13,13,13, -13,13,13,13,13,14,14,14,14,14,15,15,15,15,15,16, -16,16,16,16,16,16,16,17,18,18,18,18,18,18,18,18, -}; -static const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 - 0, 12, 27, 32, 36, 46, 50, 55, 64, 69, 73, 83, 87, 92,101,106,111,119,120,134 -}; - // NB code assumes fifo_* arrays have size 2^n // last transferred FIFO data, ...x = index XXX currently only CPU static short fifo_data[4], fifo_dx; // XXX must go into save? 
@@ -106,34 +67,10 @@ enum { FQ_BYTE = 1, FQ_BGDMA = 2, FQ_FGDMA = 4 }; // queue flags, NB: BYTE = 1! static unsigned int fifo_total; // total# of pending FIFO entries (w/o BGDMA) static unsigned short fifo_slot; // last executed slot in current scanline +static unsigned short fifo_maxslot;// #slots in scanline -// map cycles to FIFO slot -static __inline int GetFIFOSlot(struct PicoVideo *pv, int cycles) -{ - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - - if (active) return (h40 ? vdpcyc2sl_40 : vdpcyc2sl_32)[cycles/4]; - else return (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; -} - -static __inline int GetMaxFIFOSlot(struct PicoVideo *pv) -{ - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - - return vdpslots[active][h40]; -} - -// map FIFO slot to cycles -static __inline int GetFIFOCycles(struct PicoVideo *pv, int slot) -{ - int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); - int h40 = pv->reg[12] & 1; - - if (active) return (h40 ? vdpsl2cyc_40 : vdpsl2cyc_32)[slot]*4; - else return ((slot * vdpsl2cyc_bl[h40] + slot) >> 16); -} +static const unsigned char *fifo_cyc2sl; +static const unsigned short *fifo_sl2cyc; // do the FIFO math static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) @@ -149,20 +86,16 @@ static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) // if entry has been processed... 
if (pv->fifo_cnt == 0) { - if (fifo_ql) { - // terminate DMA if applicable - if ((pv->status & SR_DMA) && (fifo_queue[fifo_qx] & FQ_BGDMA)) { - pv->status &= ~SR_DMA; - pv->command &= ~0x80; - } - // remove entry from FIFO + // remove entry from FIFO + if (fifo_ql) fifo_qx ++, fifo_qx &= 7, fifo_ql --; - } // start processing for next entry if there is one if (fifo_ql) pv->fifo_cnt = (fifo_queue[fifo_qx] >> 3) << (fifo_queue[fifo_qx] & FQ_BYTE); - else + else { // FIFO empty + pv->status &= ~PVS_FIFORUN; fifo_total = 0; + } } return l; } @@ -170,16 +103,20 @@ static __inline int AdvanceFIFOEntry(struct PicoVideo *pv, int slots) static __inline void SetFIFOState(struct PicoVideo *pv) { // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore - if (fifo_total == 0) - pv->status &= ~PVS_CPURD; if (fifo_total <= 4) { - int x = (fifo_qx + fifo_ql - 1) & 7; - if ((pv->status & SR_DMA) && !(pv->status & PVS_DMAFILL) && - (!fifo_ql || !(fifo_queue[x] & FQ_BGDMA))) { + pv->status &= ~PVS_CPUWR; + if (!(pv->status & (PVS_DMABG|PVS_DMAFILL))) { pv->status &= ~SR_DMA; pv->command &= ~0x80; } - pv->status &= ~PVS_CPUWR; + } + if (fifo_total == 0) { + pv->status &= ~PVS_CPURD; + // terminate DMA if applicable + if (!(pv->status & (PVS_FIFORUN|PVS_DMAFILL))) { + pv->status &= ~(SR_DMA|PVS_DMABG); + pv->command &= ~0x80; + } } } @@ -190,7 +127,7 @@ void PicoVideoFIFOSync(int cycles) int slots, done; // calculate #slots since last executed slot - slots = GetFIFOSlot(pv, cycles) - fifo_slot; + slots = fifo_cyc2sl[cycles>>1] - fifo_slot; // advance FIFO queue by #done slots done = slots; @@ -208,31 +145,28 @@ void PicoVideoFIFOSync(int cycles) int PicoVideoFIFODrain(int level, int cycles, int bgdma) { struct PicoVideo *pv = &Pico.video; - int maxsl = GetMaxFIFOSlot(pv); // max xfer slots in this scanline + unsigned ocyc = cycles; int burn = 0; // process FIFO entries until low level is reached - while (fifo_total > level && fifo_slot < maxsl && + while (fifo_total > 
level && fifo_slot < fifo_maxslot && (!(fifo_queue[fifo_qx] & FQ_BGDMA) || bgdma)) { int b = fifo_queue[fifo_qx] & FQ_BYTE; int cnt = ((fifo_total-level) << b) - (pv->fifo_cnt & b); - int last = fifo_slot; - int slot = (pv->fifo_cnt < cnt ? pv->fifo_cnt : cnt) + last; // target slot - unsigned ocyc = cycles; + int slot = (pv->fifo_cnt<cnt ? pv->fifo_cnt:cnt) + fifo_slot; // target slot - if (slot > maxsl) { + if (slot > fifo_maxslot) { // target in later scanline, advance to eol - slot = maxsl; + slot = fifo_maxslot; cycles = 488; } else { // advance FIFO to target slot and CPU to cycles at that slot - cycles = GetFIFOCycles(pv, slot); + cycles = fifo_sl2cyc[slot]<<1; } + AdvanceFIFOEntry(pv, slot - fifo_slot); fifo_slot = slot; - burn += cycles - ocyc; - - AdvanceFIFOEntry(pv, slot - last); } + burn = cycles - ocyc; SetFIFOState(pv); @@ -246,17 +180,19 @@ int PicoVideoFIFORead(void) int lc = SekCyclesDone()-Pico.t.m68c_line_start; int burn = 0; - PicoVideoFIFOSync(lc); + if (pv->fifo_cnt) { + PicoVideoFIFOSync(lc); + // advance FIFO and CPU until FIFO is empty + burn = PicoVideoFIFODrain(0, lc, 1); + lc += burn; + } - // advance FIFO and CPU until FIFO is empty - burn = PicoVideoFIFODrain(0, lc, 1); - lc += burn; if (fifo_total > 0) pv->status |= PVS_CPURD; // target slot is in later scanline else { // use next VDP access slot for reading, block 68k until then - fifo_slot = GetFIFOSlot(pv, lc) + 1; - burn += GetFIFOCycles(pv, fifo_slot) - lc; + fifo_slot = fifo_cyc2sl[lc>>1] + 1; + burn += (fifo_sl2cyc[fifo_slot]<<1) - lc; } return burn; @@ -267,35 +203,41 @@ int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) { struct PicoVideo *pv = &Pico.video; int lc = SekCyclesDone()-Pico.t.m68c_line_start; - int burn = 0, x; + int burn = 0, x, head = 0; - PicoVideoFIFOSync(lc); + if (pv->fifo_cnt) + PicoVideoFIFOSync(lc); pv->status = (pv->status & ~sr_mask) | sr_flags; if (count && fifo_ql < 8) { // update FIFO state if it was empty if (fifo_ql == 0) { - 
fifo_slot = GetFIFOSlot(pv, lc+9); // FIFO latency ~3 vdp slots + fifo_slot = fifo_cyc2sl[(lc+8)>>1]; // FIFO latency ~3 vdp slots pv->fifo_cnt = count << (flags & FQ_BYTE); + pv->status |= PVS_FIFORUN; } - // create xfer queue entry + // determine queue position for entry x = (fifo_qx + fifo_ql - 1) & 7; if (fifo_ql && (fifo_queue[x] & FQ_BGDMA)) { // CPU FIFO writes have priority over a background DMA Fill/Copy fifo_queue[(x+1) & 7] = fifo_queue[x]; - if (fifo_ql == 1) { + if (x == fifo_qx) { // overtaking to queue head? // XXX if interrupting a DMA fill, fill data changes int f = fifo_queue[x] & 7; fifo_queue[(x+1) & 7] = (pv->fifo_cnt >> (f & FQ_BYTE) << 3) | f; pv->fifo_cnt = count << (flags & FQ_BYTE); + head = 1; } x = (x-1) & 7; } - if (fifo_ql && (fifo_queue[x] & 7) == flags) { + + // create xfer queue entry + if (fifo_ql && !head && (fifo_queue[x] & 7) == flags) { // amalgamate entries if of same type fifo_queue[x] += (count << 3); - if (fifo_ql == 1) pv->fifo_cnt += count << (flags & FQ_BYTE); + if (x == fifo_qx) // modifying fifo head, adjust count + pv->fifo_cnt += count << (flags & FQ_BYTE); } else { fifo_ql ++; x = (x+1) & 7; @@ -331,20 +273,25 @@ int PicoVideoFIFOHint(void) } // switch FIFO mode between active/inactive display -static void PicoVideoFIFOMode(int active) +void PicoVideoFIFOMode(int active, int h40) { + static const unsigned char *vdpcyc2sl[2][2] = + { {vdpcyc2sl_32_bl, vdpcyc2sl_40_bl} , {vdpcyc2sl_32, vdpcyc2sl_40} }; + static const unsigned short *vdpsl2cyc[2][2] = + { {vdpsl2cyc_32_bl, vdpsl2cyc_40_bl} , {vdpsl2cyc_32, vdpsl2cyc_40} }; + struct PicoVideo *pv = &Pico.video; - int h40 = pv->reg[12] & 1; int lc = SekCyclesDone() - Pico.t.m68c_line_start; + active = active && !(pv->status & PVS_VB2); - PicoVideoFIFOSync(lc); + if (fifo_maxslot) + PicoVideoFIFOSync(lc); - if (fifo_ql) { - // recalculate FIFO slot for new mode - if (!(pv->status & SR_VB) && active) - fifo_slot = (pv->reg[12]&1 ? 
vdpcyc2sl_40 : vdpcyc2sl_32)[lc/4]; - else fifo_slot = ((lc * vdpcyc2sl_bl[h40] + lc) >> 16); - } + fifo_cyc2sl = vdpcyc2sl[active][h40]; + fifo_sl2cyc = vdpsl2cyc[active][h40]; + // recalculate FIFO slot for new mode + fifo_slot = fifo_cyc2sl[lc>>1]-1; + fifo_maxslot = fifo_cyc2sl[488>>1]; } @@ -459,7 +406,7 @@ static void DmaSlow(int len, unsigned int source) SekCyclesDone(), SekPc); SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_FGDMA | (Pico.video.type == 1), - 0, SR_DMA| PVS_CPUWR)); + PVS_DMABG, SR_DMA | PVS_CPUWR)); if ((source & 0xe00000) == 0xe00000) { // Ram base = (u16 *)PicoMem.ram; @@ -583,13 +530,13 @@ static void DmaCopy(int len) int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); + // XXX implement VRAM 128k? Is this even working? xfer/count still FQ_BYTE? SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA | FQ_BYTE, - PVS_CPUWR, SR_DMA)); + PVS_CPUWR, SR_DMA | PVS_DMABG)); source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; - // XXX implement VRAM 128k? Is this even working? count still in bytes? for (; len; len--) { vr[(u16)a] = vr[(u16)(source++)]; @@ -616,7 +563,7 @@ static NOINLINE void DmaFill(int data) elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA | (Pico.video.type == 1), - PVS_CPUWR | PVS_DMAFILL, SR_DMA)); + PVS_CPUWR | PVS_DMAFILL, SR_DMA | PVS_DMABG)); switch (Pico.video.type) { @@ -823,11 +770,13 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) if (num == 0 && !(pvid->reg[0]&2) && (d&2)) pvid->hv_latch = PicoVideoRead(0x08); if (num == 1 && ((pvid->reg[1]^d)&0x40)) { - PicoVideoFIFOMode(d & 0x40); + PicoVideoFIFOMode(d & 0x40, pvid->reg[12]&1); // handle line blanking before line rendering if (SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) blankline = d&0x40 ? 
-1 : Pico.m.scanline; } + if (num == 12 && ((pvid->reg[12]^d)&0x01)) + PicoVideoFIFOMode(pvid->reg[1]&0x40, d & 1); DrawSync(SekCyclesDone() - Pico.t.m68c_line_start <= 488-390); pvid->reg[num]=(unsigned char)d; switch (num) @@ -1058,6 +1007,7 @@ void PicoVideoLoad(void) // convert former dma_xfers (why was this in PicoMisc anyway?) if (Pico.m.dma_xfers) { + pv->status = SR_DMA|PVS_FIFORUN; pv->fifo_cnt = Pico.m.dma_xfers * (pv->type == 1 ? 2 : 1); fifo_total = Pico.m.dma_xfers; Pico.m.dma_xfers = 0;