From 17bd69adc6f0ae747dd7f3304099c7ef76b908cf Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 7 Feb 2020 22:10:18 +0100 Subject: [PATCH] revised VDP fifo implementation --- pico/debug.c | 6 + pico/pico.c | 57 +----- pico/pico_cmn.c | 37 ++-- pico/pico_int.h | 12 +- pico/videoport.c | 456 +++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 430 insertions(+), 138 deletions(-) diff --git a/pico/debug.c b/pico/debug.c index e617d908..e4b5232e 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -43,6 +43,12 @@ char *PDebugMain(void) !!(Pico.sv.flags & SRF_ENABLED), !!(Pico.sv.flags & SRF_EEPROM), Pico.sv.eeprom_type); MVP; sprintf(dstrp, "sram range: %06x-%06x, reg: %02x\n", Pico.sv.start, Pico.sv.end, Pico.m.sram_reg); MVP; sprintf(dstrp, "pend int: v:%i, h:%i, vdp status: %04x\n", bit(pv->pending_ints,5), bit(pv->pending_ints,4), pv->status); MVP; + sprintf(dstrp, "VDP regs 00-07: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[0],reg[1],reg[2],reg[3],reg[4],reg[5],reg[6],reg[7]); MVP; + sprintf(dstrp, "VDP regs 08-0f: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[8],reg[9],reg[10],reg[11],reg[12],reg[13],reg[14],reg[15]); MVP; + sprintf(dstrp, "VDP regs 10-17: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[16],reg[17],reg[18],reg[19],reg[20],reg[21],reg[22],reg[23]); MVP; + sprintf(dstrp, "VDP regs 18-1f: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[24],reg[25],reg[26],reg[27],reg[28],reg[29],reg[30],reg[31]); MVP; + r = (reg[5]<<9)+(reg[6]<<11); + sprintf(dstrp, "sprite #0: %04x %04x %04x %04x\n",PicoMem.vram[r/2],PicoMem.vram[r/2+1],PicoMem.vram[r/2+2],PicoMem.vram[r/2+3]); MVP; sprintf(dstrp, "pal: %i, hw: %02x, frame#: %i, cycles: %u\n", Pico.m.pal, Pico.m.hardware, Pico.m.frame_count, SekCyclesDone()); MVP; sprintf(dstrp, "M68k: PC: %06x, SR: %04x, irql: %i\n", SekPc, SekSr, SekIrqLevel); MVP; for (r = 0; r < 8; r++) { diff --git a/pico/pico.c b/pico/pico.c index b65b7de8..9db2fc64 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -67,6 +67,7 @@ void 
PicoPower(void) memset(&Pico.video,0,sizeof(Pico.video)); memset(&Pico.m,0,sizeof(Pico.m)); + memset(&Pico.t,0,sizeof(Pico.t)); Pico.video.pending_ints=0; z80_reset(); @@ -182,8 +183,7 @@ int PicoReset(void) PsndReset(); // pal must be known here // create an empty "dma" to cause 68k exec start at random frame location - if (Pico.m.dma_xfers == 0 && !(PicoIn.opt & POPT_DIS_VDP_FIFO)) - Pico.m.dma_xfers = rand() & 0x1fff; + PicoVideoFIFOWrite(rand() & 0x1fff, 0, 0, PVS_CPURD); SekFinishIdleDet(); @@ -222,57 +222,6 @@ void PicoLoopPrepare(void) rendstatus_old = -1; } -// this table is wrong and should be removed -// keeping it for now to compensate wrong timing elswhere, mainly for Outrunners -static const int dma_timings[] = { // Q16 - // dma2vram dma2[vs|c]ram vram_fill vram_copy - // VRAM has half the width of VSRAM/CRAM, thus half the performance - ( 83<<16)/488, (166<<16)/488, (165<<16)/488, ( 83<<16)/488, // vblank 32cell - (102<<16)/488, (204<<16)/488, (203<<16)/488, (102<<16)/488, // vblank 40cell - ( 8<<16)/488, ( 16<<16)/488, ( 15<<16)/488, ( 8<<16)/488, // active 32cell - ( 9<<16)/488, ( 18<<16)/488, ( 17<<16)/488, ( 9<<16)/488 // active 40cell -}; - -static const int dma_bsycles[] = { // Q16 - (488<<16)/83, (488<<16)/166, (488<<16)/165, (488<<16)/83, - (488<<16)/102, (488<<16)/204, (488<<16)/203, (488<<16)/102, - (488<<16)/8, (488<<16)/16, (488<<16)/15, (488<<16)/8, - (488<<16)/9, (488<<16)/18, (488<<16)/17, (488<<16)/9 -}; - -// grossly inaccurate.. FIXME FIXXXMEE -PICO_INTERNAL int CheckDMA(int cycles) -{ - int burn = 0, xfers_can, dma_op = Pico.video.reg[0x17]>>6; // see gens for 00 and 01 modes - int xfers = Pico.m.dma_xfers; - int dma_op1; - - // safety pin - if (cycles <= 0) return 0; - - if(!(dma_op&2)) dma_op = (Pico.video.type==1) ? 0 : 1; // setting dma_timings offset here according to Gens - dma_op1 = dma_op; - if(Pico.video.reg[12] & 1) dma_op |= 4; // 40 cell mode? 
- if(!(Pico.video.status&8)&&(Pico.video.reg[1]&0x40)) dma_op|=8; // active display? - xfers_can = (dma_timings[dma_op] * cycles + 0x8000) >> 16; - if(xfers <= xfers_can) - { - Pico.video.status &= ~SR_DMA; - if (!(dma_op & 2)) - burn = xfers * dma_bsycles[dma_op] >> 16; - Pico.m.dma_xfers = 0; - } else { - if(!(dma_op&2)) burn = cycles; - Pico.m.dma_xfers -= xfers_can; - } - Pico.t.dma_end = SekCyclesDone() + burn; - - elprintf(EL_VDPDMA, "~Dma %i op=%i can=%i burn=%i [%u]", - Pico.m.dma_xfers, dma_op1, xfers_can, burn, SekCyclesDone()); - //dprintf("~aim: %i, cnt: %i", Pico.t.m68c_aim, Pico.t.m68c_cnt); - return burn; -} - #include "pico_cmn.c" /* sync z80 to 68k */ @@ -319,7 +268,7 @@ void PicoFrame(void) goto end; } - //if(Pico.video.reg[12]&0x2) Pico.video.status ^= 0x10; // change odd bit in interlace mode + //if(Pico.video.reg[12]&0x2) Pico.video.status ^= SR_ODD; // change odd bit in interlace mode PicoFrameStart(); PicoFrameHints(); diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index b7e7d835..75389840 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -72,27 +72,19 @@ static void do_hint(struct PicoVideo *pv) } } -static void do_timing_hacks_as(struct PicoVideo *pv, int vdp_slots, int cycles) +static void do_timing_hacks_end(struct PicoVideo *pv) { - pv->lwrite_cnt += vdp_slots - Pico.m.dma_xfers * 2; // wrong *2 - if (pv->lwrite_cnt > vdp_slots) - pv->lwrite_cnt = vdp_slots; - else if (pv->lwrite_cnt < 0) - pv->lwrite_cnt = 0; - if (Pico.m.dma_xfers) - SekCyclesBurn(CheckDMA(cycles)); + PicoVideoFIFOSync(488); } -static void do_timing_hacks_vb(int cycles) +static void do_timing_hacks_start(struct PicoVideo *pv) { - if (unlikely(Pico.m.dma_xfers)) - SekCyclesBurn(CheckDMA(cycles)); + SekCyclesBurn(PicoVideoFIFOHint()); // prolong cpu HOLD if necessary } static int PicoFrameHints(void) { struct PicoVideo *pv = &Pico.video; - int vdp_slots = (Pico.video.reg[12] & 1) ? 
18 : 16; int lines, y, lines_vis, skip; int vcnt_wrap, vcnt_adj; unsigned int cycles; @@ -155,8 +147,9 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_as(pv, vdp_slots, CYCLES_M68K_LINE); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); @@ -175,10 +168,6 @@ static int PicoFrameHints(void) #endif } - // VDP FIFO - pv->lwrite_cnt = 0; - Pico.video.status |= SR_EMPT; - memcpy(PicoIn.padInt, PicoIn.pad, sizeof(PicoIn.padInt)); PAD_DELAY(); @@ -196,7 +185,7 @@ static int PicoFrameHints(void) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay between last H-int and V-int (Golden Axe 3) Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_vb(CYCLES_M68K_VINT_LAG); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_VINT_LAG); pv->status |= SR_F; @@ -224,8 +213,8 @@ static int PicoFrameHints(void) #endif // Run scanline: - do_timing_hacks_vb(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); @@ -260,8 +249,9 @@ static int PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_vb(CYCLES_M68K_LINE); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); @@ -271,8 +261,9 @@ static int PicoFrameHints(void) unsigned int l = PicoIn.overclockM68k * lines / 100; while (l-- > 0) { Pico.t.m68c_cnt -= CYCLES_M68K_LINE; - do_timing_hacks_vb(CYCLES_M68K_LINE); + do_timing_hacks_start(pv); SekSyncM68k(); + do_timing_hacks_end(pv); } } @@ -282,7 +273,6 @@ static int PicoFrameHints(void) // last scanline Pico.m.scanline = y++; pv->v_counter = 0xff; - pv->lwrite_cnt = 0; PAD_DELAY(); @@ -297,8 +287,9 @@ static int 
PicoFrameHints(void) // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; - do_timing_hacks_as(pv, vdp_slots, CYCLES_M68K_LINE); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); diff --git a/pico/pico_int.h b/pico/pico_int.h index 357de4a9..b3ce8a72 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -296,6 +296,10 @@ extern SH2 sh2s[2]; // not part of real SR #define PVS_ACTIVE (1 << 16) #define PVS_VB2 (1 << 17) // ignores forced blanking +#define PVS_CPUWR (1 << 18) // CPU hold by FIFO full +#define PVS_CPURD (1 << 19) // CPU hold by FIFO not empty +#define PVS_DMAPEND (1 << 20) // DMA operation waiting for start +#define PVS_DMAFILL (1 << 21) // DMA fill is in progress struct PicoVideo { @@ -306,7 +310,7 @@ struct PicoVideo unsigned short addr; // Read/Write address unsigned int status; // Status bits (SR) and extra flags unsigned char pending_ints; // pending interrupts: ??VH???? - signed char lwrite_cnt; // VDP write count during active display line + signed char pad1; // was VDP write count unsigned short v_counter; // V-counter unsigned short debug; // raw debug register unsigned char debug_p; // ...
parsed: PVD_* @@ -335,7 +339,7 @@ struct PicoMisc unsigned char eeprom_slave; // EEPROM slave word for X24C02 and better SRAMs unsigned char eeprom_status; unsigned char pad1; // was ym2612 status - unsigned short dma_xfers; // 18 + unsigned short pad2; // 18 was dma_xfers unsigned char eeprom_wb[2]; // EEPROM latch/write buffer unsigned int frame_count; // 1c for movies and idle det }; @@ -419,7 +423,6 @@ struct PicoTiming unsigned int z80c_aim; int z80_scanline; - unsigned int dma_end; // end of current DMA op (m68k cycles) int timer_a_next_oflow, timer_a_step; // in z80 cycles int timer_b_next_oflow, timer_b_step; }; @@ -850,6 +853,9 @@ unsigned char PicoVideoRead8CtlL(void); unsigned char PicoVideoRead8HV_H(void); unsigned char PicoVideoRead8HV_L(void); extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask); +void PicoVideoFIFOSync(int cycles); +int PicoVideoFIFOHint(void); +int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask, unsigned sr_flags); // misc.c PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count); diff --git a/pico/videoport.c b/pico/videoport.c index cdc5796c..881a74a3 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -14,9 +14,296 @@ extern const unsigned char hcounts_32[]; extern const unsigned char hcounts_40[]; +static unsigned hvlatch; // latched hvcounter value +static int blankline; // display disabled for this line int (*PicoDmaHook)(unsigned int source, int len, unsigned short **base, unsigned int *mask) = NULL; + +/* VDP FIFO implementation + * + * fifo_slot: last slot executed in this scanline + * fifo_cnt: #slots remaining for active FIFO write (#writes<<#bytep) + * fifo_total: #total FIFO entries pending + * fifo_data: last values transferred through fifo + * fifo_queue: fifo transfer queue (#writes, VRAM_byte_p) + * + * FIFO states: empty total=0 + * inuse total>0 && total<4 + * full total==4 + * wait total>4 + * Conditions: + * fifo_slot is 
always behind slot2cyc[cycles]. Advancing it beyond cycles + * implies blocking the 68k up to that slot. + * + * A FIFO write goes to the end of the fifo queue. There can be more pending + * writes than FIFO slots, but the 68k will be blocked in most of those cases. + * This is only about correct timing, data xfer must be handled by the caller. + * Blocking the CPU means burning cycles via SekCyclesBurn*(), which is to be + * executed by the caller. + * + * FIFOSync "executes" FIFO write slots up to the given cycle in the current + * scanline. A queue entry completely executed is removed from the queue. + * FIFOWrite pushes writes to the transfer queue. If it's a blocking write, 68k + * is blocked if more than 4 FIFO writes are pending. + * FIFORead executes a 68k read. 68k is blocked until the next transfer slot. + */ + +// FIFO transfer slots per line: H32 blank, H40 blank, H32 active, H40 active +static const short vdpslots[] = { 166, 204, 16, 18 }; +// mapping between slot# and 68k cycles in a blanked scanline +static const int vdpcyc2sl_bl[] = { (166<<16)/488, (204<<16)/488, (16<<16)/488, (18<<16)/488 }; +static const int vdpsl2cyc_bl[] = { (488<<16)/166, (488<<16)/204, (488<<16)/16, (488<<16)/18 }; + +// VDP transfer slots in active display 32col mode. 1 slot is 488/171 = 2.8538 +// 68k cycles. 
Only 16 of the 171 slots in a scanline can be used by CPU/DMA: +// (HINT=slot 0): 13,27,42,50,58,74,82,90,106,114,122,138,146,154,169,170 +const unsigned char vdpcyc2sl_32[] = { // 68k cycles/4 since HINT to slot # +// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, + 9,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11, +11,11,12,12,12,12,12,12,13,13,13,13,13,13,14,14, +14,14,14,14,14,14,14,14,15,16,16,16,16,16,16,16, +}; +const unsigned char vdpsl2cyc_32[] = { // slot # to 68k cycles/4 since HINT + 0, 9, 19, 30, 35, 41, 52, 58, 64, 75, 81, 87, 98,104,110,120,121,123,123 +}; + +// VDP transfer slots in active display 40col mode. 1 slot is 488/210 = 2.3238 +// 68k cycles. Only 18 of the 210 slots in a scanline can be used by CPU/DMA: +// (HINT=0): 23,49,57,65,81,89,97,113,121,129,145,153,161,177,185,193,208,209 +const unsigned char vdpcyc2sl_40[] = { // 68k cycles/4 since HINT to slot # +// 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 8, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,10,10, +10,10,10,10,11,11,11,11,12,12,12,12,12,13,13,13, +13,13,13,13,13,13,14,14,14,14,14,15,15,15,15,15, +16,16,16,16,16,16,16,16,17,18,18,18,18,18,18,18, +}; +const unsigned char vdpsl2cyc_40[] = { // slot # to 68k cycles/4 since HINT + 0, 13, 28, 33, 37, 47, 51, 56, 65, 70, 74, 84, 88, 93,102,107,112,120,121,123,123 +}; + +// NB code assumes fifo_* arrays have size 2^n +// last transferred FIFO data, ...x = index XXX currently only CPU +static short fifo_data[4], fifo_dx; +// queued FIFO transfers, ...x = index, ...l = queue length +// each entry has 2 values: 
[n]>>1=#writes, [n]&1=is VRAM byte access +static int fifo_queue[8], fifo_qx, fifo_ql; + +signed int fifo_cnt; // pending slots for current queue entry +unsigned short fifo_slot; // last executed slot in current scanline +unsigned int fifo_total; // total# of pending FIFO entries + +// sync FIFO to cycles +void PicoVideoFIFOSync(int cycles) +{ + struct PicoVideo *pv = &Pico.video; + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *cs = h40 ? vdpcyc2sl_40 : vdpcyc2sl_32; + int slots, done; + + // calculate #slots since last executed slot + if (active) slots = cs[cycles/4]; + else slots = (cycles * vdpcyc2sl_bl[h40] + cycles) >> 16; + slots -= fifo_slot; + + // advance FIFO queue by #done slots + done = slots; + while (done > 0 && fifo_ql) { + int l = done, b = fifo_queue[fifo_qx&7] & 1; + if (l > fifo_cnt) + l = fifo_cnt; + fifo_total -= ((fifo_cnt & b) + l) >> b; + fifo_slot += l; + fifo_cnt -= l; + done -= l; + + if (fifo_cnt == 0) { + fifo_qx ++, fifo_ql --; + fifo_cnt= (fifo_queue[fifo_qx&7] >> 1) << (fifo_queue[fifo_qx&7] & 1); + } + } + + // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore + if (fifo_total <= 4) { + pv->status &= ~PVS_CPUWR; + pv->command &= ~0x80; + if (!(pv->status & PVS_DMAPEND)) + pv->status &= ~(SR_DMA|PVS_DMAFILL); + } + if (fifo_total == 0) + pv->status &= ~PVS_CPURD; +} + +// drain FIFO, blocking 68k on the way. FIFO must be synced prior to drain. +int PicoVideoFIFODrain(int level, int cycles) +{ + struct PicoVideo *pv = &Pico.video; + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *sc = h40 ? 
vdpsl2cyc_40 : vdpsl2cyc_32; + int maxsl = vdpslots[h40 + 2*active]; // max xfer slots in this scanline + int burn = 0; + + while (fifo_total > level && fifo_slot < maxsl) { + int b = fifo_queue[fifo_qx&7] & 1; + int cnt = (fifo_total-level) << b; + int last = fifo_slot; + int slot = (fifo_cnt<cnt ? fifo_cnt : cnt) + last; // target slot + int ocyc = cycles; + + if (slot > maxsl) { + // target in later scanline, advance to eol + slot = maxsl; + fifo_slot = maxsl; + cycles = 488; + } else { + // advance FIFO to target slot and CPU to cycles at that slot + fifo_slot = slot; + if (active) cycles = sc[slot]*4; + else cycles = ((slot * vdpsl2cyc_bl[h40] + slot) >> 16); + } + burn += cycles - ocyc; + + slot -= last; + fifo_total -= ((fifo_cnt & b) + slot) >> b; + fifo_cnt -= slot; + + if (fifo_cnt == 0) { + fifo_qx ++, fifo_ql --; + fifo_cnt= (fifo_queue[fifo_qx&7] >> 1) << (fifo_queue[fifo_qx&7] & 1); + } + } + + // release CPU and terminate DMA if FIFO isn't blocking the bus anymore + if (fifo_total <= 4) { + pv->status &= ~PVS_CPUWR; + pv->command &= ~0x80; + if (!(pv->status & PVS_DMAPEND)) + pv->status &= ~(SR_DMA|PVS_DMAFILL); + } + if (fifo_total == 0) + pv->status &= ~PVS_CPURD; + + return burn; +} + +// read VDP data port +int PicoVideoFIFORead(void) +{ + struct PicoVideo *pv = &Pico.video; + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *cs = h40 ? vdpcyc2sl_40 : vdpcyc2sl_32; + const unsigned char *sc = h40 ?
vdpsl2cyc_40 : vdpsl2cyc_32; + int lc = SekCyclesDone()-Pico.t.m68c_line_start+4; + int burn = 0; + + PicoVideoFIFOSync(lc); + + // advance FIFO and CPU until FIFO is empty + burn = PicoVideoFIFODrain(0, lc); + lc += burn; + if (fifo_total > 0) + pv->status |= PVS_CPURD; // target slot is in later scanline + else { + // use next VDP access slot for reading, block 68k until then + if (active) { + fifo_slot = cs[lc/4] + 1; + burn += sc[fifo_slot]*4; + } else { + fifo_slot = ((lc * vdpcyc2sl_bl[h40] + lc) >> 16) + 1; + burn += ((fifo_slot * vdpsl2cyc_bl[h40] + fifo_slot) >> 16); + } + burn -= lc; + } + + return burn; +} + +// write VDP data port +int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask,unsigned sr_flags) +{ + struct PicoVideo *pv = &Pico.video; + int active = !(pv->status & SR_VB) && (pv->reg[1] & 0x40); + int h40 = pv->reg[12] & 1; + const unsigned char *cs = h40 ? vdpcyc2sl_40 : vdpcyc2sl_32; + int lc = SekCyclesDone()-Pico.t.m68c_line_start+4; + int burn = 0; + + PicoVideoFIFOSync(lc); + pv->status = (pv->status & ~sr_mask) | sr_flags; + + if (count) { + // update FIFO state if it was empty + if (fifo_total == 0 && count) { + if (active) fifo_slot = cs[lc/4]; + else fifo_slot = (lc * vdpcyc2sl_bl[h40] + lc) >> 16; + fifo_cnt = count << byte_p; + } + + // create xfer queue entry + int x = (fifo_qx + fifo_ql) & 7; + fifo_queue[x] = (count << 1) | byte_p; + fifo_ql ++; + fifo_total += count; + } + + // if CPU is waiting for the bus, advance CPU and FIFO until bus is free + if ((pv->status & (PVS_CPUWR|PVS_DMAFILL)) == PVS_CPUWR) + burn = PicoVideoFIFODrain(4, lc); + + return burn; +} + +// at HINT, advance FIFO to new scanline +int PicoVideoFIFOHint(void) +{ + struct PicoVideo *pv = &Pico.video; + int burn = 0; + + // reset slot to start of scanline + fifo_slot = 0; + + // if CPU is waiting for the bus, advance CPU and FIFO until bus is free + if (pv->status & PVS_CPURD) + burn = PicoVideoFIFORead(); + if (pv->status & PVS_CPUWR) + burn = 
PicoVideoFIFOWrite(0, 0, 0, 0); + + return burn; +} + +// switch FIFO mode between active/inactive display +void PicoVideoFIFOMode(int active) +{ + struct PicoVideo *pv = &Pico.video; + const unsigned char *cs = pv->reg[12]&1 ? vdpcyc2sl_40 : vdpcyc2sl_32; + int h40 = pv->reg[12] & 1; + int lc = SekCyclesDone() - Pico.t.m68c_line_start; + + PicoVideoFIFOSync(lc); + + if (fifo_total) { + // recalculate FIFO slot for new mode + if (!(pv->status & SR_VB) && active) + fifo_slot = cs[lc/4]; + else fifo_slot = ((lc * vdpcyc2sl_bl[h40] + lc) >> 16); + } +} + + +// VDP memory rd/wr + static __inline void AutoIncrement(void) { Pico.video.addr=(unsigned short)(Pico.video.addr+Pico.video.reg[0xf]); @@ -60,15 +347,19 @@ static void VideoWrite(u16 d) static unsigned int VideoRead(void) { - unsigned int a=0,d=0; + unsigned int a, d = fifo_data[(fifo_dx+1)&3]; a=Pico.video.addr; a>>=1; + SekCyclesBurnRun(PicoVideoFIFORead()); switch (Pico.video.type) { case 0: d=PicoMem.vram [a & 0x7fff]; break; - case 8: d=PicoMem.cram [a & 0x003f]; break; - case 4: d=PicoMem.vsram[a & 0x003f]; break; + case 8: d=(PicoMem.cram [a & 0x003f] & 0x0eee) | (d & ~0x0eee); break; + case 4: if ((a & 0x3f) >= 0x28) a = 0; + d=(PicoMem.vsram [a & 0x003f] & 0x07ff) | (d & ~0x07ff); break; + case 12:a=PicoMem.vram [a & 0x7fff]; if (Pico.video.addr&1) a >>= 8; + d=(a & 0x00ff) | (d & ~0x00ff); break; default:elprintf(EL_ANOMALY, "VDP read with bad type %i", Pico.video.type); break; } @@ -76,6 +367,8 @@ static unsigned int VideoRead(void) return d; } +// VDP DMA + static int GetDmaLength(void) { struct PicoVideo *pvid=&Pico.video; @@ -95,13 +388,11 @@ static void DmaSlow(int len, unsigned int source) u32 mask = 0x1ffff; elprintf(EL_VDPDMA, "DmaSlow[%i] %06x->%04x len %i inc=%i blank %i [%u] @ %06x", - Pico.video.type, source, a, len, inc, (Pico.video.status&8)||!(Pico.video.reg[1]&0x40), + Pico.video.type, source, a, len, inc, (Pico.video.status&SR_VB)||!(Pico.video.reg[1]&0x40), SekCyclesDone(), SekPc); - 
Pico.m.dma_xfers = len; - if (Pico.m.dma_xfers < len) // lame 16bit var - Pico.m.dma_xfers = ~0; - SekCyclesBurnRun(CheckDMA(488 - (SekCyclesDone()-Pico.t.m68c_line_start))); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, Pico.video.type == 1, PVS_DMAPEND, + SR_DMA | PVS_CPUWR) + 8); if ((source & 0xe00000) == 0xe00000) { // Ram base = (u16 *)PicoMem.ram; @@ -224,14 +515,12 @@ static void DmaCopy(int len) int source; elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); - Pico.m.dma_xfers = len; - if (Pico.m.dma_xfers < len) - Pico.m.dma_xfers = ~0; - Pico.video.status |= SR_DMA; + SekCyclesBurnRun(PicoVideoFIFOWrite(len, 1, PVS_CPUWR|PVS_DMAPEND, SR_DMA)); source =Pico.video.reg[0x15]; source|=Pico.video.reg[0x16]<<8; + // XXX implement VRAM 128k? Is this even working? for (; len; len--) { vr[a] = vr[source++ & 0xffff]; @@ -255,10 +544,7 @@ static NOINLINE void DmaFill(int data) len = GetDmaLength(); elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); - Pico.m.dma_xfers = len; - if (Pico.m.dma_xfers < len) // lame 16bit var - Pico.m.dma_xfers = ~0; - Pico.video.status |= SR_DMA; + SekCyclesBurnRun(PicoVideoFIFOWrite(len, Pico.video.type == 1, PVS_CPUWR|PVS_DMAPEND, SR_DMA)); switch (Pico.video.type) { @@ -274,13 +560,24 @@ static NOINLINE void DmaFill(int data) Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: // cram + Pico.m.dirtyPal = 1; + for (l = len; l; l--) { + PicoMem.cram[(a/2) & 0x3f] = data; + + // Increment address register + a += inc; + } + break; case 5: { // vsram - // TODO: needs fifo; anyone using these? 
- static int once; - if (!once++) - elprintf(EL_STATUS|EL_ANOMALY|EL_VDPDMA, "TODO: cram/vsram fill"); + for (l = len; l; l--) { + PicoMem.vsram[(a/2) & 0x3f] = data; + + // Increment address register + a += inc; + } + break; } - case 0x81: + case 0x81: // vram 128k for (l = len; l; l--) { VideoWrite128(a, data); @@ -307,17 +604,22 @@ static NOINLINE void DmaFill(int data) } +// VDP command handling + static NOINLINE void CommandDma(void) { struct PicoVideo *pvid=&Pico.video; u32 len, method; u32 source; - if ((pvid->reg[1]&0x10)==0) return; // DMA not enabled - - if (Pico.m.dma_xfers) + pvid->status |= PVS_DMAPEND; + PicoVideoFIFOSync(SekCyclesDone()-Pico.t.m68c_line_start); + if (pvid->status & SR_DMA) { elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", - Pico.m.dma_xfers, SekPc); + fifo_total, SekPc); + fifo_total = fifo_ql = 0; + } + pvid->status |= SR_DMA; len = GetDmaLength(); source =Pico.video.reg[0x15]; @@ -329,9 +631,10 @@ static NOINLINE void CommandDma(void) DmaSlow(len, source << 1); // 68000 to VDP else if (method == 3) DmaCopy(len); // VRAM Copy - else + else { + pvid->status |= PVS_DMAFILL; return; - + } source += len; Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0; Pico.video.reg[0x15] = source; @@ -357,13 +660,21 @@ static NOINLINE void CommandChange(void) pvid->addr_u = (u8)((cmd >> 2) & 1); } -static void DrawSync(int blank_on) +// VDP interface + +static void DrawSync(int skip) { int lines = Pico.video.reg[1]&0x08 ? 
240 : 224; - if (Pico.m.scanline < lines && !(PicoIn.opt & POPT_ALT_RENDERER) && - !PicoIn.skipFrame && Pico.est.DrawScanline <= Pico.m.scanline) { + int last = Pico.m.scanline - (skip || blankline == Pico.m.scanline); + + if (last < lines && !(PicoIn.opt & POPT_ALT_RENDERER) && + !PicoIn.skipFrame && Pico.est.DrawScanline <= last) { //elprintf(EL_ANOMALY, "sync"); - PicoDrawSync(Pico.m.scanline, blank_on); + if (blankline >= 0 && blankline < last) { + PicoDrawSync(blankline, 1); + blankline = -1; + } + PicoDrawSync(last, 0); } } @@ -390,19 +701,19 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) pvid->pending=0; } - if (!(pvid->status & SR_VB) && (pvid->reg[1]&0x40) && !(PicoIn.opt&POPT_DIS_VDP_FIFO)) + if (!(PicoIn.opt&POPT_DIS_VDP_FIFO)) { - int use = pvid->type == 1 ? 2 : 1; - pvid->lwrite_cnt -= use; - if (pvid->lwrite_cnt < 0) - SekCyclesBurnRun(488 - (SekCyclesDone()-Pico.t.m68c_line_start)); - elprintf(EL_ASVDP, "VDP data write: [%04x] %04x [%u] {%i} #%i @ %06x", - Pico.video.addr, d, SekCyclesDone(), Pico.video.type, pvid->lwrite_cnt, SekPc); + fifo_data[++fifo_dx&3] = d; + SekCyclesBurnRun(PicoVideoFIFOWrite(1, pvid->type == 1, 0, PVS_CPUWR)); + + elprintf(EL_ASVDP, "VDP data write: [%04x] %04x [%u] {%i} @ %06x", + Pico.video.addr, d, SekCyclesDone(), Pico.video.type, SekPc); } VideoWrite(d); - if ((pvid->command&0x80) && (pvid->reg[1]&0x10) && (pvid->reg[0x17]>>6)==2) - DmaFill(d); + // start DMA fill on write. NB VSRAM and CRAM fills use wrong FIFO data. 
+ if ((pvid->status & (PVS_DMAPEND|PVS_DMAFILL)) == (PVS_DMAPEND|PVS_DMAFILL)) + DmaFill(fifo_data[(fifo_dx + !!(pvid->type&~0x81))&3]); break; @@ -410,6 +721,8 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) if (pvid->pending) { // Low word of command: + if (!(pvid->reg[1]&0x10)) + d = (d&~0x80)|(pvid->command&0x80); pvid->command &= 0xffff0000; pvid->command |= d; pvid->pending = 0; @@ -427,16 +740,24 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) // Register write: int num=(d>>8)&0x1f; int dold=pvid->reg[num]; - int blank_on = 0; + int skip=0; pvid->type=0; // register writes clear command (else no Sega logo in Golden Axe II) if (num > 0x0a && !(pvid->reg[1]&4)) { elprintf(EL_ANOMALY, "%02x written to reg %02x in SMS mode @ %06x", d, num, SekPc); return; } - if (num == 1 && !(d&0x40) && SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) - blank_on = 1; - DrawSync(blank_on); + if (num == 0 && !(pvid->reg[0]&2) && (d&2)) + hvlatch = PicoVideoRead(0x08); + if (num == 1 && ((pvid->reg[1]^d)&0x40)) { + PicoVideoFIFOMode(d & 0x40); + // handle line blanking before line rendering + if (SekCyclesDone() - Pico.t.m68c_line_start <= 488-390) { + skip = 1; + blankline = d&0x40 ? -1 : Pico.m.scanline; + } + } + DrawSync(skip); pvid->reg[num]=(unsigned char)d; switch (num) { @@ -519,15 +840,23 @@ update_irq: } } -static u32 SrLow(const struct PicoVideo *pv) +static u32 VideoSr(const struct PicoVideo *pv) { unsigned int c, d = pv->status; + unsigned int hp = pv->reg[12]&1 ? 32:40; // HBLANK start + unsigned int hl = pv->reg[12]&1 ? 94:84; // HBLANK length c = SekCyclesDone(); - if (c - Pico.t.m68c_line_start - 39 < 92) + if (c - Pico.t.m68c_line_start - hp < hl) d |= SR_HB; - if (CYCLES_GT(c, Pico.t.dma_end)) - d &= ~SR_DMA; + + PicoVideoFIFOSync(c-Pico.t.m68c_line_start); + if (pv->status & SR_DMA) + d |= SR_EMPT; // unused by DMA, or rather flags not updated? 
+ else if (fifo_total >= 4) + d |= SR_FULL; + else if (!fifo_total) + d |= SR_EMPT; return d; } @@ -538,8 +867,11 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) if (a == 0x04) // control port { struct PicoVideo *pv = &Pico.video; - unsigned int d = SrLow(pv); - pv->pending = 0; + unsigned int d = VideoSr(pv); + if (pv->pending) { + CommandChange(); + pv->pending = 0; + } elprintf(EL_SR, "SR read: %04x [%u] @ %06x", d, SekCyclesDone(), SekPc); return d; } @@ -564,12 +896,14 @@ PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) unsigned int d; d = (SekCyclesDone() - Pico.t.m68c_line_start) & 0x1ff; // FIXME - if (Pico.video.reg[12]&1) - d = hcounts_40[d]; - else d = hcounts_32[d]; + if (Pico.video.reg[0]&2) + d = hvlatch; + else if (Pico.video.reg[12]&1) + d = hcounts_40[d] | (Pico.video.v_counter << 8); + else d = hcounts_32[d] | (Pico.video.v_counter << 8); elprintf(EL_HVCNT, "hv: %02x %02x [%u] @ %06x", d, Pico.video.v_counter, SekCyclesDone(), SekPc); - return d | (Pico.video.v_counter << 8); + return d; } if (a==0x00) // data port @@ -592,16 +926,22 @@ unsigned char PicoVideoRead8DataL(void) unsigned char PicoVideoRead8CtlH(void) { - u8 d = (u8)(Pico.video.status >> 8); - Pico.video.pending = 0; + u8 d = VideoSr(&Pico.video) >> 8; + if (Pico.video.pending) { + CommandChange(); + Pico.video.pending = 0; + } elprintf(EL_SR, "SR read (h): %02x @ %06x", d, SekPc); return d; } unsigned char PicoVideoRead8CtlL(void) { - u8 d = SrLow(&Pico.video); - Pico.video.pending = 0; + u8 d = VideoSr(&Pico.video); + if (Pico.video.pending) { + CommandChange(); + Pico.video.pending = 0; + } elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); return d; } -- 2.39.2