From 05c7cec77522f04857f655474574469a5e66661d Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 11 Jan 2015 03:35:32 +0200 Subject: [PATCH] spu: put reverb on the thread too and more stuff to work struct --- plugins/dfsound/externals.h | 13 ++- plugins/dfsound/registers.c | 78 +++++++-------- plugins/dfsound/reverb.c | 147 ++++++++++++----------------- plugins/dfsound/spu.c | 124 +++++++++++++++--------- plugins/dfsound/spu_c64x.c | 55 +++++++---- plugins/dfsound/spu_c64x.h | 8 +- plugins/dfsound/spu_c64x_dspcode.c | 26 +++-- 7 files changed, 247 insertions(+), 204 deletions(-) diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 4832fac8..f6fc4409 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -28,6 +28,11 @@ #define noinline #define unlikely(x) x #endif +#if defined(__GNUC__) && !defined(_TMS320C6X) +#define preload __builtin_prefetch +#else +#define preload(...) +#endif #define PSE_LT_SPU 4 #define PSE_SPU_ERR_SUCCESS 0 @@ -121,8 +126,6 @@ typedef struct int VolLeft; int VolRight; - int iRVBLeft; - int iRVBRight; int FB_SRC_A; // (offset) int FB_SRC_B; // (offset) @@ -224,9 +227,10 @@ typedef struct int iRightXAVol; SPUCHAN * s_chan; - int * SB; + REVERBInfo * rvb; - int * RVB; + // buffers + int * SB; int * SSumLR; int pad[29]; @@ -240,7 +244,6 @@ typedef struct #ifndef _IN_SPU extern SPUInfo spu; -extern REVERBInfo rvb; void do_samples(unsigned int cycles_to, int do_sync); void schedule_next_irq(void); diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 7a49b1ca..4588fa7c 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -146,14 +146,14 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, //-------------------------------------------------// case H_SPUReverbAddr: if(val==0xFFFF || val<=0x200) - {rvb.StartAddr=rvb.CurrAddr=0;} + {spu.rvb->StartAddr=spu.rvb->CurrAddr=0;} else { const long iv=(unsigned long)val<<2; - if(rvb.StartAddr!=iv) + if(spu.rvb->StartAddr!=iv) { - rvb.StartAddr=(unsigned long)val<<2; - rvb.CurrAddr=rvb.StartAddr; + spu.rvb->StartAddr=(unsigned long)val<<2; + spu.rvb->CurrAddr=spu.rvb->StartAddr; } } goto rvbd; @@ -163,11 +163,11 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, goto upd_irq; //-------------------------------------------------// case H_SPUrvolL: - rvb.VolLeft=val; + spu.rvb->VolLeft=val; break; //-------------------------------------------------// case H_SPUrvolR: - rvb.VolRight=val; + spu.rvb->VolRight=val; break; //-------------------------------------------------// @@ -246,38 +246,38 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, ReverbOn(16,24,val); break; //-------------------------------------------------// - case H_Reverb+0 : rvb.FB_SRC_A=val*4; goto rvbd; - case H_Reverb+2 : rvb.FB_SRC_B=val*4; goto rvbd; - case H_Reverb+4 : rvb.IIR_ALPHA=(short)val; goto rvbd; - case H_Reverb+6 : rvb.ACC_COEF_A=(short)val; goto rvbd; - case H_Reverb+8 : rvb.ACC_COEF_B=(short)val; goto rvbd; - case H_Reverb+10 : rvb.ACC_COEF_C=(short)val; goto rvbd; - case H_Reverb+12 : rvb.ACC_COEF_D=(short)val; goto rvbd; - case H_Reverb+14 : rvb.IIR_COEF=(short)val; goto rvbd; - case H_Reverb+16 : rvb.FB_ALPHA=(short)val; goto rvbd; - case H_Reverb+18 : rvb.FB_X=(short)val; goto rvbd; - case H_Reverb+20 : rvb.IIR_DEST_A0=val*4; goto rvbd; - case H_Reverb+22 : rvb.IIR_DEST_A1=val*4; goto rvbd; - case H_Reverb+24 : rvb.ACC_SRC_A0=val*4; goto rvbd; - case H_Reverb+26 : rvb.ACC_SRC_A1=val*4; goto rvbd; - case H_Reverb+28 : rvb.ACC_SRC_B0=val*4; goto rvbd; - case H_Reverb+30 : rvb.ACC_SRC_B1=val*4; goto rvbd; - case H_Reverb+32 : rvb.IIR_SRC_A0=val*4; goto rvbd; - case H_Reverb+34 : rvb.IIR_SRC_A1=val*4; goto rvbd; - case H_Reverb+36 : rvb.IIR_DEST_B0=val*4; goto rvbd; - case H_Reverb+38 : rvb.IIR_DEST_B1=val*4; goto rvbd; - case H_Reverb+40 : rvb.ACC_SRC_C0=val*4; goto rvbd; - case H_Reverb+42 : rvb.ACC_SRC_C1=val*4; goto rvbd; - case H_Reverb+44 : rvb.ACC_SRC_D0=val*4; goto rvbd; - case H_Reverb+46 : rvb.ACC_SRC_D1=val*4; goto rvbd; - case H_Reverb+48 : rvb.IIR_SRC_B1=val*4; goto rvbd; - case H_Reverb+50 : rvb.IIR_SRC_B0=val*4; goto rvbd; - case H_Reverb+52 : rvb.MIX_DEST_A0=val*4; goto rvbd; - case H_Reverb+54 : rvb.MIX_DEST_A1=val*4; goto rvbd; - case H_Reverb+56 : rvb.MIX_DEST_B0=val*4; goto rvbd; - case H_Reverb+58 : rvb.MIX_DEST_B1=val*4; goto rvbd; - case H_Reverb+60 : rvb.IN_COEF_L=(short)val; goto rvbd; - case H_Reverb+62 : rvb.IN_COEF_R=(short)val; goto rvbd; + case H_Reverb+0 : spu.rvb->FB_SRC_A=val*4; goto rvbd; + case H_Reverb+2 : spu.rvb->FB_SRC_B=val*4; goto rvbd; + case H_Reverb+4 : spu.rvb->IIR_ALPHA=(short)val; goto rvbd; + case H_Reverb+6 : spu.rvb->ACC_COEF_A=(short)val; goto rvbd; + case H_Reverb+8 : spu.rvb->ACC_COEF_B=(short)val; goto rvbd; + case H_Reverb+10 : spu.rvb->ACC_COEF_C=(short)val; goto rvbd; + case H_Reverb+12 : spu.rvb->ACC_COEF_D=(short)val; goto rvbd; + case H_Reverb+14 : spu.rvb->IIR_COEF=(short)val; goto rvbd; + case H_Reverb+16 : spu.rvb->FB_ALPHA=(short)val; goto rvbd; + case H_Reverb+18 : spu.rvb->FB_X=(short)val; goto rvbd; + case H_Reverb+20 : spu.rvb->IIR_DEST_A0=val*4; goto rvbd; + case H_Reverb+22 : spu.rvb->IIR_DEST_A1=val*4; goto rvbd; + case H_Reverb+24 : spu.rvb->ACC_SRC_A0=val*4; goto rvbd; + case H_Reverb+26 : spu.rvb->ACC_SRC_A1=val*4; goto rvbd; + case H_Reverb+28 : spu.rvb->ACC_SRC_B0=val*4; goto rvbd; + case H_Reverb+30 : spu.rvb->ACC_SRC_B1=val*4; goto rvbd; + case H_Reverb+32 : spu.rvb->IIR_SRC_A0=val*4; goto rvbd; + case H_Reverb+34 : spu.rvb->IIR_SRC_A1=val*4; goto rvbd; + case H_Reverb+36 : spu.rvb->IIR_DEST_B0=val*4; goto rvbd; + case H_Reverb+38 : spu.rvb->IIR_DEST_B1=val*4; goto rvbd; + case H_Reverb+40 : spu.rvb->ACC_SRC_C0=val*4; goto rvbd; + case H_Reverb+42 : spu.rvb->ACC_SRC_C1=val*4; goto rvbd; + case H_Reverb+44 : spu.rvb->ACC_SRC_D0=val*4; goto rvbd; + case H_Reverb+46 : spu.rvb->ACC_SRC_D1=val*4; goto rvbd; + case H_Reverb+48 : spu.rvb->IIR_SRC_B1=val*4; goto rvbd; + case H_Reverb+50 : spu.rvb->IIR_SRC_B0=val*4; goto rvbd; + case H_Reverb+52 : spu.rvb->MIX_DEST_A0=val*4; goto rvbd; + case H_Reverb+54 : spu.rvb->MIX_DEST_A1=val*4; goto rvbd; + case H_Reverb+56 : spu.rvb->MIX_DEST_B0=val*4; goto rvbd; + case H_Reverb+58 : spu.rvb->MIX_DEST_B1=val*4; goto rvbd; + case H_Reverb+60 : spu.rvb->IN_COEF_L=(short)val; goto rvbd; + case H_Reverb+62 : spu.rvb->IN_COEF_R=(short)val; goto rvbd; } return; @@ -287,7 +287,7 @@ upd_irq: return; rvbd: - rvb.dirty = 1; // recalculate on next update + spu.rvb->dirty = 1; // recalculate on next update } //////////////////////////////////////////////////////////////////////// diff --git a/plugins/dfsound/reverb.c b/plugins/dfsound/reverb.c index 7e32b8e8..2ff6edc8 100644 --- a/plugins/dfsound/reverb.c +++ b/plugins/dfsound/reverb.c @@ -50,41 +50,40 @@ INLINE int rvb2ram_offs(int curr, int space, int iOff) // get_buffer content helper: takes care about wraps #define g_buffer(var) \ - ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var)]) + ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)]) // saturate iVal and store it as var #define s_buffer(var, iVal) \ ssat32_to_16(iVal); \ - spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var)] = iVal + spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)] = iVal #define s_buffer1(var, iVal) \ ssat32_to_16(iVal); \ - spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var + 1)] = iVal + spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var + 1)] = iVal //////////////////////////////////////////////////////////////////////// // portions based on spu2-x from PCSX2 -static void MixREVERB(int *SSumLR, int *RVB, int ns_to) +static void MixREVERB(int *SSumLR, int *RVB, int ns_to, int curr_addr) { - int l_old = rvb.iRVBLeft; - int r_old = rvb.iRVBRight; - int curr_addr = rvb.CurrAddr; - int space = 0x40000 - rvb.StartAddr; - int l = 0, r = 0, ns; + const REVERBInfo *rvb = spu.rvb; + int IIR_ALPHA = rvb->IIR_ALPHA; + int IIR_COEF = rvb->IIR_COEF; + int space = 0x40000 - rvb->StartAddr; + int l, r, ns; for (ns = 0; ns < ns_to * 2; ) { - int IIR_ALPHA = rvb.IIR_ALPHA; int ACC0, ACC1, FB_A0, FB_A1, FB_B0, FB_B1; int mix_dest_a0, mix_dest_a1, mix_dest_b0, mix_dest_b1; - int input_L = RVB[ns] * rvb.IN_COEF_L; - int input_R = RVB[ns+1] * rvb.IN_COEF_R; + int input_L = RVB[ns] * rvb->IN_COEF_L; + int input_R = RVB[ns+1] * rvb->IN_COEF_R; - int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * rvb.IIR_COEF) + input_L) >> 15; - int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * rvb.IIR_COEF) + input_R) >> 15; - int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * rvb.IIR_COEF) + input_L) >> 15; - int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * rvb.IIR_COEF) + input_R) >> 15; + int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * IIR_COEF) + input_L) >> 15; + int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * IIR_COEF) + input_R) >> 15; + int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * IIR_COEF) + input_L) >> 15; + int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * IIR_COEF) + input_R) >> 15; int iir_dest_a0 = g_buffer(IIR_DEST_A0); int iir_dest_a1 = g_buffer(IIR_DEST_A1); @@ -96,30 +95,34 @@ static void MixREVERB(int *SSumLR, int *RVB, int ns_to) int IIR_B0 = iir_dest_b0 + ((IIR_INPUT_B0 - iir_dest_b0) * IIR_ALPHA >> 15); int IIR_B1 = iir_dest_b1 + ((IIR_INPUT_B1 - iir_dest_b1) * IIR_ALPHA >> 15); + preload(SSumLR + ns + 64*2/4 - 4); + s_buffer1(IIR_DEST_A0, IIR_A0); s_buffer1(IIR_DEST_A1, IIR_A1); s_buffer1(IIR_DEST_B0, IIR_B0); s_buffer1(IIR_DEST_B1, IIR_B1); - ACC0 = (g_buffer(ACC_SRC_A0) * rvb.ACC_COEF_A + - g_buffer(ACC_SRC_B0) * rvb.ACC_COEF_B + - g_buffer(ACC_SRC_C0) * rvb.ACC_COEF_C + - g_buffer(ACC_SRC_D0) * rvb.ACC_COEF_D) >> 15; - ACC1 = (g_buffer(ACC_SRC_A1) * rvb.ACC_COEF_A + - g_buffer(ACC_SRC_B1) * rvb.ACC_COEF_B + - g_buffer(ACC_SRC_C1) * rvb.ACC_COEF_C + - g_buffer(ACC_SRC_D1) * rvb.ACC_COEF_D) >> 15; + preload(RVB + ns + 64*2/4 - 4); + + ACC0 = (g_buffer(ACC_SRC_A0) * rvb->ACC_COEF_A + + g_buffer(ACC_SRC_B0) * rvb->ACC_COEF_B + + g_buffer(ACC_SRC_C0) * rvb->ACC_COEF_C + + g_buffer(ACC_SRC_D0) * rvb->ACC_COEF_D) >> 15; + ACC1 = (g_buffer(ACC_SRC_A1) * rvb->ACC_COEF_A + + g_buffer(ACC_SRC_B1) * rvb->ACC_COEF_B + + g_buffer(ACC_SRC_C1) * rvb->ACC_COEF_C + + g_buffer(ACC_SRC_D1) * rvb->ACC_COEF_D) >> 15; FB_A0 = g_buffer(FB_SRC_A0); FB_A1 = g_buffer(FB_SRC_A1); FB_B0 = g_buffer(FB_SRC_B0); FB_B1 = g_buffer(FB_SRC_B1); - mix_dest_a0 = ACC0 - ((FB_A0 * rvb.FB_ALPHA) >> 15); - mix_dest_a1 = ACC1 - ((FB_A1 * rvb.FB_ALPHA) >> 15); + mix_dest_a0 = ACC0 - ((FB_A0 * rvb->FB_ALPHA) >> 15); + mix_dest_a1 = ACC1 - ((FB_A1 * rvb->FB_ALPHA) >> 15); - mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb.FB_ALPHA - FB_B0 * rvb.FB_X) >> 15); - mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb.FB_ALPHA - FB_B1 * rvb.FB_X) >> 15); + mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb->FB_ALPHA - FB_B0 * rvb->FB_X) >> 15); + mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb->FB_ALPHA - FB_B1 * rvb->FB_X) >> 15); s_buffer(MIX_DEST_A0, mix_dest_a0); s_buffer(MIX_DEST_A1, mix_dest_a1); @@ -129,73 +132,60 @@ static void MixREVERB(int *SSumLR, int *RVB, int ns_to) l = (mix_dest_a0 + mix_dest_b0) / 2; r = (mix_dest_a1 + mix_dest_b1) / 2; - l = (l * rvb.VolLeft) >> 15; // 15? - r = (r * rvb.VolRight) >> 15; + l = (l * rvb->VolLeft) >> 15; // 15? + r = (r * rvb->VolRight) >> 15; - SSumLR[ns++] += (l + l_old) / 2; - SSumLR[ns++] += (r + r_old) / 2; SSumLR[ns++] += l; SSumLR[ns++] += r; - - l_old = l; - r_old = r; + SSumLR[ns++] += l; + SSumLR[ns++] += r; curr_addr++; - if (curr_addr >= 0x40000) curr_addr = rvb.StartAddr; + if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr; } - - rvb.iRVBLeft = l; - rvb.iRVBRight = r; - rvb.CurrAddr = curr_addr; } -static void MixREVERB_off(int *SSumLR, int ns_to) +static void MixREVERB_off(int *SSumLR, int ns_to, int curr_addr) { - int l_old = rvb.iRVBLeft; - int r_old = rvb.iRVBRight; - int curr_addr = rvb.CurrAddr; - int space = 0x40000 - rvb.StartAddr; - int l = 0, r = 0, ns; + const REVERBInfo *rvb = spu.rvb; + int space = 0x40000 - rvb->StartAddr; + int l, r, ns; for (ns = 0; ns < ns_to * 2; ) { + preload(SSumLR + ns + 64*2/4 - 4); + l = (g_buffer(MIX_DEST_A0) + g_buffer(MIX_DEST_B0)) / 2; r = (g_buffer(MIX_DEST_A1) + g_buffer(MIX_DEST_B1)) / 2; - l = (l * rvb.VolLeft) >> 15; - r = (r * rvb.VolRight) >> 15; + l = (l * rvb->VolLeft) >> 15; + r = (r * rvb->VolRight) >> 15; - SSumLR[ns++] += (l + l_old) / 2; - SSumLR[ns++] += (r + r_old) / 2; SSumLR[ns++] += l; SSumLR[ns++] += r; - - l_old = l; - r_old = r; + SSumLR[ns++] += l; + SSumLR[ns++] += r; curr_addr++; - if (curr_addr >= 0x40000) curr_addr = rvb.StartAddr; + if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr; } - - rvb.iRVBLeft = l; - rvb.iRVBRight = r; - rvb.CurrAddr = curr_addr; } -static void prepare_offsets(void) +static void REVERBPrep(void) { - int space = 0x40000 - rvb.StartAddr; + REVERBInfo *rvb = spu.rvb; + int space = 0x40000 - rvb->StartAddr; int t; #define prep_offs(v) \ - t = rvb.v; \ + t = rvb->v; \ while (t >= space) \ t -= space; \ - rvb.n##v = t + rvb->n##v = t #define prep_offs2(d, v1, v2) \ - t = rvb.v1 - rvb.v2; \ + t = rvb->v1 - rvb->v2; \ while (t >= space) \ t -= space; \ - rvb.n##d = t + rvb->n##d = t prep_offs(IIR_SRC_A0); prep_offs(IIR_SRC_A1); @@ -224,37 +214,18 @@ static void prepare_offsets(void) #undef prep_offs #undef prep_offs2 - rvb.dirty = 0; + rvb->dirty = 0; } -INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to) +INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to, int curr_addr) { - if (!rvb.StartAddr) // reverb is off - { - rvb.iRVBLeft = rvb.iRVBRight = 0; - return; - } - if (spu.spuCtrl & 0x80) // -> reverb on? oki { - if (unlikely(rvb.dirty)) - prepare_offsets(); - - MixREVERB(SSumLR, RVB, ns_to); - } - else if (rvb.VolLeft || rvb.VolRight) - { - if (unlikely(rvb.dirty)) - prepare_offsets(); - - MixREVERB_off(SSumLR, ns_to); + MixREVERB(SSumLR, RVB, ns_to, curr_addr); } - else // -> reverb off + else if (spu.rvb->VolLeft || spu.rvb->VolRight) { - // reverb runs anyway - rvb.CurrAddr += ns_to / 2; - while (rvb.CurrAddr >= 0x40000) - rvb.CurrAddr -= 0x40000 - rvb.StartAddr; + MixREVERB_off(SSumLR, ns_to, curr_addr); } } diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index ec31b0ca..8681d354 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -73,11 +73,8 @@ static char * libraryInfo = N_("P.E.Op.S. Sound Driver V1.7\nCoded by Pete B SPUInfo spu; SPUConfig spu_config; -// MAIN infos struct for each channel - -REVERBInfo rvb; - static int iFMod[NSSIZE]; +static int RVB[NSSIZE * 2]; int ChanBuf[NSSIZE]; #define CDDA_BUFFER_SIZE (16384 * sizeof(uint32_t)) // must be power of 2 @@ -763,11 +760,13 @@ static void do_silent_chans(int ns_to, int silentch) static void do_channels(int ns_to) { unsigned int mask; + int do_rvb, ch, d; SPUCHAN *s_chan; int *SB, sinc; - int ch, d; - memset(spu.RVB, 0, ns_to * sizeof(spu.RVB[0]) * 2); + do_rvb = spu.rvb->StartAddr && spu_config.iUseReverb; + if (do_rvb) + memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2); mask = spu.dwNewChannel & 0xffffff; for (ch = 0; mask != 0; ch++, mask >>= 1) { @@ -812,14 +811,27 @@ static void do_channels(int ns_to) if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); - if (s_chan->bRVBActive) - mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, spu.RVB); + if (s_chan->bRVBActive && do_rvb) + mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, RVB); else mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); } + + if (spu.rvb->StartAddr) { + if (do_rvb) { + if (unlikely(spu.rvb->dirty)) + REVERBPrep(); + + REVERBDo(spu.SSumLR, RVB, ns_to, spu.rvb->CurrAddr); + } + + spu.rvb->CurrAddr += ns_to / 2; + while (spu.rvb->CurrAddr >= 0x40000) + spu.rvb->CurrAddr -= 0x40000 - spu.rvb->StartAddr; + } } -static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, +static void do_samples_finish(int *SSumLR, int ns_to, int silentch, int decode_pos); // optional worker thread handling @@ -833,8 +845,7 @@ static struct spu_worker { unsigned int exit_thread; unsigned int i_ready; unsigned int i_reaped; - unsigned int req_sent; // dsp - unsigned int last_boot_cnt; + unsigned int last_boot_cnt; // dsp }; // aligning for C64X_DSP unsigned int _pad0[128/4]; @@ -851,6 +862,7 @@ static struct spu_worker { int ns_to; int ctrl; int decode_pos; + int rvb_addr; unsigned int channels_new; unsigned int channels_on; unsigned int channels_silent; @@ -861,10 +873,11 @@ static struct spu_worker { int start; int loop; int ns_to; + short vol_l; + short vol_r; ADSRInfoEx adsr; - // might want to add vol and fmod flags.. + // might also want to add fmod flags.. } ch[24]; - int RVB[NSSIZE * 2]; int SSumLR[NSSIZE * 2]; } i[4]; } *worker; @@ -874,6 +887,7 @@ static struct spu_worker { static void thread_work_start(void); static void thread_work_wait_sync(struct work_item *work, int force); +static void thread_sync_caches(void); static int thread_get_i_done(void); static int decode_block_work(void *context, int ch, int *SB) @@ -936,6 +950,8 @@ static void queue_channel_work(int ns_to, unsigned int silentch) work->ch[ch].sbpos = s_chan->iSBPos; work->ch[ch].sinc = s_chan->sinc; work->ch[ch].adsr = s_chan->ADSRX; + work->ch[ch].vol_l = s_chan->iLeftVolume; + work->ch[ch].vol_r = s_chan->iRightVolume; work->ch[ch].start = s_chan->pCurr - spu.spuMemC; work->ch[ch].loop = s_chan->pLoop - spu.spuMemC; if (s_chan->prevflags & 1) @@ -952,6 +968,19 @@ static void queue_channel_work(int ns_to, unsigned int silentch) } } + work->rvb_addr = 0; + if (spu.rvb->StartAddr) { + if (spu_config.iUseReverb) { + if (unlikely(spu.rvb->dirty)) + REVERBPrep(); + work->rvb_addr = spu.rvb->CurrAddr; + } + + spu.rvb->CurrAddr += ns_to / 2; + while (spu.rvb->CurrAddr >= 0x40000) + spu.rvb->CurrAddr -= 0x40000 - spu.rvb->StartAddr; + } + worker->i_ready++; thread_work_start(); } @@ -965,7 +994,9 @@ static void do_channel_work(struct work_item *work) SPUCHAN *s_chan; ns_to = work->ns_to; - memset(work->RVB, 0, ns_to * sizeof(work->RVB[0]) * 2); + + if (work->rvb_addr) + memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2); mask = work->channels_new; for (ch = 0; mask != 0; ch++, mask >>= 1) { @@ -1010,12 +1041,15 @@ static void do_channel_work(struct work_item *work) if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); - if (s_chan->bRVBActive) + if (s_chan->bRVBActive && work->rvb_addr) mix_chan_rvb(work->SSumLR, ns_to, - s_chan->iLeftVolume, s_chan->iRightVolume, work->RVB); + work->ch[ch].vol_l, work->ch[ch].vol_r, RVB); else - mix_chan(work->SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); + mix_chan(work->SSumLR, ns_to, work->ch[ch].vol_l, work->ch[ch].vol_r); } + + if (work->rvb_addr) + REVERBDo(work->SSumLR, RVB, ns_to, work->rvb_addr); } static void sync_worker_thread(int force) @@ -1032,13 +1066,15 @@ static void sync_worker_thread(int force) work = &worker->i[worker->i_reaped & WORK_I_MASK]; thread_work_wait_sync(work, force); - do_samples_finish(work->SSumLR, work->RVB, work->ns_to, + do_samples_finish(work->SSumLR, work->ns_to, work->channels_silent, work->decode_pos); worker->i_reaped++; done = thread_get_i_done() - worker->i_reaped; used_space = worker->i_ready - worker->i_reaped; } + if (force) + thread_sync_caches(); } #else @@ -1116,7 +1152,7 @@ void do_samples(unsigned int cycles_to, int do_direct) if (do_direct || worker == NULL || !spu_config.iUseThread) { do_channels(ns_to); - do_samples_finish(spu.SSumLR, spu.RVB, ns_to, silentch, spu.decode_pos); + do_samples_finish(spu.SSumLR, ns_to, silentch, spu.decode_pos); } else { queue_channel_work(ns_to, silentch); @@ -1131,7 +1167,7 @@ void do_samples(unsigned int cycles_to, int do_direct) spu.decode_pos = (spu.decode_pos + ns_to) & 0x1ff; } -static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, +static void do_samples_finish(int *SSumLR, int ns_to, int silentch, int decode_pos) { int volmult = spu_config.iVolume; @@ -1150,17 +1186,8 @@ static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, spu.decode_dirty_ch &= ~(1<<3); } - //---------------------------------------------------// - // mix XA infos (if any) - MixXA(SSumLR, ns_to, decode_pos); - /////////////////////////////////////////////////////// - // mix all channels (including reverb) into one buffer - - if(spu_config.iUseReverb) - REVERBDo(SSumLR, RVB, ns_to); - if((spu.spuCtrl&0x4000)==0) // muted? (rare, don't optimize for this) { memset(spu.pS, 0, ns_to * 2 * sizeof(spu.pS[0])); @@ -1283,12 +1310,9 @@ void ClearWorkingState(void) } // SETUPSTREAMS: init most of the spu buffers -void SetupStreams(void) +static void SetupStreams(void) { - int i; - spu.pSpuBuffer = (unsigned char *)malloc(32768); // alloc mixing buffer - spu.RVB = calloc(NSSIZE * 2, sizeof(spu.RVB[0])); spu.SSumLR = calloc(NSSIZE * 2, sizeof(spu.SSumLR[0])); spu.XAStart = // alloc xa buffer @@ -1303,26 +1327,14 @@ void SetupStreams(void) spu.CDDAPlay = spu.CDDAStart; spu.CDDAFeed = spu.CDDAStart; - for(i=0;i init sustain - spu.s_chan[i].ADSRX.SustainIncrease = 1; - spu.s_chan[i].pLoop=spu.spuMemC; - spu.s_chan[i].pCurr=spu.spuMemC; - } - ClearWorkingState(); - - spu.bSpuInit=1; // flag: we are inited } // REMOVESTREAMS: free most buffer -void RemoveStreams(void) +static void RemoveStreams(void) { free(spu.pSpuBuffer); // free mixing buffer spu.pSpuBuffer = NULL; - free(spu.RVB); // free reverb buffer - spu.RVB = NULL; free(spu.SSumLR); spu.SSumLR = NULL; free(spu.XAStart); // free XA buffer @@ -1365,6 +1377,10 @@ static int thread_get_i_done(void) return worker->i_done; } +static void thread_sync_caches(void) +{ +} + static void *spu_worker_thread(void *unused) { struct work_item *work; @@ -1446,11 +1462,13 @@ static void exit_spu_thread(void) // SPUINIT: this func will be called first by the main emu long CALLBACK SPUinit(void) { + int i; + spu.spuMemC = calloc(1, 512 * 1024); - memset((void *)&rvb, 0, sizeof(REVERBInfo)); InitADSR(); spu.s_chan = calloc(MAXCHAN+1, sizeof(spu.s_chan[0])); // channel + 1 infos (1 is security for fmod handling) + spu.rvb = calloc(1, sizeof(REVERBInfo)); spu.SB = calloc(MAXCHAN, sizeof(spu.SB[0]) * SB_SIZE); spu.spuAddr = 0; @@ -1464,6 +1482,16 @@ long CALLBACK SPUinit(void) init_spu_thread(); + for (i = 0; i < MAXCHAN; i++) // loop sound channels + { + spu.s_chan[i].ADSRX.SustainLevel = 0xf; // -> init sustain + spu.s_chan[i].ADSRX.SustainIncrease = 1; + spu.s_chan[i].pLoop = spu.spuMemC; + spu.s_chan[i].pCurr = spu.spuMemC; + } + + spu.bSpuInit=1; // flag: we are inited + return 0; } @@ -1504,6 +1532,8 @@ long CALLBACK SPUshutdown(void) spu.SB = NULL; free(spu.s_chan); spu.s_chan = NULL; + free(spu.rvb); + spu.rvb = NULL; RemoveStreams(); // no more streaming spu.bSpuInit=0; diff --git a/plugins/dfsound/spu_c64x.c b/plugins/dfsound/spu_c64x.c index 200ab384..be10a6bb 100644 --- a/plugins/dfsound/spu_c64x.c +++ b/plugins/dfsound/spu_c64x.c @@ -43,6 +43,8 @@ static struct { dsp_mem_region_t region; dsp_component_id_t compid; + unsigned int stale_caches:1; + unsigned int req_sent:1; } f; static void thread_work_start(void) @@ -63,7 +65,7 @@ static void thread_work_start(void) // to start the DSP, dsp_rpc_send() must be used, // but before that, previous request must be finished - if (worker->req_sent) { + if (f.req_sent) { if (worker->boot_cnt == worker->last_boot_cnt) { // hopefully still booting //printf("booting?\n"); @@ -74,7 +76,7 @@ static void thread_work_start(void) if (ret != 0) { fprintf(stderr, "dsp_rpc_recv failed: %d\n", ret); f.dsp_logbuf_print(); - worker->req_sent = 0; + f.req_sent = 0; spu_config.iUseThread = 0; return; } @@ -94,7 +96,7 @@ static void thread_work_start(void) spu_config.iUseThread = 0; return; } - worker->req_sent = 1; + f.req_sent = 1; } static int thread_get_i_done(void) @@ -108,14 +110,13 @@ static void thread_work_wait_sync(struct work_item *work, int force) int limit = 1000; int ns_to; - ns_to = work->ns_to; - f.dsp_cache_inv_virt(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to); - f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to); - __builtin_prefetch(work->RVB); - __builtin_prefetch(work->SSumLR); - while (worker->i_done == worker->i_reaped && limit-- > 0) { - if (!worker->active) { + if (!f.req_sent) { + printf("dsp: req not sent?\n"); + break; + } + + if (worker->boot_cnt != worker->last_boot_cnt && !worker->active) { printf("dsp: broken sync\n"); worker->last_boot_cnt = ~0; break; @@ -125,6 +126,13 @@ static void thread_work_wait_sync(struct work_item *work, int force) f.dsp_cache_inv_virt(&worker->i_done, 64); } + ns_to = work->ns_to; + f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to); + preload(work->SSumLR); + preload(work->SSumLR + 64/4); + + f.stale_caches = 1; // SB, spuMem + if (limit == 0) printf("dsp: wait timeout\n"); @@ -132,7 +140,7 @@ static void thread_work_wait_sync(struct work_item *work, int force) if (worker->i_reaped != worker->i_done - 1) return; - if (worker->req_sent && (force || worker->i_done == worker->i_ready)) { + if (f.req_sent && (force || worker->i_done == worker->i_ready)) { dsp_msg_t msg; int ret; @@ -142,12 +150,20 @@ static void thread_work_wait_sync(struct work_item *work, int force) f.dsp_logbuf_print(); spu_config.iUseThread = 0; } - worker->req_sent = 0; + f.req_sent = 0; } +} - if (force) { +static void thread_sync_caches(void) +{ + if (f.stale_caches) { f.dsp_cache_inv_virt(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24); f.dsp_cache_inv_virt(spu.spuMemC + 0x800, 0x800); + if (spu.rvb->StartAddr) { + int left = 0x40000 - spu.rvb->StartAddr; + f.dsp_cache_inv_virt(spu.spuMem + spu.rvb->StartAddr, left * 2); + } + f.stale_caches = 0; } } @@ -220,9 +236,9 @@ static void init_spu_thread(void) mem->sizeof_region_mem, sizeof(*mem)); goto fail_init; } - if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), s_chan[1])) { + if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), in.s_chan[1])) { fprintf(stderr, "error: size mismatch 2: %d vs %zd\n", - mem->offsetof_s_chan1, offsetof(typeof(*mem), s_chan[1])); + mem->offsetof_s_chan1, offsetof(typeof(*mem), in.s_chan[1])); goto fail_init; } if (mem->offsetof_spos_3_20 != offsetof(typeof(*mem), worker.i[3].ch[20])) { @@ -237,7 +253,9 @@ static void init_spu_thread(void) free(spu.SB); spu.SB = mem->SB; free(spu.s_chan); - spu.s_chan = mem->s_chan; + spu.s_chan = mem->in.s_chan; + free(spu.rvb); + spu.rvb = &mem->in.rvb; worker = &mem->worker; printf("spu: C64x DSP ready (id=%d).\n", (int)f.compid); @@ -267,8 +285,10 @@ static void exit_spu_thread(void) if (worker == NULL) return; - if (worker->req_sent) + if (f.req_sent) { f.dsp_rpc_recv(&msg); + f.req_sent = 0; + } f.dsp_logbuf_print(); f.dsp_shm_free(f.region); @@ -277,6 +297,7 @@ static void exit_spu_thread(void) spu.spuMemC = NULL; spu.SB = NULL; spu.s_chan = NULL; + spu.rvb = NULL; worker = NULL; } diff --git a/plugins/dfsound/spu_c64x.h b/plugins/dfsound/spu_c64x.h index bb20cc34..8210e63a 100644 --- a/plugins/dfsound/spu_c64x.h +++ b/plugins/dfsound/spu_c64x.h @@ -11,8 +11,12 @@ struct region_mem { // careful not to lose ARM writes by DSP overwriting // with old data when it's writing out neighbor cachelines int _pad1[128/4 - ((SB_SIZE * 24) & (128/4 - 1))]; - SPUCHAN s_chan[24 + 1]; - int _pad2[128/4 - ((sizeof(SPUCHAN) * 25 / 4) & (128/4 - 1))]; + struct spu_in { + // these are not to be modified by DSP + SPUCHAN s_chan[24 + 1]; + REVERBInfo rvb; + } in; + int _pad2[128/4 - ((sizeof(struct spu_in) / 4) & (128/4 - 1))]; struct spu_worker worker; SPUConfig spu_config; // init/debug diff --git a/plugins/dfsound/spu_c64x_dspcode.c b/plugins/dfsound/spu_c64x_dspcode.c index 97d30280..b0352a9f 100644 --- a/plugins/dfsound/spu_c64x_dspcode.c +++ b/plugins/dfsound/spu_c64x_dspcode.c @@ -31,6 +31,7 @@ /* dummy deps, some bloat but avoids ifdef hell in SPU code.. */ static void thread_work_start(void) {} static void thread_work_wait_sync(struct work_item *work, int force) {} +static void thread_sync_caches(void) {} static int thread_get_i_done(void) { return 0; } struct out_driver *out_current; void SetupSound(void) {} @@ -38,7 +39,8 @@ void SetupSound(void) {} static void invalidate_cache(struct work_item *work) { - syscalls.cache_inv(work, offsetof(typeof(*work), RVB), 1); + // see comment in writeout_cache() + //syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1); syscalls.cache_inv(spu.s_chan, sizeof(spu.s_chan[0]) * 24, 0); syscalls.cache_inv(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * work->ns_to, 0); @@ -48,14 +50,16 @@ static void writeout_cache(struct work_item *work) { int ns_to = work->ns_to; - syscalls.cache_wb(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to, 1); syscalls.cache_wb(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to, 1); + // have to invalidate now, otherwise there is a race between + // DSP evicting dirty lines and ARM writing new data to this area + syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 0); } static void do_processing(void) { + int left, dirty = 0, had_rvb = 0; struct work_item *work; - int left, dirty = 0; while (worker->active) { @@ -70,6 +74,8 @@ static void do_processing(void) work = &worker->i[worker->i_done & WORK_I_MASK]; invalidate_cache(work); + had_rvb |= work->rvb_addr; + spu.spuCtrl = work->ctrl; do_channel_work(work); writeout_cache(work); @@ -82,6 +88,11 @@ static void do_processing(void) if (dirty) { syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1); syscalls.cache_wb(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24, 1); + if (had_rvb) { + left = 0x40000 - spu.rvb->StartAddr; + syscalls.cache_wb(spu.spuMem + spu.rvb->StartAddr, left * 2, 1); + had_rvb = 0; + } dirty = 0; continue; } @@ -105,12 +116,13 @@ static unsigned int exec(dsp_component_cmd_t cmd, spu.spuMemC = mem->spu_ram; spu.SB = mem->SB; - spu.s_chan = mem->s_chan; + spu.s_chan = mem->in.s_chan; + spu.rvb = &mem->in.rvb; worker = &mem->worker; memcpy(&spu_config, &mem->spu_config, sizeof(spu_config)); mem->sizeof_region_mem = sizeof(*mem); - mem->offsetof_s_chan1 = offsetof(typeof(*mem), s_chan[1]); + mem->offsetof_s_chan1 = offsetof(typeof(*mem), in.s_chan[1]); mem->offsetof_spos_3_20 = offsetof(typeof(*mem), worker.i[3].ch[20]); // seems to be unneeded, no write-alloc? but just in case.. syscalls.cache_wb(&mem->sizeof_region_mem, 3 * 4, 1); @@ -126,7 +138,9 @@ static unsigned int exec(dsp_component_cmd_t cmd, // c64_tools lib does BCACHE_wbInvAll() when it receives mailbox irq, // but invalidate anyway in case c64_tools is ever fixed.. - syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0); + // XXX edit: don't bother as reverb is not handled, will fix if needed + //syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0); + //syscalls.cache_inv(&mem->in, sizeof(mem->in), 0); break; default: -- 2.39.5