#define noinline\r
#define unlikely(x) x\r
#endif\r
+#if defined(__GNUC__) && !defined(_TMS320C6X) /* compiler defines __GNUC__ and _TMS320C6X is not defined */\r
+#define preload __builtin_prefetch /* preload(p) becomes a __builtin_prefetch(p) hint */\r
+#else\r
+#define preload(...) /* otherwise preload() expands to nothing, arguments are discarded */\r
+#endif\r
\r
#define PSE_LT_SPU 4\r
#define PSE_SPU_ERR_SUCCESS 0\r
\r
int VolLeft;\r
int VolRight;\r
- int iRVBLeft;\r
- int iRVBRight;\r
\r
int FB_SRC_A; // (offset)\r
int FB_SRC_B; // (offset)\r
int iRightXAVol;\r
\r
SPUCHAN * s_chan;\r
- int * SB;\r
+ REVERBInfo * rvb;\r
\r
- int * RVB;\r
+ // buffers\r
+ int * SB;\r
int * SSumLR;\r
\r
int pad[29];\r
#ifndef _IN_SPU\r
\r
extern SPUInfo spu;\r
-extern REVERBInfo rvb;\r
\r
void do_samples(unsigned int cycles_to, int do_sync);\r
void schedule_next_irq(void);\r
//-------------------------------------------------//\r
case H_SPUReverbAddr:\r
if(val==0xFFFF || val<=0x200)\r
- {rvb.StartAddr=rvb.CurrAddr=0;}\r
+ {spu.rvb->StartAddr=spu.rvb->CurrAddr=0;}\r
else\r
{\r
const long iv=(unsigned long)val<<2;\r
- if(rvb.StartAddr!=iv)\r
+ if(spu.rvb->StartAddr!=iv)\r
{\r
- rvb.StartAddr=(unsigned long)val<<2;\r
- rvb.CurrAddr=rvb.StartAddr;\r
+ spu.rvb->StartAddr=(unsigned long)val<<2;\r
+ spu.rvb->CurrAddr=spu.rvb->StartAddr;\r
}\r
}\r
goto rvbd;\r
goto upd_irq;\r
//-------------------------------------------------//\r
case H_SPUrvolL:\r
- rvb.VolLeft=val;\r
+ spu.rvb->VolLeft=val;\r
break;\r
//-------------------------------------------------//\r
case H_SPUrvolR:\r
- rvb.VolRight=val;\r
+ spu.rvb->VolRight=val;\r
break;\r
//-------------------------------------------------//\r
\r
ReverbOn(16,24,val);\r
break;\r
//-------------------------------------------------//\r
- case H_Reverb+0 : rvb.FB_SRC_A=val*4; goto rvbd;\r
- case H_Reverb+2 : rvb.FB_SRC_B=val*4; goto rvbd;\r
- case H_Reverb+4 : rvb.IIR_ALPHA=(short)val; goto rvbd;\r
- case H_Reverb+6 : rvb.ACC_COEF_A=(short)val; goto rvbd;\r
- case H_Reverb+8 : rvb.ACC_COEF_B=(short)val; goto rvbd;\r
- case H_Reverb+10 : rvb.ACC_COEF_C=(short)val; goto rvbd;\r
- case H_Reverb+12 : rvb.ACC_COEF_D=(short)val; goto rvbd;\r
- case H_Reverb+14 : rvb.IIR_COEF=(short)val; goto rvbd;\r
- case H_Reverb+16 : rvb.FB_ALPHA=(short)val; goto rvbd;\r
- case H_Reverb+18 : rvb.FB_X=(short)val; goto rvbd;\r
- case H_Reverb+20 : rvb.IIR_DEST_A0=val*4; goto rvbd;\r
- case H_Reverb+22 : rvb.IIR_DEST_A1=val*4; goto rvbd;\r
- case H_Reverb+24 : rvb.ACC_SRC_A0=val*4; goto rvbd;\r
- case H_Reverb+26 : rvb.ACC_SRC_A1=val*4; goto rvbd;\r
- case H_Reverb+28 : rvb.ACC_SRC_B0=val*4; goto rvbd;\r
- case H_Reverb+30 : rvb.ACC_SRC_B1=val*4; goto rvbd;\r
- case H_Reverb+32 : rvb.IIR_SRC_A0=val*4; goto rvbd;\r
- case H_Reverb+34 : rvb.IIR_SRC_A1=val*4; goto rvbd;\r
- case H_Reverb+36 : rvb.IIR_DEST_B0=val*4; goto rvbd;\r
- case H_Reverb+38 : rvb.IIR_DEST_B1=val*4; goto rvbd;\r
- case H_Reverb+40 : rvb.ACC_SRC_C0=val*4; goto rvbd;\r
- case H_Reverb+42 : rvb.ACC_SRC_C1=val*4; goto rvbd;\r
- case H_Reverb+44 : rvb.ACC_SRC_D0=val*4; goto rvbd;\r
- case H_Reverb+46 : rvb.ACC_SRC_D1=val*4; goto rvbd;\r
- case H_Reverb+48 : rvb.IIR_SRC_B1=val*4; goto rvbd;\r
- case H_Reverb+50 : rvb.IIR_SRC_B0=val*4; goto rvbd;\r
- case H_Reverb+52 : rvb.MIX_DEST_A0=val*4; goto rvbd;\r
- case H_Reverb+54 : rvb.MIX_DEST_A1=val*4; goto rvbd;\r
- case H_Reverb+56 : rvb.MIX_DEST_B0=val*4; goto rvbd;\r
- case H_Reverb+58 : rvb.MIX_DEST_B1=val*4; goto rvbd;\r
- case H_Reverb+60 : rvb.IN_COEF_L=(short)val; goto rvbd;\r
- case H_Reverb+62 : rvb.IN_COEF_R=(short)val; goto rvbd;\r
+ case H_Reverb+0 : spu.rvb->FB_SRC_A=val*4; goto rvbd;\r
+ case H_Reverb+2 : spu.rvb->FB_SRC_B=val*4; goto rvbd;\r
+ case H_Reverb+4 : spu.rvb->IIR_ALPHA=(short)val; goto rvbd;\r
+ case H_Reverb+6 : spu.rvb->ACC_COEF_A=(short)val; goto rvbd;\r
+ case H_Reverb+8 : spu.rvb->ACC_COEF_B=(short)val; goto rvbd;\r
+ case H_Reverb+10 : spu.rvb->ACC_COEF_C=(short)val; goto rvbd;\r
+ case H_Reverb+12 : spu.rvb->ACC_COEF_D=(short)val; goto rvbd;\r
+ case H_Reverb+14 : spu.rvb->IIR_COEF=(short)val; goto rvbd;\r
+ case H_Reverb+16 : spu.rvb->FB_ALPHA=(short)val; goto rvbd;\r
+ case H_Reverb+18 : spu.rvb->FB_X=(short)val; goto rvbd;\r
+ case H_Reverb+20 : spu.rvb->IIR_DEST_A0=val*4; goto rvbd;\r
+ case H_Reverb+22 : spu.rvb->IIR_DEST_A1=val*4; goto rvbd;\r
+ case H_Reverb+24 : spu.rvb->ACC_SRC_A0=val*4; goto rvbd;\r
+ case H_Reverb+26 : spu.rvb->ACC_SRC_A1=val*4; goto rvbd;\r
+ case H_Reverb+28 : spu.rvb->ACC_SRC_B0=val*4; goto rvbd;\r
+ case H_Reverb+30 : spu.rvb->ACC_SRC_B1=val*4; goto rvbd;\r
+ case H_Reverb+32 : spu.rvb->IIR_SRC_A0=val*4; goto rvbd;\r
+ case H_Reverb+34 : spu.rvb->IIR_SRC_A1=val*4; goto rvbd;\r
+ case H_Reverb+36 : spu.rvb->IIR_DEST_B0=val*4; goto rvbd;\r
+ case H_Reverb+38 : spu.rvb->IIR_DEST_B1=val*4; goto rvbd;\r
+ case H_Reverb+40 : spu.rvb->ACC_SRC_C0=val*4; goto rvbd;\r
+ case H_Reverb+42 : spu.rvb->ACC_SRC_C1=val*4; goto rvbd;\r
+ case H_Reverb+44 : spu.rvb->ACC_SRC_D0=val*4; goto rvbd;\r
+ case H_Reverb+46 : spu.rvb->ACC_SRC_D1=val*4; goto rvbd;\r
+ case H_Reverb+48 : spu.rvb->IIR_SRC_B1=val*4; goto rvbd;\r
+ case H_Reverb+50 : spu.rvb->IIR_SRC_B0=val*4; goto rvbd;\r
+ case H_Reverb+52 : spu.rvb->MIX_DEST_A0=val*4; goto rvbd;\r
+ case H_Reverb+54 : spu.rvb->MIX_DEST_A1=val*4; goto rvbd;\r
+ case H_Reverb+56 : spu.rvb->MIX_DEST_B0=val*4; goto rvbd;\r
+ case H_Reverb+58 : spu.rvb->MIX_DEST_B1=val*4; goto rvbd;\r
+ case H_Reverb+60 : spu.rvb->IN_COEF_L=(short)val; goto rvbd;\r
+ case H_Reverb+62 : spu.rvb->IN_COEF_R=(short)val; goto rvbd;\r
}\r
return;\r
\r
return;\r
\r
rvbd:\r
- rvb.dirty = 1; // recalculate on next update\r
+ spu.rvb->dirty = 1; // recalculate on next update\r
}\r
\r
////////////////////////////////////////////////////////////////////////\r
\r
// get_buffer content helper: takes care about wraps\r
#define g_buffer(var) \\r
- ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var)])\r
+ ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)])\r
\r
// saturate iVal and store it as var\r
#define s_buffer(var, iVal) \\r
ssat32_to_16(iVal); \\r
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var)] = iVal\r
+ spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)] = iVal\r
\r
#define s_buffer1(var, iVal) \\r
ssat32_to_16(iVal); \\r
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var + 1)] = iVal\r
+ spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var + 1)] = iVal\r
\r
////////////////////////////////////////////////////////////////////////\r
\r
// portions based on spu2-x from PCSX2\r
-static void MixREVERB(int *SSumLR, int *RVB, int ns_to)\r
+static void MixREVERB(int *SSumLR, int *RVB, int ns_to, int curr_addr)\r
{\r
- int l_old = rvb.iRVBLeft;\r
- int r_old = rvb.iRVBRight;\r
- int curr_addr = rvb.CurrAddr;\r
- int space = 0x40000 - rvb.StartAddr;\r
- int l = 0, r = 0, ns;\r
+ const REVERBInfo *rvb = spu.rvb;\r
+ int IIR_ALPHA = rvb->IIR_ALPHA;\r
+ int IIR_COEF = rvb->IIR_COEF;\r
+ int space = 0x40000 - rvb->StartAddr;\r
+ int l, r, ns;\r
\r
for (ns = 0; ns < ns_to * 2; )\r
{\r
- int IIR_ALPHA = rvb.IIR_ALPHA;\r
int ACC0, ACC1, FB_A0, FB_A1, FB_B0, FB_B1;\r
int mix_dest_a0, mix_dest_a1, mix_dest_b0, mix_dest_b1;\r
\r
- int input_L = RVB[ns] * rvb.IN_COEF_L;\r
- int input_R = RVB[ns+1] * rvb.IN_COEF_R;\r
+ int input_L = RVB[ns] * rvb->IN_COEF_L;\r
+ int input_R = RVB[ns+1] * rvb->IN_COEF_R;\r
\r
- int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * rvb.IIR_COEF) + input_L) >> 15;\r
- int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * rvb.IIR_COEF) + input_R) >> 15;\r
- int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * rvb.IIR_COEF) + input_L) >> 15;\r
- int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * rvb.IIR_COEF) + input_R) >> 15;\r
+ int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * IIR_COEF) + input_L) >> 15;\r
+ int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * IIR_COEF) + input_R) >> 15;\r
+ int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * IIR_COEF) + input_L) >> 15;\r
+ int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * IIR_COEF) + input_R) >> 15;\r
\r
int iir_dest_a0 = g_buffer(IIR_DEST_A0);\r
int iir_dest_a1 = g_buffer(IIR_DEST_A1);\r
int IIR_B0 = iir_dest_b0 + ((IIR_INPUT_B0 - iir_dest_b0) * IIR_ALPHA >> 15);\r
int IIR_B1 = iir_dest_b1 + ((IIR_INPUT_B1 - iir_dest_b1) * IIR_ALPHA >> 15);\r
\r
+ preload(SSumLR + ns + 64*2/4 - 4);\r
+\r
s_buffer1(IIR_DEST_A0, IIR_A0);\r
s_buffer1(IIR_DEST_A1, IIR_A1);\r
s_buffer1(IIR_DEST_B0, IIR_B0);\r
s_buffer1(IIR_DEST_B1, IIR_B1);\r
\r
- ACC0 = (g_buffer(ACC_SRC_A0) * rvb.ACC_COEF_A +\r
- g_buffer(ACC_SRC_B0) * rvb.ACC_COEF_B +\r
- g_buffer(ACC_SRC_C0) * rvb.ACC_COEF_C +\r
- g_buffer(ACC_SRC_D0) * rvb.ACC_COEF_D) >> 15;\r
- ACC1 = (g_buffer(ACC_SRC_A1) * rvb.ACC_COEF_A +\r
- g_buffer(ACC_SRC_B1) * rvb.ACC_COEF_B +\r
- g_buffer(ACC_SRC_C1) * rvb.ACC_COEF_C +\r
- g_buffer(ACC_SRC_D1) * rvb.ACC_COEF_D) >> 15;\r
+ preload(RVB + ns + 64*2/4 - 4);\r
+\r
+ ACC0 = (g_buffer(ACC_SRC_A0) * rvb->ACC_COEF_A +\r
+ g_buffer(ACC_SRC_B0) * rvb->ACC_COEF_B +\r
+ g_buffer(ACC_SRC_C0) * rvb->ACC_COEF_C +\r
+ g_buffer(ACC_SRC_D0) * rvb->ACC_COEF_D) >> 15;\r
+ ACC1 = (g_buffer(ACC_SRC_A1) * rvb->ACC_COEF_A +\r
+ g_buffer(ACC_SRC_B1) * rvb->ACC_COEF_B +\r
+ g_buffer(ACC_SRC_C1) * rvb->ACC_COEF_C +\r
+ g_buffer(ACC_SRC_D1) * rvb->ACC_COEF_D) >> 15;\r
\r
FB_A0 = g_buffer(FB_SRC_A0);\r
FB_A1 = g_buffer(FB_SRC_A1);\r
FB_B0 = g_buffer(FB_SRC_B0);\r
FB_B1 = g_buffer(FB_SRC_B1);\r
\r
- mix_dest_a0 = ACC0 - ((FB_A0 * rvb.FB_ALPHA) >> 15);\r
- mix_dest_a1 = ACC1 - ((FB_A1 * rvb.FB_ALPHA) >> 15);\r
+ mix_dest_a0 = ACC0 - ((FB_A0 * rvb->FB_ALPHA) >> 15);\r
+ mix_dest_a1 = ACC1 - ((FB_A1 * rvb->FB_ALPHA) >> 15);\r
\r
- mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb.FB_ALPHA - FB_B0 * rvb.FB_X) >> 15);\r
- mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb.FB_ALPHA - FB_B1 * rvb.FB_X) >> 15);\r
+ mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb->FB_ALPHA - FB_B0 * rvb->FB_X) >> 15);\r
+ mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb->FB_ALPHA - FB_B1 * rvb->FB_X) >> 15);\r
\r
s_buffer(MIX_DEST_A0, mix_dest_a0);\r
s_buffer(MIX_DEST_A1, mix_dest_a1);\r
l = (mix_dest_a0 + mix_dest_b0) / 2;\r
r = (mix_dest_a1 + mix_dest_b1) / 2;\r
\r
- l = (l * rvb.VolLeft) >> 15; // 15?\r
- r = (r * rvb.VolRight) >> 15;\r
+ l = (l * rvb->VolLeft) >> 15; // 15?\r
+ r = (r * rvb->VolRight) >> 15;\r
\r
- SSumLR[ns++] += (l + l_old) / 2;\r
- SSumLR[ns++] += (r + r_old) / 2;\r
SSumLR[ns++] += l;\r
SSumLR[ns++] += r;\r
-\r
- l_old = l;\r
- r_old = r;\r
+ SSumLR[ns++] += l;\r
+ SSumLR[ns++] += r;\r
\r
curr_addr++;\r
- if (curr_addr >= 0x40000) curr_addr = rvb.StartAddr;\r
+ if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr;\r
}\r
-\r
- rvb.iRVBLeft = l;\r
- rvb.iRVBRight = r;\r
- rvb.CurrAddr = curr_addr;\r
}\r
\r
-static void MixREVERB_off(int *SSumLR, int ns_to)\r
+static void MixREVERB_off(int *SSumLR, int ns_to, int curr_addr) /* adds the MIX_DEST buffer contents, scaled by the reverb volumes, into SSumLR; the start address is now a parameter and nothing is written back to *rvb */\r
{\r
- int l_old = rvb.iRVBLeft;\r
- int r_old = rvb.iRVBRight;\r
- int curr_addr = rvb.CurrAddr;\r
- int space = 0x40000 - rvb.StartAddr;\r
- int l = 0, r = 0, ns;\r
+ const REVERBInfo *rvb = spu.rvb; /* read-only view of the shared reverb state */\r
+ int space = 0x40000 - rvb->StartAddr; /* distance from StartAddr to the 0x40000 limit, consumed by g_buffer() for wrapping */\r
+ int l, r, ns;\r
\r
for (ns = 0; ns < ns_to * 2; )\r
{\r
+ preload(SSumLR + ns + 64*2/4 - 4); /* prefetch hint ahead of ns; no effect where preload() is defined empty */\r
+\r
l = (g_buffer(MIX_DEST_A0) + g_buffer(MIX_DEST_B0)) / 2;\r
r = (g_buffer(MIX_DEST_A1) + g_buffer(MIX_DEST_B1)) / 2;\r
\r
- l = (l * rvb.VolLeft) >> 15;\r
- r = (r * rvb.VolRight) >> 15;\r
+ l = (l * rvb->VolLeft) >> 15; /* scale by the reverb volume, then drop 15 bits */\r
+ r = (r * rvb->VolRight) >> 15;\r
\r
- SSumLR[ns++] += (l + l_old) / 2;\r
- SSumLR[ns++] += (r + r_old) / 2;\r
SSumLR[ns++] += l;\r
SSumLR[ns++] += r;\r
-\r
- l_old = l;\r
- r_old = r;\r
+ SSumLR[ns++] += l; /* same l/r added to a second l/r pair: four SSumLR entries per curr_addr step, without the old averaging against the previous value */\r
+ SSumLR[ns++] += r;\r
\r
curr_addr++;\r
- if (curr_addr >= 0x40000) curr_addr = rvb.StartAddr;\r
+ if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr; /* wrap back to StartAddr at the 0x40000 limit */\r
}\r
-\r
- rvb.iRVBLeft = l;\r
- rvb.iRVBRight = r;\r
- rvb.CurrAddr = curr_addr;\r
}\r
\r
-static void prepare_offsets(void)\r
+static void REVERBPrep(void)\r
{\r
- int space = 0x40000 - rvb.StartAddr;\r
+ REVERBInfo *rvb = spu.rvb;\r
+ int space = 0x40000 - rvb->StartAddr;\r
int t;\r
#define prep_offs(v) \\r
- t = rvb.v; \\r
+ t = rvb->v; \\r
while (t >= space) \\r
t -= space; \\r
- rvb.n##v = t\r
+ rvb->n##v = t\r
#define prep_offs2(d, v1, v2) \\r
- t = rvb.v1 - rvb.v2; \\r
+ t = rvb->v1 - rvb->v2; \\r
while (t >= space) \\r
t -= space; \\r
- rvb.n##d = t\r
+ rvb->n##d = t\r
\r
prep_offs(IIR_SRC_A0);\r
prep_offs(IIR_SRC_A1);\r
\r
#undef prep_offs\r
#undef prep_offs2\r
- rvb.dirty = 0;\r
+ rvb->dirty = 0;\r
}\r
\r
-INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to)\r
+INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to, int curr_addr) /* picks the reverb mixer for this batch; in this version it no longer checks StartAddr, refreshes dirty offsets or advances CurrAddr itself */\r
{\r
- if (!rvb.StartAddr) // reverb is off\r
- {\r
- rvb.iRVBLeft = rvb.iRVBRight = 0;\r
- return;\r
- }\r
-\r
if (spu.spuCtrl & 0x80) // -> reverb on? oki\r
{\r
- if (unlikely(rvb.dirty))\r
- prepare_offsets();\r
-\r
- MixREVERB(SSumLR, RVB, ns_to);\r
- }\r
- else if (rvb.VolLeft || rvb.VolRight)\r
- {\r
- if (unlikely(rvb.dirty))\r
- prepare_offsets();\r
-\r
- MixREVERB_off(SSumLR, ns_to);\r
+ MixREVERB(SSumLR, RVB, ns_to, curr_addr); /* spuCtrl bit 0x80 set: full pass that also consumes the RVB input buffer */\r
}\r
- else // -> reverb off\r
+ else if (spu.rvb->VolLeft || spu.rvb->VolRight) /* bit 0x80 clear but a reverb volume is nonzero */\r
{\r
- // reverb runs anyway\r
- rvb.CurrAddr += ns_to / 2;\r
- while (rvb.CurrAddr >= 0x40000)\r
- rvb.CurrAddr -= 0x40000 - rvb.StartAddr;\r
+ MixREVERB_off(SSumLR, ns_to, curr_addr); /* takes no RVB input; with both volumes zero nothing is mixed at all */\r
}\r
}\r
\r
SPUInfo spu;
SPUConfig spu_config;
-// MAIN infos struct for each channel
-
-REVERBInfo rvb;
-
static int iFMod[NSSIZE];
+static int RVB[NSSIZE * 2];
int ChanBuf[NSSIZE];
#define CDDA_BUFFER_SIZE (16384 * sizeof(uint32_t)) // must be power of 2
static void do_channels(int ns_to)
{
unsigned int mask;
+ int do_rvb, ch, d;
SPUCHAN *s_chan;
int *SB, sinc;
- int ch, d;
- memset(spu.RVB, 0, ns_to * sizeof(spu.RVB[0]) * 2);
+ do_rvb = spu.rvb->StartAddr && spu_config.iUseReverb;
+ if (do_rvb)
+ memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2);
mask = spu.dwNewChannel & 0xffffff;
for (ch = 0; mask != 0; ch++, mask >>= 1) {
if (s_chan->bFMod == 2) // fmod freq channel
memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0]));
- if (s_chan->bRVBActive)
- mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, spu.RVB);
+ if (s_chan->bRVBActive && do_rvb)
+ mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, RVB);
else
mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume);
}
+
+ if (spu.rvb->StartAddr) {
+ if (do_rvb) {
+ if (unlikely(spu.rvb->dirty))
+ REVERBPrep();
+
+ REVERBDo(spu.SSumLR, RVB, ns_to, spu.rvb->CurrAddr);
+ }
+
+ spu.rvb->CurrAddr += ns_to / 2;
+ while (spu.rvb->CurrAddr >= 0x40000)
+ spu.rvb->CurrAddr -= 0x40000 - spu.rvb->StartAddr;
+ }
}
-static void do_samples_finish(int *SSumLR, int *RVB, int ns_to,
+static void do_samples_finish(int *SSumLR, int ns_to,
int silentch, int decode_pos);
// optional worker thread handling
unsigned int exit_thread;
unsigned int i_ready;
unsigned int i_reaped;
- unsigned int req_sent; // dsp
- unsigned int last_boot_cnt;
+ unsigned int last_boot_cnt; // dsp
};
// aligning for C64X_DSP
unsigned int _pad0[128/4];
int ns_to;
int ctrl;
int decode_pos;
+ int rvb_addr;
unsigned int channels_new;
unsigned int channels_on;
unsigned int channels_silent;
int start;
int loop;
int ns_to;
+ short vol_l;
+ short vol_r;
ADSRInfoEx adsr;
- // might want to add vol and fmod flags..
+ // might also want to add fmod flags..
} ch[24];
- int RVB[NSSIZE * 2];
int SSumLR[NSSIZE * 2];
} i[4];
} *worker;
static void thread_work_start(void);
static void thread_work_wait_sync(struct work_item *work, int force);
+static void thread_sync_caches(void);
static int thread_get_i_done(void);
static int decode_block_work(void *context, int ch, int *SB)
work->ch[ch].sbpos = s_chan->iSBPos;
work->ch[ch].sinc = s_chan->sinc;
work->ch[ch].adsr = s_chan->ADSRX;
+ work->ch[ch].vol_l = s_chan->iLeftVolume;
+ work->ch[ch].vol_r = s_chan->iRightVolume;
work->ch[ch].start = s_chan->pCurr - spu.spuMemC;
work->ch[ch].loop = s_chan->pLoop - spu.spuMemC;
if (s_chan->prevflags & 1)
}
}
+ work->rvb_addr = 0;
+ if (spu.rvb->StartAddr) {
+ if (spu_config.iUseReverb) {
+ if (unlikely(spu.rvb->dirty))
+ REVERBPrep();
+ work->rvb_addr = spu.rvb->CurrAddr;
+ }
+
+ spu.rvb->CurrAddr += ns_to / 2;
+ while (spu.rvb->CurrAddr >= 0x40000)
+ spu.rvb->CurrAddr -= 0x40000 - spu.rvb->StartAddr;
+ }
+
worker->i_ready++;
thread_work_start();
}
SPUCHAN *s_chan;
ns_to = work->ns_to;
- memset(work->RVB, 0, ns_to * sizeof(work->RVB[0]) * 2);
+
+ if (work->rvb_addr)
+ memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2);
mask = work->channels_new;
for (ch = 0; mask != 0; ch++, mask >>= 1) {
if (s_chan->bFMod == 2) // fmod freq channel
memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0]));
- if (s_chan->bRVBActive)
+ if (s_chan->bRVBActive && work->rvb_addr)
mix_chan_rvb(work->SSumLR, ns_to,
- s_chan->iLeftVolume, s_chan->iRightVolume, work->RVB);
+ work->ch[ch].vol_l, work->ch[ch].vol_r, RVB);
else
- mix_chan(work->SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume);
+ mix_chan(work->SSumLR, ns_to, work->ch[ch].vol_l, work->ch[ch].vol_r);
}
+
+ if (work->rvb_addr)
+ REVERBDo(work->SSumLR, RVB, ns_to, work->rvb_addr);
}
static void sync_worker_thread(int force)
work = &worker->i[worker->i_reaped & WORK_I_MASK];
thread_work_wait_sync(work, force);
- do_samples_finish(work->SSumLR, work->RVB, work->ns_to,
+ do_samples_finish(work->SSumLR, work->ns_to,
work->channels_silent, work->decode_pos);
worker->i_reaped++;
done = thread_get_i_done() - worker->i_reaped;
used_space = worker->i_ready - worker->i_reaped;
}
+ if (force)
+ thread_sync_caches();
}
#else
if (do_direct || worker == NULL || !spu_config.iUseThread) {
do_channels(ns_to);
- do_samples_finish(spu.SSumLR, spu.RVB, ns_to, silentch, spu.decode_pos);
+ do_samples_finish(spu.SSumLR, ns_to, silentch, spu.decode_pos);
}
else {
queue_channel_work(ns_to, silentch);
spu.decode_pos = (spu.decode_pos + ns_to) & 0x1ff;
}
-static void do_samples_finish(int *SSumLR, int *RVB, int ns_to,
+static void do_samples_finish(int *SSumLR, int ns_to,
int silentch, int decode_pos)
{
int volmult = spu_config.iVolume;
spu.decode_dirty_ch &= ~(1<<3);
}
- //---------------------------------------------------//
- // mix XA infos (if any)
-
MixXA(SSumLR, ns_to, decode_pos);
- ///////////////////////////////////////////////////////
- // mix all channels (including reverb) into one buffer
-
- if(spu_config.iUseReverb)
- REVERBDo(SSumLR, RVB, ns_to);
-
if((spu.spuCtrl&0x4000)==0) // muted? (rare, don't optimize for this)
{
memset(spu.pS, 0, ns_to * 2 * sizeof(spu.pS[0]));
}
// SETUPSTREAMS: init most of the spu buffers
-void SetupStreams(void)
+static void SetupStreams(void)
{
- int i;
-
spu.pSpuBuffer = (unsigned char *)malloc(32768); // alloc mixing buffer
- spu.RVB = calloc(NSSIZE * 2, sizeof(spu.RVB[0]));
spu.SSumLR = calloc(NSSIZE * 2, sizeof(spu.SSumLR[0]));
spu.XAStart = // alloc xa buffer
spu.CDDAPlay = spu.CDDAStart;
spu.CDDAFeed = spu.CDDAStart;
- for(i=0;i<MAXCHAN;i++) // loop sound channels
- {
- spu.s_chan[i].ADSRX.SustainLevel = 0xf; // -> init sustain
- spu.s_chan[i].ADSRX.SustainIncrease = 1;
- spu.s_chan[i].pLoop=spu.spuMemC;
- spu.s_chan[i].pCurr=spu.spuMemC;
- }
-
ClearWorkingState();
-
- spu.bSpuInit=1; // flag: we are inited
}
// REMOVESTREAMS: free most buffer
-void RemoveStreams(void)
+static void RemoveStreams(void)
{
free(spu.pSpuBuffer); // free mixing buffer
spu.pSpuBuffer = NULL;
- free(spu.RVB); // free reverb buffer
- spu.RVB = NULL;
free(spu.SSumLR);
spu.SSumLR = NULL;
free(spu.XAStart); // free XA buffer
return worker->i_done;
}
+static void thread_sync_caches(void) /* no-op in this backend: the body is empty, the function only satisfies the forward declaration and its call after a forced sync */
+{
+}
+
static void *spu_worker_thread(void *unused)
{
struct work_item *work;
// SPUINIT: this func will be called first by the main emu
long CALLBACK SPUinit(void)
{
+ int i;
+
spu.spuMemC = calloc(1, 512 * 1024);
- memset((void *)&rvb, 0, sizeof(REVERBInfo));
InitADSR();
spu.s_chan = calloc(MAXCHAN+1, sizeof(spu.s_chan[0])); // channel + 1 infos (1 is security for fmod handling)
+ spu.rvb = calloc(1, sizeof(REVERBInfo));
spu.SB = calloc(MAXCHAN, sizeof(spu.SB[0]) * SB_SIZE);
spu.spuAddr = 0;
init_spu_thread();
+ for (i = 0; i < MAXCHAN; i++) // loop sound channels
+ {
+ spu.s_chan[i].ADSRX.SustainLevel = 0xf; // -> init sustain
+ spu.s_chan[i].ADSRX.SustainIncrease = 1;
+ spu.s_chan[i].pLoop = spu.spuMemC;
+ spu.s_chan[i].pCurr = spu.spuMemC;
+ }
+
+ spu.bSpuInit=1; // flag: we are inited
+
return 0;
}
spu.SB = NULL;
free(spu.s_chan);
spu.s_chan = NULL;
+ free(spu.rvb);
+ spu.rvb = NULL;
RemoveStreams(); // no more streaming
spu.bSpuInit=0;
dsp_mem_region_t region;
dsp_component_id_t compid;
+ unsigned int stale_caches:1;
+ unsigned int req_sent:1;
} f;
static void thread_work_start(void)
// to start the DSP, dsp_rpc_send() must be used,
// but before that, previous request must be finished
- if (worker->req_sent) {
+ if (f.req_sent) {
if (worker->boot_cnt == worker->last_boot_cnt) {
// hopefully still booting
//printf("booting?\n");
if (ret != 0) {
fprintf(stderr, "dsp_rpc_recv failed: %d\n", ret);
f.dsp_logbuf_print();
- worker->req_sent = 0;
+ f.req_sent = 0;
spu_config.iUseThread = 0;
return;
}
spu_config.iUseThread = 0;
return;
}
- worker->req_sent = 1;
+ f.req_sent = 1;
}
static int thread_get_i_done(void)
int limit = 1000;
int ns_to;
- ns_to = work->ns_to;
- f.dsp_cache_inv_virt(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to);
- f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to);
- __builtin_prefetch(work->RVB);
- __builtin_prefetch(work->SSumLR);
-
while (worker->i_done == worker->i_reaped && limit-- > 0) {
- if (!worker->active) {
+ if (!f.req_sent) {
+ printf("dsp: req not sent?\n");
+ break;
+ }
+
+ if (worker->boot_cnt != worker->last_boot_cnt && !worker->active) {
printf("dsp: broken sync\n");
worker->last_boot_cnt = ~0;
break;
f.dsp_cache_inv_virt(&worker->i_done, 64);
}
+ ns_to = work->ns_to;
+ f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to);
+ preload(work->SSumLR);
+ preload(work->SSumLR + 64/4);
+
+ f.stale_caches = 1; // SB, spuMem
+
if (limit == 0)
printf("dsp: wait timeout\n");
if (worker->i_reaped != worker->i_done - 1)
return;
- if (worker->req_sent && (force || worker->i_done == worker->i_ready)) {
+ if (f.req_sent && (force || worker->i_done == worker->i_ready)) {
dsp_msg_t msg;
int ret;
f.dsp_logbuf_print();
spu_config.iUseThread = 0;
}
- worker->req_sent = 0;
+ f.req_sent = 0;
}
+}
- if (force) {
+static void thread_sync_caches(void) /* when f.stale_caches is set, runs f.dsp_cache_inv_virt() over SB, a 0x800-byte span of spuMemC and the reverb area, then clears the flag */
+{
+ if (f.stale_caches) { /* raised where a work item's SSumLR output is collected */
f.dsp_cache_inv_virt(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24);
f.dsp_cache_inv_virt(spu.spuMemC + 0x800, 0x800);
+ if (spu.rvb->StartAddr) { /* only when a reverb start address is configured (nonzero) */
+ int left = 0x40000 - spu.rvb->StartAddr; /* spuMem elements from StartAddr up to the 0x40000 limit */
+ f.dsp_cache_inv_virt(spu.spuMem + spu.rvb->StartAddr, left * 2); /* NOTE(review): length presumably in bytes, 2 per spuMem element - confirm */
+ }
+ f.stale_caches = 0; /* nothing further to do until the flag is raised again */
}
}
mem->sizeof_region_mem, sizeof(*mem));
goto fail_init;
}
- if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), s_chan[1])) {
+ if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), in.s_chan[1])) {
fprintf(stderr, "error: size mismatch 2: %d vs %zd\n",
- mem->offsetof_s_chan1, offsetof(typeof(*mem), s_chan[1]));
+ mem->offsetof_s_chan1, offsetof(typeof(*mem), in.s_chan[1]));
goto fail_init;
}
if (mem->offsetof_spos_3_20 != offsetof(typeof(*mem), worker.i[3].ch[20])) {
free(spu.SB);
spu.SB = mem->SB;
free(spu.s_chan);
- spu.s_chan = mem->s_chan;
+ spu.s_chan = mem->in.s_chan;
+ free(spu.rvb);
+ spu.rvb = &mem->in.rvb;
worker = &mem->worker;
printf("spu: C64x DSP ready (id=%d).\n", (int)f.compid);
if (worker == NULL)
return;
- if (worker->req_sent)
+ if (f.req_sent) {
f.dsp_rpc_recv(&msg);
+ f.req_sent = 0;
+ }
f.dsp_logbuf_print();
f.dsp_shm_free(f.region);
spu.spuMemC = NULL;
spu.SB = NULL;
spu.s_chan = NULL;
+ spu.rvb = NULL;
worker = NULL;
}
// careful not to lose ARM writes by DSP overwriting
// with old data when it's writing out neighbor cachelines
int _pad1[128/4 - ((SB_SIZE * 24) & (128/4 - 1))];
- SPUCHAN s_chan[24 + 1];
- int _pad2[128/4 - ((sizeof(SPUCHAN) * 25 / 4) & (128/4 - 1))];
+ struct spu_in { /* groups the channel array and the reverb state into one member */
+ // these are not to be modified by DSP
+ SPUCHAN s_chan[24 + 1]; /* spu.s_chan is pointed at this array once the shared region is set up */
+ REVERBInfo rvb; /* spu.rvb is pointed at this member once the shared region is set up */
+ } in;
+ int _pad2[128/4 - ((sizeof(struct spu_in) / 4) & (128/4 - 1))]; /* 32 minus (size in ints mod 32) ints of padding, so in + _pad2 ends on a 128-byte multiple (assuming sizeof(struct spu_in) is a multiple of 4) */
struct spu_worker worker;
SPUConfig spu_config;
// init/debug
/* dummy deps, some bloat but avoids ifdef hell in SPU code.. */
static void thread_work_start(void) {}
static void thread_work_wait_sync(struct work_item *work, int force) {}
+static void thread_sync_caches(void) {} /* empty stand-in like the neighbouring dummies: nothing to synchronise in this build */
static int thread_get_i_done(void) { return 0; }
struct out_driver *out_current;
void SetupSound(void) {}
static void invalidate_cache(struct work_item *work)
{
- syscalls.cache_inv(work, offsetof(typeof(*work), RVB), 1);
+ // see comment in writeout_cache()
+ //syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1);
syscalls.cache_inv(spu.s_chan, sizeof(spu.s_chan[0]) * 24, 0);
syscalls.cache_inv(work->SSumLR,
sizeof(work->SSumLR[0]) * 2 * work->ns_to, 0);
{
int ns_to = work->ns_to;
- syscalls.cache_wb(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to, 1);
syscalls.cache_wb(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to, 1);
+ // have to invalidate now, otherwise there is a race between
+ // DSP evicting dirty lines and ARM writing new data to this area
+ syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 0);
}
static void do_processing(void)
{
+ int left, dirty = 0, had_rvb = 0;
struct work_item *work;
- int left, dirty = 0;
while (worker->active)
{
work = &worker->i[worker->i_done & WORK_I_MASK];
invalidate_cache(work);
+ had_rvb |= work->rvb_addr;
+ spu.spuCtrl = work->ctrl;
do_channel_work(work);
writeout_cache(work);
if (dirty) {
syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1);
syscalls.cache_wb(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24, 1);
+ if (had_rvb) {
+ left = 0x40000 - spu.rvb->StartAddr;
+ syscalls.cache_wb(spu.spuMem + spu.rvb->StartAddr, left * 2, 1);
+ had_rvb = 0;
+ }
dirty = 0;
continue;
}
spu.spuMemC = mem->spu_ram;
spu.SB = mem->SB;
- spu.s_chan = mem->s_chan;
+ spu.s_chan = mem->in.s_chan;
+ spu.rvb = &mem->in.rvb;
worker = &mem->worker;
memcpy(&spu_config, &mem->spu_config, sizeof(spu_config));
mem->sizeof_region_mem = sizeof(*mem);
- mem->offsetof_s_chan1 = offsetof(typeof(*mem), s_chan[1]);
+ mem->offsetof_s_chan1 = offsetof(typeof(*mem), in.s_chan[1]);
mem->offsetof_spos_3_20 = offsetof(typeof(*mem), worker.i[3].ch[20]);
// seems to be unneeded, no write-alloc? but just in case..
syscalls.cache_wb(&mem->sizeof_region_mem, 3 * 4, 1);
// c64_tools lib does BCACHE_wbInvAll() when it receives mailbox irq,
// but invalidate anyway in case c64_tools is ever fixed..
- syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0);
+ // XXX edit: don't bother as reverb is not handled, will fix if needed
+ //syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0);
+ //syscalls.cache_inv(&mem->in, sizeof(mem->in), 0);
break;
default: