X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fdfsound%2Fspu.c;h=ec31b0ca3db94f3f0c00061dc6676e03386042d1;hp=b091efcc96fd0b9d8e6937520d9de861b966a84f;hb=8f5f2dd5a70f47322614eda6f97304808447199c;hpb=63a4f6b6a3b0315590cd3009df2c92480ed2d98b diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index b091efcc..ec31b0ca 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -18,7 +18,7 @@ * * ***************************************************************************/ -#ifndef _WIN32 +#if !defined(_WIN32) && !defined(NO_OS) #include // gettimeofday in xa.c #define THREAD_ENABLED 1 #endif @@ -29,9 +29,12 @@ #include "externals.h" #include "registers.h" #include "out.h" -#include "arm_features.h" #include "spu_config.h" +#ifdef __arm__ +#include "arm_features.h" +#endif + #ifdef __ARM_ARCH_7A__ #define ssat32_to_16(v) \ asm("ssat %0,#16,%1" : "=r" (v) : "r" (v)) @@ -72,57 +75,10 @@ SPUConfig spu_config; // MAIN infos struct for each channel -SPUCHAN s_chan[MAXCHAN+1]; // channel + 1 infos (1 is security for fmod handling) REVERBInfo rvb; -#ifdef THREAD_ENABLED - -#include -#include -#include - -// worker thread state -static struct { - unsigned int pending:1; - unsigned int exit_thread:1; - int ns_to; - int ctrl; - int decode_pos; - int silentch; - int *sRVBStart; - unsigned char *ram; - unsigned int chmask; - unsigned int r_chan_end; - unsigned int r_decode_dirty; - pthread_t thread; - sem_t sem_avail; - sem_t sem_done; - struct { - int spos; - int sbpos; - int sinc; - int start; - int loop; - int ns_to; - ADSRInfoEx adsr; - // might want to add vol and fmod flags.. - } ch[24]; -} *worker; - -#else -static const void * const worker = NULL; -#endif - -// certain globals (were local before, but with the new timeproc I need em global) - -static const int f[8][2] = { { 0, 0 }, - { 60, 0 }, - { 115, -52 }, - { 98, -55 }, - { 122, -60 } }; +static int iFMod[NSSIZE]; int ChanBuf[NSSIZE]; -int SSumLR[NSSIZE*2]; -int iFMod[NSSIZE]; #define CDDA_BUFFER_SIZE (16384 * sizeof(uint32_t)) // must be power of 2 @@ -272,28 +228,39 @@ static int check_irq(int ch, unsigned char *pos) // START SOUND... called by main thread to setup a new sound on a channel //////////////////////////////////////////////////////////////////////// -INLINE void StartSound(int ch) +static void StartSoundSB(int *SB) { - StartADSR(ch); - StartREVERB(ch); + SB[26]=0; // init mixing vars + SB[27]=0; + + SB[28]=0; + SB[29]=0; // init our interpolation helpers + SB[30]=0; + SB[31]=0; +} - s_chan[ch].prevflags=2; +static void StartSoundMain(int ch) +{ + SPUCHAN *s_chan = &spu.s_chan[ch]; - s_chan[ch].SB[26]=0; // init mixing vars - s_chan[ch].SB[27]=0; - s_chan[ch].iSBPos=27; + StartADSR(ch); + StartREVERB(ch); - s_chan[ch].SB[28]=0; - s_chan[ch].SB[29]=0; // init our interpolation helpers - s_chan[ch].SB[30]=0; - s_chan[ch].SB[31]=0; - s_chan[ch].spos=0; + s_chan->prevflags=2; + s_chan->iSBPos=27; + s_chan->spos=0; spu.dwNewChannel&=~(1<pCurr; // set up the current pos if (start == spu.spuMemC) // ? ret = 1; - if (s_chan[ch].prevflags & 1) // 1: stop/loop + if (s_chan->prevflags & 1) // 1: stop/loop { - if (!(s_chan[ch].prevflags & 2)) + if (!(s_chan->prevflags & 2)) ret = 1; - start = s_chan[ch].pLoop; + start = s_chan->pLoop; } else check_irq(ch, start); // hack, see check_irq below.. - predict_nr = (int)start[0]; + predict_nr = start[0]; shift_factor = predict_nr & 0xf; predict_nr >>= 4; @@ -462,20 +437,20 @@ static int decode_block(int ch, int *SB) flags = start[1]; if (flags & 4) - s_chan[ch].pLoop = start; // loop adress + s_chan->pLoop = start; // loop adress start += 16; if (flags & 1) { // 1: stop/loop - start = s_chan[ch].pLoop; + start = s_chan->pLoop; check_irq(ch, start); // hack.. :( } if (start - spu.spuMemC >= 0x80000) start = spu.spuMemC; - s_chan[ch].pCurr = start; // store values for next cycle - s_chan[ch].prevflags = flags; + s_chan->pCurr = start; // store values for next cycle + s_chan->prevflags = flags; return ret; } @@ -483,81 +458,51 @@ static int decode_block(int ch, int *SB) // do block, but ignore sample data static int skip_block(int ch) { - unsigned char *start = s_chan[ch].pCurr; + SPUCHAN *s_chan = &spu.s_chan[ch]; + unsigned char *start = s_chan->pCurr; int flags; int ret = 0; - if (s_chan[ch].prevflags & 1) { - if (!(s_chan[ch].prevflags & 2)) + if (s_chan->prevflags & 1) { + if (!(s_chan->prevflags & 2)) ret = 1; - start = s_chan[ch].pLoop; + start = s_chan->pLoop; } else check_irq(ch, start); flags = start[1]; if (flags & 4) - s_chan[ch].pLoop = start; + s_chan->pLoop = start; start += 16; if (flags & 1) { - start = s_chan[ch].pLoop; + start = s_chan->pLoop; check_irq(ch, start); } - s_chan[ch].pCurr = start; - s_chan[ch].prevflags = flags; + s_chan->pCurr = start; + s_chan->prevflags = flags; return ret; } -#ifdef THREAD_ENABLED - -static int decode_block_work(int ch, int *SB) -{ - int predict_nr, shift_factor, flags; - const unsigned char *ram = worker->ram; - int start = worker->ch[ch].start; - int loop = worker->ch[ch].loop; - - predict_nr = ram[start]; - shift_factor = predict_nr & 0xf; - predict_nr >>= 4; - - decode_block_data(SB, ram + start + 2, predict_nr, shift_factor); - - flags = ram[start + 1]; - if (flags & 4) - loop = start; // loop adress - - start += 16; - - if (flags & 1) // 1: stop/loop - start = loop; - - worker->ch[ch].start = start & 0x7ffff; - worker->ch[ch].loop = loop; - - return 0; -} - -#endif - // if irq is going to trigger sooner than in upd_samples, set upd_samples static void scan_for_irq(int ch, unsigned int *upd_samples) { + SPUCHAN *s_chan = &spu.s_chan[ch]; int pos, sinc, sinc_inv, end; unsigned char *block; int flags; - block = s_chan[ch].pCurr; - pos = s_chan[ch].spos; - sinc = s_chan[ch].sinc; + block = s_chan->pCurr; + pos = s_chan->spos; + sinc = s_chan->sinc; end = pos + *upd_samples * sinc; - pos += (28 - s_chan[ch].iSBPos) << 16; + pos += (28 - s_chan->iSBPos) << 16; while (pos < end) { if (block == spu.pSpuIrq) @@ -565,7 +510,7 @@ static void scan_for_irq(int ch, unsigned int *upd_samples) flags = block[1]; block += 16; if (flags & 1) { // 1: stop/loop - block = s_chan[ch].pLoop; + block = s_chan->pLoop; if (block == spu.pSpuIrq) // hack.. (see decode_block) break; } @@ -574,11 +519,11 @@ static void scan_for_irq(int ch, unsigned int *upd_samples) if (pos < end) { - sinc_inv = s_chan[ch].sinc_inv; + sinc_inv = s_chan->sinc_inv; if (sinc_inv == 0) - sinc_inv = s_chan[ch].sinc_inv = (0x80000000u / (uint32_t)sinc) << 1; + sinc_inv = s_chan->sinc_inv = (0x80000000u / (uint32_t)sinc) << 1; - pos -= s_chan[ch].spos; + pos -= s_chan->spos; *upd_samples = (((uint64_t)pos * sinc_inv) >> 32) + 1; //xprintf("ch%02d: irq sched: %3d %03d\n", // ch, *upd_samples, *upd_samples * 60 * 263 / 44100); @@ -586,8 +531,9 @@ static void scan_for_irq(int ch, unsigned int *upd_samples) } #define make_do_samples(name, fmod_code, interp_start, interp1_code, interp2_code, interp_end) \ -static noinline int do_samples_##name(int (*decode_f)(int ch, int *SB), int ch, \ - int ns_to, int *SB, int sinc, int *spos, int *sbpos) \ +static noinline int do_samples_##name( \ + int (*decode_f)(void *context, int ch, int *SB), void *ctx, \ + int ch, int ns_to, int *SB, int sinc, int *spos, int *sbpos) \ { \ int ns, d, fa; \ int ret = ns_to; \ @@ -604,7 +550,7 @@ static noinline int do_samples_##name(int (*decode_f)(int ch, int *SB), int ch, if (*sbpos >= 28) \ { \ *sbpos = 0; \ - d = decode_f(ch, SB); \ + d = decode_f(ctx, ch, SB); \ if (d && ns < ret) \ ret = ns; \ } \ @@ -622,12 +568,12 @@ static noinline int do_samples_##name(int (*decode_f)(int ch, int *SB), int ch, } #define fmod_recv_check \ - if(s_chan[ch].bFMod==1 && iFMod[ns]) \ - sinc = FModChangeFrequency(SB, s_chan[ch].iRawPitch, ns) + if(spu.s_chan[ch].bFMod==1 && iFMod[ns]) \ + sinc = FModChangeFrequency(SB, spu.s_chan[ch].iRawPitch, ns) make_do_samples(default, fmod_recv_check, , - StoreInterpolationVal(SB, sinc, fa, s_chan[ch].bFMod==2), - ChanBuf[ns] = iGetInterpolationVal(SB, sinc, *spos, s_chan[ch].bFMod==2), ) + StoreInterpolationVal(SB, sinc, fa, spu.s_chan[ch].bFMod==2), + ChanBuf[ns] = iGetInterpolationVal(SB, sinc, *spos, spu.s_chan[ch].bFMod==2), ) make_do_samples(noint, , fa = SB[29], , ChanBuf[ns] = fa, SB[29] = fa) #define simple_interp_store \ @@ -648,24 +594,27 @@ make_do_samples(simple, , , static int do_samples_skip(int ch, int ns_to) { + SPUCHAN *s_chan = &spu.s_chan[ch]; + int spos = s_chan->spos; + int sinc = s_chan->sinc; int ret = ns_to, ns, d; - s_chan[ch].spos += s_chan[ch].iSBPos << 16; + spos += s_chan->iSBPos << 16; for (ns = 0; ns < ns_to; ns++) { - s_chan[ch].spos += s_chan[ch].sinc; - while (s_chan[ch].spos >= 28*0x10000) + spos += sinc; + while (spos >= 28*0x10000) { d = skip_block(ch); if (d && ns < ret) ret = ns; - s_chan[ch].spos -= 28*0x10000; + spos -= 28*0x10000; } } - s_chan[ch].iSBPos = s_chan[ch].spos >> 16; - s_chan[ch].spos &= 0xffff; + s_chan->iSBPos = spos >> 16; + s_chan->spos = spos & 0xffff; return ret; } @@ -714,13 +663,12 @@ static int do_samples_noise(int ch, int ns_to) #ifdef HAVE_ARMV5 // asm code; lv and rv must be 0-3fff -extern void mix_chan(int start, int count, int lv, int rv); -extern void mix_chan_rvb(int start, int count, int lv, int rv, int *rvb); +extern void mix_chan(int *SSumLR, int count, int lv, int rv); +extern void mix_chan_rvb(int *SSumLR, int count, int lv, int rv, int *rvb); #else -static void mix_chan(int start, int count, int lv, int rv) +static void mix_chan(int *SSumLR, int count, int lv, int rv) { - int *dst = SSumLR + start * 2; - const int *src = ChanBuf + start; + const int *src = ChanBuf; int l, r; while (count--) @@ -729,16 +677,16 @@ static void mix_chan(int start, int count, int lv, int rv) l = (sval * lv) >> 14; r = (sval * rv) >> 14; - *dst++ += l; - *dst++ += r; + *SSumLR++ += l; + *SSumLR++ += r; } } -static void mix_chan_rvb(int start, int count, int lv, int rv, int *rvb) +static void mix_chan_rvb(int *SSumLR, int count, int lv, int rv, int *rvb) { - int *dst = SSumLR + start * 2; - int *drvb = rvb + start * 2; - const int *src = ChanBuf + start; + const int *src = ChanBuf; + int *dst = SSumLR; + int *drvb = rvb; int l, r; while (count--) @@ -776,33 +724,38 @@ static noinline void do_decode_bufs(unsigned short *mem, int which, static void do_silent_chans(int ns_to, int silentch) { + unsigned int mask; + SPUCHAN *s_chan; int ch; - for (ch = 0; ch < MAXCHAN; ch++) + mask = silentch & 0xffffff; + for (ch = 0; mask != 0; ch++, mask >>= 1) { - if (!(silentch & (1< spu.pSpuIrq && s_chan[ch].pLoop > spu.pSpuIrq) + + s_chan = &spu.s_chan[ch]; + if (s_chan->pCurr > spu.pSpuIrq && s_chan->pLoop > spu.pSpuIrq) continue; - s_chan[ch].spos += s_chan[ch].iSBPos << 16; - s_chan[ch].iSBPos = 0; + s_chan->spos += s_chan->iSBPos << 16; + s_chan->iSBPos = 0; - s_chan[ch].spos += s_chan[ch].sinc * ns_to; - while (s_chan[ch].spos >= 28 * 0x10000) + s_chan->spos += s_chan->sinc * ns_to; + while (s_chan->spos >= 28 * 0x10000) { - unsigned char *start = s_chan[ch].pCurr; + unsigned char *start = s_chan->pCurr; skip_block(ch); - if (start == s_chan[ch].pCurr || start - spu.spuMemC < 0x1000) + if (start == s_chan->pCurr || start - spu.spuMemC < 0x1000) { // looping on self or stopped(?) spu.dwChannelDead |= 1<spos = 0; break; } - s_chan[ch].spos -= 28 * 0x10000; + s_chan->spos -= 28 * 0x10000; } } } @@ -810,36 +763,44 @@ static void do_silent_chans(int ns_to, int silentch) static void do_channels(int ns_to) { unsigned int mask; + SPUCHAN *s_chan; int *SB, sinc; int ch, d; - InitREVERB(ns_to); + memset(spu.RVB, 0, ns_to * sizeof(spu.RVB[0]) * 2); + + mask = spu.dwNewChannel & 0xffffff; + for (ch = 0; mask != 0; ch++, mask >>= 1) { + if (mask & 1) + StartSound(ch); + } mask = spu.dwChannelOn & 0xffffff; for (ch = 0; mask != 0; ch++, mask >>= 1) // loop em all... { if (!(mask & 1)) continue; // channel not playing? next - SB = s_chan[ch].SB; - sinc = s_chan[ch].sinc; + s_chan = &spu.s_chan[ch]; + SB = spu.SB + ch * SB_SIZE; + sinc = s_chan->sinc; - if (s_chan[ch].bNoise) + if (s_chan->bNoise) d = do_samples_noise(ch, ns_to); - else if (s_chan[ch].bFMod == 2 - || (s_chan[ch].bFMod == 0 && spu_config.iUseInterpolation == 0)) - d = do_samples_noint(decode_block, ch, ns_to, - SB, sinc, &s_chan[ch].spos, &s_chan[ch].iSBPos); - else if (s_chan[ch].bFMod == 0 && spu_config.iUseInterpolation == 1) - d = do_samples_simple(decode_block, ch, ns_to, - SB, sinc, &s_chan[ch].spos, &s_chan[ch].iSBPos); + else if (s_chan->bFMod == 2 + || (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 0)) + d = do_samples_noint(decode_block, NULL, ch, ns_to, + SB, sinc, &s_chan->spos, &s_chan->iSBPos); + else if (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 1) + d = do_samples_simple(decode_block, NULL, ch, ns_to, + SB, sinc, &s_chan->spos, &s_chan->iSBPos); else - d = do_samples_default(decode_block, ch, ns_to, - SB, sinc, &s_chan[ch].spos, &s_chan[ch].iSBPos); + d = do_samples_default(decode_block, NULL, ch, ns_to, + SB, sinc, &s_chan->spos, &s_chan->iSBPos); - d = MixADSR(&s_chan[ch].ADSRX, d); + d = MixADSR(&s_chan->ADSRX, d); if (d < ns_to) { spu.dwChannelOn &= ~(1 << ch); - s_chan[ch].ADSRX.EnvelopeVol = 0; + s_chan->ADSRX.EnvelopeVol = 0; memset(&ChanBuf[d], 0, (ns_to - d) * sizeof(ChanBuf[0])); } @@ -849,142 +810,243 @@ static void do_channels(int ns_to) spu.decode_dirty_ch |= 1 << ch; } - if (s_chan[ch].bFMod == 2) // fmod freq channel + if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); - if (s_chan[ch].bRVBActive) - mix_chan_rvb(0, ns_to, s_chan[ch].iLeftVolume, s_chan[ch].iRightVolume, spu.sRVBStart); + if (s_chan->bRVBActive) + mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, spu.RVB); else - mix_chan(0, ns_to, s_chan[ch].iLeftVolume, s_chan[ch].iRightVolume); + mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); } } -static void do_samples_finish(int ns_to, int silentch, int decode_pos); +static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, + int silentch, int decode_pos); // optional worker thread handling -#ifdef THREAD_ENABLED +#if defined(THREAD_ENABLED) || defined(WANT_THREAD_CODE) + +// worker thread state +static struct spu_worker { + union { + struct { + unsigned int exit_thread; + unsigned int i_ready; + unsigned int i_reaped; + unsigned int req_sent; // dsp + unsigned int last_boot_cnt; + }; + // aligning for C64X_DSP + unsigned int _pad0[128/4]; + }; + union { + struct { + unsigned int i_done; + unsigned int active; // dsp + unsigned int boot_cnt; + }; + unsigned int _pad1[128/4]; + }; + struct work_item { + int ns_to; + int ctrl; + int decode_pos; + unsigned int channels_new; + unsigned int channels_on; + unsigned int channels_silent; + struct { + int spos; + int sbpos; + int sinc; + int start; + int loop; + int ns_to; + ADSRInfoEx adsr; + // might want to add vol and fmod flags.. + } ch[24]; + int RVB[NSSIZE * 2]; + int SSumLR[NSSIZE * 2]; + } i[4]; +} *worker; + +#define WORK_MAXCNT (sizeof(worker->i) / sizeof(worker->i[0])) +#define WORK_I_MASK (WORK_MAXCNT - 1) + +static void thread_work_start(void); +static void thread_work_wait_sync(struct work_item *work, int force); +static int thread_get_i_done(void); + +static int decode_block_work(void *context, int ch, int *SB) +{ + const unsigned char *ram = spu.spuMemC; + int predict_nr, shift_factor, flags; + struct work_item *work = context; + int start = work->ch[ch].start; + int loop = work->ch[ch].loop; + + predict_nr = ram[start]; + shift_factor = predict_nr & 0xf; + predict_nr >>= 4; + + decode_block_data(SB, ram + start + 2, predict_nr, shift_factor); -static void queue_channel_work(int ns_to, int silentch) + flags = ram[start + 1]; + if (flags & 4) + loop = start; // loop adress + + start += 16; + + if (flags & 1) // 1: stop/loop + start = loop; + + work->ch[ch].start = start & 0x7ffff; + work->ch[ch].loop = loop; + + return 0; +} + +static void queue_channel_work(int ns_to, unsigned int silentch) { + struct work_item *work; + SPUCHAN *s_chan; unsigned int mask; - int ch; + int ch, d; - worker->ns_to = ns_to; - worker->ctrl = spu.spuCtrl; - worker->decode_pos = spu.decode_pos; - worker->silentch = silentch; - worker->sRVBStart = spu.sRVBStart; - worker->ram = spu.spuMemC; + work = &worker->i[worker->i_ready & WORK_I_MASK]; + work->ns_to = ns_to; + work->ctrl = spu.spuCtrl; + work->decode_pos = spu.decode_pos; + work->channels_silent = silentch; + + mask = work->channels_new = spu.dwNewChannel & 0xffffff; + for (ch = 0; mask != 0; ch++, mask >>= 1) { + if (mask & 1) + StartSoundMain(ch); + } + + mask = work->channels_on = spu.dwChannelOn & 0xffffff; + spu.decode_dirty_ch |= mask & 0x0a; - mask = worker->chmask = spu.dwChannelOn & 0xffffff; for (ch = 0; mask != 0; ch++, mask >>= 1) { if (!(mask & 1)) continue; - worker->ch[ch].spos = s_chan[ch].spos; - worker->ch[ch].sbpos = s_chan[ch].iSBPos; - worker->ch[ch].sinc = s_chan[ch].sinc; - worker->ch[ch].adsr = s_chan[ch].ADSRX; - worker->ch[ch].start = s_chan[ch].pCurr - spu.spuMemC; - worker->ch[ch].loop = s_chan[ch].pLoop - spu.spuMemC; - if (s_chan[ch].prevflags & 1) - worker->ch[ch].start = worker->ch[ch].loop; - - worker->ch[ch].ns_to = do_samples_skip(ch, ns_to); + s_chan = &spu.s_chan[ch]; + work->ch[ch].spos = s_chan->spos; + work->ch[ch].sbpos = s_chan->iSBPos; + work->ch[ch].sinc = s_chan->sinc; + work->ch[ch].adsr = s_chan->ADSRX; + work->ch[ch].start = s_chan->pCurr - spu.spuMemC; + work->ch[ch].loop = s_chan->pLoop - spu.spuMemC; + if (s_chan->prevflags & 1) + work->ch[ch].start = work->ch[ch].loop; + + d = do_samples_skip(ch, ns_to); + work->ch[ch].ns_to = d; + + // note: d is not accurate on skip + d = SkipADSR(&s_chan->ADSRX, d); + if (d < ns_to) { + spu.dwChannelOn &= ~(1 << ch); + s_chan->ADSRX.EnvelopeVol = 0; + } } - worker->pending = 1; - sem_post(&worker->sem_avail); + worker->i_ready++; + thread_work_start(); } -static void do_channel_work(void) +static void do_channel_work(struct work_item *work) { - unsigned int mask, endmask = 0; + unsigned int mask; unsigned int decode_dirty_ch = 0; int *SB, sinc, spos, sbpos; int d, ch, ns_to; + SPUCHAN *s_chan; + + ns_to = work->ns_to; + memset(work->RVB, 0, ns_to * sizeof(work->RVB[0]) * 2); - ns_to = worker->ns_to; - memset(worker->sRVBStart, 0, ns_to * sizeof(worker->sRVBStart[0]) * 2); + mask = work->channels_new; + for (ch = 0; mask != 0; ch++, mask >>= 1) { + if (mask & 1) + StartSoundSB(spu.SB + ch * SB_SIZE); + } - mask = worker->chmask; + mask = work->channels_on; for (ch = 0; mask != 0; ch++, mask >>= 1) { if (!(mask & 1)) continue; - d = worker->ch[ch].ns_to; - spos = worker->ch[ch].spos; - sbpos = worker->ch[ch].sbpos; - sinc = worker->ch[ch].sinc; - SB = s_chan[ch].SB; - - if (s_chan[ch].bNoise) - do_lsfr_samples(d, worker->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal); - else if (s_chan[ch].bFMod == 2 - || (s_chan[ch].bFMod == 0 && spu_config.iUseInterpolation == 0)) - do_samples_noint(decode_block_work, ch, d, SB, sinc, &spos, &sbpos); - else if (s_chan[ch].bFMod == 0 && spu_config.iUseInterpolation == 1) - do_samples_simple(decode_block_work, ch, d, SB, sinc, &spos, &sbpos); + d = work->ch[ch].ns_to; + spos = work->ch[ch].spos; + sbpos = work->ch[ch].sbpos; + sinc = work->ch[ch].sinc; + + s_chan = &spu.s_chan[ch]; + SB = spu.SB + ch * SB_SIZE; + + if (s_chan->bNoise) + do_lsfr_samples(d, work->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal); + else if (s_chan->bFMod == 2 + || (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 0)) + do_samples_noint(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); + else if (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 1) + do_samples_simple(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); else - do_samples_default(decode_block_work, ch, d, SB, sinc, &spos, &sbpos); + do_samples_default(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); - d = MixADSR(&worker->ch[ch].adsr, d); + d = MixADSR(&work->ch[ch].adsr, d); if (d < ns_to) { - endmask |= 1 << ch; - worker->ch[ch].adsr.EnvelopeVol = 0; + work->ch[ch].adsr.EnvelopeVol = 0; memset(&ChanBuf[d], 0, (ns_to - d) * sizeof(ChanBuf[0])); } if (ch == 1 || ch == 3) { - do_decode_bufs((void *)worker->ram, ch/2, ns_to, worker->decode_pos); + do_decode_bufs(spu.spuMem, ch/2, ns_to, work->decode_pos); decode_dirty_ch |= 1 << ch; } - if (s_chan[ch].bFMod == 2) // fmod freq channel + if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); - if (s_chan[ch].bRVBActive) - mix_chan_rvb(0, ns_to, s_chan[ch].iLeftVolume, s_chan[ch].iRightVolume, worker->sRVBStart); + if (s_chan->bRVBActive) + mix_chan_rvb(work->SSumLR, ns_to, + s_chan->iLeftVolume, s_chan->iRightVolume, work->RVB); else - mix_chan(0, ns_to, s_chan[ch].iLeftVolume, s_chan[ch].iRightVolume); + mix_chan(work->SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); } - - worker->r_chan_end = endmask; - worker->r_decode_dirty = decode_dirty_ch; } -static void sync_worker_thread(void) +static void sync_worker_thread(int force) { - unsigned int mask; - int ch; + struct work_item *work; + int done, used_space; - if (!worker->pending) - return; + done = thread_get_i_done() - worker->i_reaped; + used_space = worker->i_ready - worker->i_reaped; + //printf("done: %d use: %d dsp: %u/%u\n", done, used_space, + // worker->boot_cnt, worker->i_done); - sem_wait(&worker->sem_done); - worker->pending = 0; + while ((force && used_space > 0) || used_space >= WORK_MAXCNT || done > 0) { + work = &worker->i[worker->i_reaped & WORK_I_MASK]; + thread_work_wait_sync(work, force); - mask = worker->chmask; - for (ch = 0; mask != 0; ch++, mask >>= 1) { - if (!(mask & 1)) continue; + do_samples_finish(work->SSumLR, work->RVB, work->ns_to, + work->channels_silent, work->decode_pos); - // be sure there was no keyoff while thread was working - if (s_chan[ch].ADSRX.State != ADSR_RELEASE) - s_chan[ch].ADSRX.State = worker->ch[ch].adsr.State; - s_chan[ch].ADSRX.EnvelopeVol = worker->ch[ch].adsr.EnvelopeVol; + worker->i_reaped++; + done = thread_get_i_done() - worker->i_reaped; + used_space = worker->i_ready - worker->i_reaped; } - - spu.dwChannelOn &= ~worker->r_chan_end; - spu.decode_dirty_ch |= worker->r_decode_dirty; - - do_samples_finish(worker->ns_to, worker->silentch, - worker->decode_pos); } #else static void queue_channel_work(int ns_to, int silentch) {} -static void sync_worker_thread(void) {} +static void sync_worker_thread(int force) {} + +static const void * const worker = NULL; #endif // THREAD_ENABLED @@ -993,12 +1055,11 @@ static void sync_worker_thread(void) {} // here is the main job handler... //////////////////////////////////////////////////////////////////////// -void do_samples(unsigned int cycles_to, int do_sync) +void do_samples(unsigned int cycles_to, int do_direct) { - unsigned int mask; - int ch, ns_to; - int silentch; + unsigned int silentch; int cycle_diff; + int ns_to; cycle_diff = cycles_to - spu.cycles_played; if (cycle_diff < -2*1048576 || cycle_diff > 2*1048576) @@ -1008,6 +1069,12 @@ void do_samples(unsigned int cycles_to, int do_sync) return; } + silentch = ~(spu.dwChannelOn | spu.dwNewChannel) & 0xffffff; + + do_direct |= (silentch == 0xffffff); + if (worker != NULL) + sync_worker_thread(do_direct); + if (cycle_diff < 2 * 768) return; @@ -1047,29 +1114,12 @@ void do_samples(unsigned int cycles_to, int do_sync) } } - if (worker != NULL) - sync_worker_thread(); - - mask = spu.dwNewChannel & 0xffffff; - for (ch = 0; mask != 0; ch++, mask >>= 1) { - if (mask & 1) - StartSound(ch); - } - - silentch = ~spu.dwChannelOn & 0xffffff; - - if (spu.dwChannelOn == 0) { - InitREVERB(ns_to); - do_samples_finish(ns_to, silentch, spu.decode_pos); + if (do_direct || worker == NULL || !spu_config.iUseThread) { + do_channels(ns_to); + do_samples_finish(spu.SSumLR, spu.RVB, ns_to, silentch, spu.decode_pos); } else { - if (do_sync || worker == NULL || !spu_config.iUseThread) { - do_channels(ns_to); - do_samples_finish(ns_to, silentch, spu.decode_pos); - } - else { - queue_channel_work(ns_to, silentch); - } + queue_channel_work(ns_to, silentch); } // advance "stopped" channels that can cause irqs @@ -1081,13 +1131,15 @@ void do_samples(unsigned int cycles_to, int do_sync) spu.decode_pos = (spu.decode_pos + ns_to) & 0x1ff; } -static void do_samples_finish(int ns_to, int silentch, int decode_pos) +static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, + int silentch, int decode_pos) { int volmult = spu_config.iVolume; int ns; int d; - if(unlikely(silentch & spu.decode_dirty_ch & (1<<1))) // must clear silent channel decode buffers + // must clear silent channel decode buffers + if(unlikely(silentch & spu.decode_dirty_ch & (1<<1))) { memset(&spu.spuMem[0x800/2], 0, 0x400); spu.decode_dirty_ch &= ~(1<<1); @@ -1101,13 +1153,13 @@ static void do_samples_finish(int ns_to, int silentch, int decode_pos) //---------------------------------------------------// // mix XA infos (if any) - MixXA(ns_to, decode_pos); + MixXA(SSumLR, ns_to, decode_pos); /////////////////////////////////////////////////////// // mix all channels (including reverb) into one buffer if(spu_config.iUseReverb) - REVERBDo(ns_to); + REVERBDo(SSumLR, RVB, ns_to); if((spu.spuCtrl&0x4000)==0) // muted? (rare, don't optimize for this) { @@ -1145,8 +1197,8 @@ void schedule_next_irq(void) { if (spu.dwChannelDead & (1 << ch)) continue; - if ((unsigned long)(spu.pSpuIrq - s_chan[ch].pCurr) > IRQ_NEAR_BLOCKS * 16 - && (unsigned long)(spu.pSpuIrq - s_chan[ch].pLoop) > IRQ_NEAR_BLOCKS * 16) + if ((unsigned long)(spu.pSpuIrq - spu.s_chan[ch].pCurr) > IRQ_NEAR_BLOCKS * 16 + && (unsigned long)(spu.pSpuIrq - spu.s_chan[ch].pLoop) > IRQ_NEAR_BLOCKS * 16) continue; scan_for_irq(ch, &upd_samples); @@ -1226,8 +1278,7 @@ int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes) // to be called after state load void ClearWorkingState(void) { - memset(SSumLR,0,sizeof(SSumLR)); // init some mixing buffers - memset(iFMod,0,sizeof(iFMod)); + memset(iFMod, 0, sizeof(iFMod)); spu.pS=(short *)spu.pSpuBuffer; // setup soundbuffer pointer } @@ -1237,8 +1288,8 @@ void SetupStreams(void) int i; spu.pSpuBuffer = (unsigned char *)malloc(32768); // alloc mixing buffer - spu.sRVBStart = (int *)malloc(NSSIZE*2*4); // alloc reverb buffer - memset(spu.sRVBStart,0,NSSIZE*2*4); + spu.RVB = calloc(NSSIZE * 2, sizeof(spu.RVB[0])); + spu.SSumLR = calloc(NSSIZE * 2, sizeof(spu.SSumLR[0])); spu.XAStart = // alloc xa buffer (uint32_t *)malloc(44100 * sizeof(uint32_t)); @@ -1254,10 +1305,10 @@ void SetupStreams(void) for(i=0;i init sustain - s_chan[i].ADSRX.SustainIncrease = 1; - s_chan[i].pLoop=spu.spuMemC; - s_chan[i].pCurr=spu.spuMemC; + spu.s_chan[i].ADSRX.SustainLevel = 0xf; // -> init sustain + spu.s_chan[i].ADSRX.SustainIncrease = 1; + spu.s_chan[i].pLoop=spu.spuMemC; + spu.s_chan[i].pCurr=spu.spuMemC; } ClearWorkingState(); @@ -1270,26 +1321,64 @@ void RemoveStreams(void) { free(spu.pSpuBuffer); // free mixing buffer spu.pSpuBuffer = NULL; - free(spu.sRVBStart); // free reverb buffer - spu.sRVBStart = NULL; + free(spu.RVB); // free reverb buffer + spu.RVB = NULL; + free(spu.SSumLR); + spu.SSumLR = NULL; free(spu.XAStart); // free XA buffer spu.XAStart = NULL; free(spu.CDDAStart); // free CDDA buffer spu.CDDAStart = NULL; } -#ifdef THREAD_ENABLED +#if defined(C64X_DSP) + +/* special code for TI C64x DSP */ +#include "spu_c64x.c" + +#elif defined(THREAD_ENABLED) + +#include +#include +#include + +static struct { + pthread_t thread; + sem_t sem_avail; + sem_t sem_done; +} t; + +/* generic pthread implementation */ + +static void thread_work_start(void) +{ + sem_post(&t.sem_avail); +} + +static void thread_work_wait_sync(struct work_item *work, int force) +{ + sem_wait(&t.sem_done); +} + +static int thread_get_i_done(void) +{ + return worker->i_done; +} static void *spu_worker_thread(void *unused) { + struct work_item *work; + while (1) { - sem_wait(&worker->sem_avail); + sem_wait(&t.sem_avail); if (worker->exit_thread) break; - do_channel_work(); + work = &worker->i[worker->i_done & WORK_I_MASK]; + do_channel_work(work); + worker->i_done++; - sem_post(&worker->sem_done); + sem_post(&t.sem_done); } return NULL; @@ -1305,26 +1394,28 @@ static void init_spu_thread(void) worker = calloc(1, sizeof(*worker)); if (worker == NULL) return; - ret = sem_init(&worker->sem_avail, 0, 0); + ret = sem_init(&t.sem_avail, 0, 0); if (ret != 0) goto fail_sem_avail; - ret = sem_init(&worker->sem_done, 0, 0); + ret = sem_init(&t.sem_done, 0, 0); if (ret != 0) goto fail_sem_done; - ret = pthread_create(&worker->thread, NULL, spu_worker_thread, NULL); + ret = pthread_create(&t.thread, NULL, spu_worker_thread, NULL); if (ret != 0) goto fail_thread; + spu_config.iThreadAvail = 1; return; fail_thread: - sem_destroy(&worker->sem_done); + sem_destroy(&t.sem_done); fail_sem_done: - sem_destroy(&worker->sem_avail); + sem_destroy(&t.sem_avail); fail_sem_avail: free(worker); worker = NULL; + spu_config.iThreadAvail = 0; } static void exit_spu_thread(void) @@ -1332,10 +1423,10 @@ static void exit_spu_thread(void) if (worker == NULL) return; worker->exit_thread = 1; - sem_post(&worker->sem_avail); - pthread_join(worker->thread, NULL); - sem_destroy(&worker->sem_done); - sem_destroy(&worker->sem_avail); + sem_post(&t.sem_avail); + pthread_join(t.thread, NULL); + sem_destroy(&t.sem_done); + sem_destroy(&t.sem_avail); free(worker); worker = NULL; } @@ -1355,13 +1446,15 @@ static void exit_spu_thread(void) // SPUINIT: this func will be called first by the main emu long CALLBACK SPUinit(void) { - spu.spuMemC = (unsigned char *)spu.spuMem; // just small setup + spu.spuMemC = calloc(1, 512 * 1024); memset((void *)&rvb, 0, sizeof(REVERBInfo)); InitADSR(); - spu.spuAddr = 0xffffffff; + spu.s_chan = calloc(MAXCHAN+1, sizeof(spu.s_chan[0])); // channel + 1 infos (1 is security for fmod handling) + spu.SB = calloc(MAXCHAN, sizeof(spu.SB[0]) * SB_SIZE); + + spu.spuAddr = 0; spu.decode_pos = 0; - memset((void *)s_chan, 0, sizeof(s_chan)); spu.pSpuIrq = spu.spuMemC; SetupStreams(); // prepare streaming @@ -1402,11 +1495,19 @@ long CALLBACK SPUclose(void) long CALLBACK SPUshutdown(void) { SPUclose(); - RemoveStreams(); // no more streaming - spu.bSpuInit=0; exit_spu_thread(); + free(spu.spuMemC); + spu.spuMemC = NULL; + free(spu.SB); + spu.SB = NULL; + free(spu.s_chan); + spu.s_chan = NULL; + + RemoveStreams(); // no more streaming + spu.bSpuInit=0; + return 0; } @@ -1483,15 +1584,18 @@ void spu_get_debug_info(int *chans_out, int *run_chans, int *fmod_chans_out, int { int ch = 0, fmod_chans = 0, noise_chans = 0, irq_chans = 0; + if (spu.s_chan == NULL) + return; + for(;ch