--- /dev/null
+ifndef C64_TOOLS_DSP_ROOT # every include below is resolved relative to this path
+$(error need C64_TOOLS_DSP_ROOT)
+endif # C64_TOOLS_DSP_ROOT
+
+include $(C64_TOOLS_DSP_ROOT)/install.mk
+
+TARGET_BASENAME = pcsxr_spu
+OPTFLAGS += -O2
+CFLAGS += -DNO_OS -DWANT_THREAD_CODE
+
+OBJ = \
+ spu_c64x_dspcode.o64
+
+include $(C64_TOOLS_DSP_ROOT)/build_area3.mk # NOTE(review): presumably selects the DSP memory area to link into - confirm against c64_tools
+include $(C64_TOOLS_DSP_ROOT)/build.mk
int iRightVolume; // right volume\r
ADSRInfoEx ADSRX;\r
int iRawPitch; // raw pitch (0...3fff)\r
-\r
- int SB[32+4];\r
} SPUCHAN;\r
\r
///////////////////////////////////////////////////////////\r
\r
// psx buffers / addresses\r
\r
+#define SB_SIZE (32 + 4)\r
+\r
typedef struct\r
{\r
unsigned short spuCtrl;\r
int iRightXAVol;\r
\r
SPUCHAN * s_chan;\r
+ int * SB;\r
\r
- int pad[31];\r
+ int pad[30];\r
unsigned short regArea[0x400];\r
} SPUInfo;\r
\r
d->iSBPos = s->iSBPos;\r
d->spos = s->spos;\r
d->sinc = s->sinc;\r
- memcpy(d->SB, s->SB, sizeof(d->SB));\r
+ memcpy(d->SB, spu.SB + ch * SB_SIZE, sizeof(d->SB[0]) * SB_SIZE);\r
d->iStart = (regAreaGet(ch,6)&~1)<<3;\r
d->iCurr = 0; // set by the caller\r
d->iLoop = 0; // set by the caller\r
d->bIgnoreLoop = (s->prevflags ^ 2) << 1;\r
d->iRightVolume = s->iRightVolume;\r
d->iRawPitch = s->iRawPitch;\r
- d->s_1 = s->SB[27]; // yes it's reversed\r
- d->s_2 = s->SB[26];\r
+ d->s_1 = spu.SB[ch * SB_SIZE + 27]; // yes it's reversed\r
+ d->s_2 = spu.SB[ch * SB_SIZE + 26];\r
d->bRVBActive = s->bRVBActive;\r
d->bNoise = s->bNoise;\r
d->bFMod = s->bFMod;\r
d->spos = s->spos;\r
d->sinc = s->sinc;\r
d->sinc_inv = 0;\r
- memcpy(d->SB, s->SB, sizeof(d->SB));\r
+ memcpy(spu.SB + ch * SB_SIZE, s->SB, sizeof(spu.SB[0]) * SB_SIZE);\r
d->pCurr = (void *)((long)s->iCurr & 0x7fff0);\r
d->pLoop = (void *)((long)s->iLoop & 0x7fff0);\r
d->bReverb = s->bReverb;\r
load_register(H_CDRight, cycles);\r
\r
// fix to prevent new interpolations from crashing\r
- for(i=0;i<MAXCHAN;i++) spu.s_chan[i].SB[28]=0;\r
+ for(i=0;i<MAXCHAN;i++) spu.SB[i * SB_SIZE + 28]=0;\r
\r
ClearWorkingState();\r
spu.cycles_played = cycles;\r
spu.s_chan[ch].iRawPitch=NP;\r
spu.s_chan[ch].sinc=(NP<<4)|8;\r
spu.s_chan[ch].sinc_inv=0;\r
- if(spu_config.iUseInterpolation==1) spu.s_chan[ch].SB[32]=1; // -> freq change in simple interpolation mode: set flag\r
+ if (spu_config.iUseInterpolation == 1)\r
+ spu.SB[ch * SB_SIZE + 32] = 1; // -> freq change in simple interpolation mode: set flag\r
}\r
\r
////////////////////////////////////////////////////////////////////////\r
* *
***************************************************************************/
-#ifndef _WIN32
+#if !defined(_WIN32) && !defined(NO_OS)
#include <sys/time.h> // gettimeofday in xa.c
#define THREAD_ENABLED 1
#endif
#include "externals.h"
#include "registers.h"
#include "out.h"
-#include "arm_features.h"
#include "spu_config.h"
+#ifdef __arm__
+#include "arm_features.h"
+#endif
+
#ifdef __ARM_ARCH_7A__
#define ssat32_to_16(v) \
asm("ssat %0,#16,%1" : "=r" (v) : "r" (v))
REVERBInfo rvb;
-#ifdef THREAD_ENABLED
+#if defined(THREAD_ENABLED) || defined(WANT_THREAD_CODE)
// worker thread state
static struct spu_worker {
unsigned int pending:1;
unsigned int exit_thread:1;
+ unsigned int stale_cache:1;
int ns_to;
int ctrl;
int decode_pos;
int silentch;
unsigned int chmask;
- unsigned int r_chan_end;
- unsigned int r_decode_dirty;
struct {
int spos;
int sbpos;
ADSRInfoEx adsr;
// might want to add vol and fmod flags..
} ch[24];
+ struct {
+ struct {
+ int adsrState;
+ int adsrEnvelopeVol;
+ } ch[24];
+ unsigned int chan_end;
+ unsigned int decode_dirty;
+ } r;
} *worker;
#else
INLINE void StartSound(int ch)
{
SPUCHAN *s_chan = &spu.s_chan[ch];
+ int *SB = spu.SB + ch * SB_SIZE;
StartADSR(ch);
StartREVERB(ch);
s_chan->prevflags=2;
- s_chan->SB[26]=0; // init mixing vars
- s_chan->SB[27]=0;
s_chan->iSBPos=27;
+ SB[26]=0; // init mixing vars
+ SB[27]=0;
- s_chan->SB[28]=0;
- s_chan->SB[29]=0; // init our interpolation helpers
- s_chan->SB[30]=0;
- s_chan->SB[31]=0;
+ SB[28]=0;
+ SB[29]=0; // init our interpolation helpers
+ SB[30]=0;
+ SB[31]=0;
s_chan->spos=0;
spu.dwNewChannel&=~(1<<ch); // clear new channel bit
return ret;
}
-#ifdef THREAD_ENABLED
+#if defined(THREAD_ENABLED) || defined(WANT_THREAD_CODE)
static int decode_block_work(int ch, int *SB)
{
if (!(mask & 1)) continue; // channel not playing? next
s_chan = &spu.s_chan[ch];
- SB = s_chan->SB;
+ SB = spu.SB + ch * SB_SIZE;
sinc = s_chan->sinc;
if (s_chan->bNoise)
// optional worker thread handling
-#ifdef THREAD_ENABLED
+#if defined(THREAD_ENABLED) || defined(WANT_THREAD_CODE)
static void thread_work_start(void);
static void thread_work_wait_sync(void);
+static void thread_sync_caches(void);
static void queue_channel_work(int ns_to, int silentch)
{
sinc = worker->ch[ch].sinc;
s_chan = &spu.s_chan[ch];
- SB = s_chan->SB;
+ SB = spu.SB + ch * SB_SIZE;
if (s_chan->bNoise)
do_lsfr_samples(d, worker->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal);
worker->ch[ch].adsr.EnvelopeVol = 0;
memset(&ChanBuf[d], 0, (ns_to - d) * sizeof(ChanBuf[0]));
}
+ worker->r.ch[ch].adsrState = worker->ch[ch].adsr.State;
+ worker->r.ch[ch].adsrEnvelopeVol = worker->ch[ch].adsr.EnvelopeVol;
if (ch == 1 || ch == 3)
{
mix_chan(0, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume);
}
- worker->r_chan_end = endmask;
- worker->r_decode_dirty = decode_dirty_ch;
+ worker->r.chan_end = endmask;
+ worker->r.decode_dirty = decode_dirty_ch;
}
-static void sync_worker_thread(void)
+static void sync_worker_thread(int do_direct)
{
unsigned int mask;
int ch;
+ if (do_direct)
+ thread_sync_caches();
if (!worker->pending)
return;
// be sure there was no keyoff while thread was working
if (spu.s_chan[ch].ADSRX.State != ADSR_RELEASE)
- spu.s_chan[ch].ADSRX.State = worker->ch[ch].adsr.State;
- spu.s_chan[ch].ADSRX.EnvelopeVol = worker->ch[ch].adsr.EnvelopeVol;
+ spu.s_chan[ch].ADSRX.State = worker->r.ch[ch].adsrState;
+ spu.s_chan[ch].ADSRX.EnvelopeVol = worker->r.ch[ch].adsrEnvelopeVol;
}
- spu.dwChannelOn &= ~worker->r_chan_end;
- spu.decode_dirty_ch |= worker->r_decode_dirty;
+ spu.dwChannelOn &= ~worker->r.chan_end;
+ spu.decode_dirty_ch |= worker->r.decode_dirty;
do_samples_finish(worker->ns_to, worker->silentch,
worker->decode_pos);
#else
static void queue_channel_work(int ns_to, int silentch) {}
-static void sync_worker_thread(void) {}
+static void sync_worker_thread(int do_direct) {} // no-op stub for the #else branch, where the worker code is compiled out
#endif // THREAD_ENABLED
// here is the main job handler...
////////////////////////////////////////////////////////////////////////
-void do_samples(unsigned int cycles_to, int do_sync)
+void do_samples(unsigned int cycles_to, int do_direct)
{
unsigned int mask;
int ch, ns_to;
return;
}
+ do_direct |= (cycle_diff < 64 * 768);
+ if (worker != NULL)
+ sync_worker_thread(do_direct);
+
if (cycle_diff < 2 * 768)
return;
}
}
- if (worker != NULL)
- sync_worker_thread();
-
mask = spu.dwNewChannel & 0xffffff;
for (ch = 0; mask != 0; ch++, mask >>= 1) {
if (mask & 1)
do_samples_finish(ns_to, silentch, spu.decode_pos);
}
else {
- if (do_sync || worker == NULL || !spu_config.iUseThread) {
+ if (do_direct || worker == NULL || !spu_config.iUseThread) {
do_channels(ns_to);
do_samples_finish(ns_to, silentch, spu.decode_pos);
}
sem_wait(&t.sem_done);
}
+static void thread_sync_caches(void)
+{ // intentionally empty here; presumably the host-thread worker needs no cache maintenance - confirm
+}
+
static void *spu_worker_thread(void *unused)
{
while (1) {
InitADSR();
spu.s_chan = calloc(MAXCHAN+1, sizeof(spu.s_chan[0])); // channel + 1 infos (1 is security for fmod handling)
+ spu.SB = calloc(MAXCHAN, sizeof(spu.SB[0]) * SB_SIZE);
spu.spuAddr = 0;
spu.decode_pos = 0;
free(spu.spuMemC);
spu.spuMemC = NULL;
+ free(spu.SB);
+ spu.SB = NULL;
free(spu.s_chan);
spu.s_chan = NULL;
*/
#include <dlfcn.h>
+#include <stddef.h>
+
#include <inc_libc64_mini.h>
#include "spu_c64x.h"
static dsp_mem_region_t region;
+static dsp_component_id_t compid;
static struct {
void *handle;
int (*dsp_cache_inv_virt)(void *_virtAddr, sU32 _size);
int (*dsp_rpc_send)(const dsp_msg_t *_msgTo);
int (*dsp_rpc_recv)(dsp_msg_t *_msgFrom);
+ int (*dsp_rpc)(const dsp_msg_t *_msgTo, dsp_msg_t *_msgFrom);
void (*dsp_logbuf_print)(void);
} f;
static void thread_work_start(void)
{
- do_channel_work();
+ dsp_msg_t msg; // request sent to the DSP component identified by compid
+ int ret;
+
+ DSP_MSG_INIT(&msg, compid, CCMD_DOIT, 0, 0); // CCMD_DOIT makes the DSP side call do_channel_work()
+ ret = f.dsp_rpc_send(&msg); // send only; the reply is collected by thread_work_wait_sync()
+ if (ret != 0) {
+ fprintf(stderr, "dsp_rpc_send failed: %d\n", ret);
+ f.dsp_logbuf_print();
+ // maybe stop using the DSP?
+ }
}
static void thread_work_wait_sync(void)
{
+ dsp_msg_t msg; // receives the DSP's reply to the CCMD_DOIT request
+ int ns_to;
+ int ret;
+
+ ns_to = worker->ns_to; // the sizes below cover 2 ints per sample up to ns_to
+ f.dsp_cache_inv_virt(spu.sRVBStart, sizeof(spu.sRVBStart[0]) * 2 * ns_to); // NOTE(review): invalidation is issued before the reply is received - confirm nothing can refill these lines before the DSP writes back
+ f.dsp_cache_inv_virt(SSumLR, sizeof(SSumLR[0]) * 2 * ns_to);
+ f.dsp_cache_inv_virt(&worker->r, sizeof(worker->r)); // worker->r holds the results read back by sync_worker_thread()
+ worker->stale_cache = 1; // SB, ram: invalidated lazily by thread_sync_caches()
+
+ ret = f.dsp_rpc_recv(&msg);
+ if (ret != 0) {
+ fprintf(stderr, "dsp_rpc_recv failed: %d\n", ret);
+ f.dsp_logbuf_print();
+ }
+ //f.dsp_logbuf_print();
+}
+
+// called before ARM decides to do SPU mixing itself
+static void thread_sync_caches(void)
+{
+ if (worker->stale_cache) { // set by thread_work_wait_sync()
+ f.dsp_cache_inv_virt(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24); // SB slots of all 24 channels
+ f.dsp_cache_inv_virt(spu.spuMemC + 0x800, 0x800); // NOTE(review): only a 0x800-byte window of SPU RAM at offset 0x800 - presumably the area the DSP writes; confirm
+ worker->stale_cache = 0;
+ }
}
static void init_spu_thread(void)
{
+ dsp_msg_t init_msg, msg_in;
struct region_mem *mem;
int ret;
LDS(dsp_component_load);
LDS(dsp_rpc_send);
LDS(dsp_rpc_recv);
+ LDS(dsp_rpc);
LDS(dsp_logbuf_print);
#undef LDS
if (failed) {
return;
}
+ ret = f.dsp_component_load(NULL, COMPONENT_NAME, &compid);
+ if (ret != 0) {
+ fprintf(stderr, "dsp_component_load failed: %d\n", ret);
+ goto fail_cload;
+ }
+
region = f.dsp_shm_alloc(DSP_CACHE_R, sizeof(*mem)); // writethrough
if (region.size < sizeof(*mem) || region.virt_addr == 0) {
fprintf(stderr, "dsp_shm_alloc failed\n");
}
mem = (void *)region.virt_addr;
+ memcpy(&mem->spu_config, &spu_config, sizeof(mem->spu_config));
+
+ DSP_MSG_INIT(&init_msg, compid, CCMD_INIT, region.phys_addr, 0);
+ ret = f.dsp_rpc(&init_msg, &msg_in);
+ if (ret != 0) {
+ fprintf(stderr, "dsp_rpc failed: %d\n", ret);
+ goto fail_init;
+ }
+
+ if (mem->sizeof_region_mem != sizeof(*mem)) {
+ fprintf(stderr, "error: size mismatch 1: %d vs %zd\n",
+ mem->sizeof_region_mem, sizeof(*mem));
+ goto fail_init;
+ }
+ if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), s_chan[1])) {
+ fprintf(stderr, "error: size mismatch 2: %d vs %zd\n",
+ mem->offsetof_s_chan1, offsetof(typeof(*mem), s_chan[1]));
+ goto fail_init;
+ }
+ if (mem->offsetof_worker_ram != offsetof(typeof(*mem), worker.ch[1])) {
+ fprintf(stderr, "error: size mismatch 3: %d vs %zd\n",
+ mem->offsetof_worker_ram, offsetof(typeof(*mem), worker.ch[1]));
+ goto fail_init;
+ }
+
// override default allocations
free(spu.spuMemC);
spu.spuMemC = mem->spu_ram;
spu.sRVBStart = mem->RVB;
free(SSumLR);
SSumLR = mem->SSumLR;
+ free(spu.SB);
+ spu.SB = mem->SB;
free(spu.s_chan);
spu.s_chan = mem->s_chan;
worker = &mem->worker;
- printf("C64x DSP ready.\n");
+ printf("spu: C64x DSP ready (id=%d).\n", (int)compid);
+ f.dsp_logbuf_print();
+
+pcnt_init();
+ (void)do_channel_work; // used by DSP instead
return;
+fail_init:
+ f.dsp_shm_free(region);
fail_mem:
+ // no component unload func?
+fail_cload:
+ printf("spu: C64x DSP init failed.\n");
+ f.dsp_logbuf_print();
f.dsp_close();
worker = NULL;
}
spu.spuMemC = NULL;
spu.sRVBStart = NULL;
SSumLR = NULL;
+ spu.SB = NULL;
spu.s_chan = NULL;
worker = NULL;
}
+#define COMPONENT_NAME "pcsxr_spu" // passed to dsp_component_load() on ARM and stored in the DSP component descriptor
+
+enum {
+ CCMD_INIT = 0x101, // arg1 = address of struct region_mem; the DSP binds its SPU pointers to it
+ CCMD_DOIT = 0x102, // DSP runs do_channel_work(), then writes back its caches
+};
struct region_mem {
unsigned char spu_ram[512 * 1024];
int RVB[NSSIZE * 2];
int SSumLR[NSSIZE * 2];
+ int SB[SB_SIZE * 24]; // SB_SIZE ints per channel, addressed as ch * SB_SIZE
+ // careful not to lose ARM writes by DSP overwriting
+ // with old data when it's writing out neighbor cachelines
+ int _pad1[128/4 - ((NSSIZE * 4 + SB_SIZE * 24) & (128/4 - 1))]; // rounds RVB + SSumLR + SB up to a multiple of 32 ints (a full 32 when already aligned)
SPUCHAN s_chan[24 + 1];
+ int _pad2[128/4 - ((sizeof(SPUCHAN) * 25 / 4) & (128/4 - 1))]; // same rounding for the 25 s_chan entries
struct spu_worker worker;
+ SPUConfig spu_config; // written by ARM before CCMD_INIT, copied out by the DSP during CCMD_INIT
+ // init/debug
+ int sizeof_region_mem; // these three are filled in by the DSP during CCMD_INIT and compared on ARM
+ int offsetof_s_chan1;
+ int offsetof_worker_ram; // NOTE(review): both sides store/compare offsetof(worker.ch[1]) despite the name
};
--- /dev/null
+/*
+ * SPU processing offload to TI C64x DSP using bsp's c64_tools
+ * (C) Gražvydas "notaz" Ignotas, 2015
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define SYSCALLS_C
+#include <libc64_dsp/include/inc_overlay.h>
+#include <stddef.h>
+
+#include "spu.c"
+#include "spu_c64x.h"
+
+/* dummy deps, some bloat but avoids ifdef hell in SPU code.. */
+static void thread_work_start(void) {} // empty definition for the static prototype declared in spu.c
+static void thread_work_wait_sync(void) {} // likewise
+static void thread_sync_caches(void) {} // likewise
+struct out_driver *out_current; // only defined here; nothing in this file assigns it
+void SetupSound(void) {} // no-op on the DSP side
+
+#if 0
+// no use, c64_tools does BCACHE_wbInvAll..
+static void sync_caches(void)
+{
+ int ns_to = worker->ns_to;
+
+ syscalls.cache_wb(spu.sRVBStart, sizeof(spu.sRVBStart[0]) * 2 * ns_to, 1);
+ syscalls.cache_wb(SSumLR, sizeof(SSumLR[0]) * 2 * ns_to, 1);
+
+ syscalls.cache_wbInv(worker, sizeof(*worker), 1);
+}
+#endif // sync_caches is compiled out; exec() calls syscalls.cache_wbInvAll() instead
+
+/*
+ * DSP-side command handler, installed as the exec entry of component_test_dsp.
+ *
+ * CCMD_INIT: arg1 is the address of the shared struct region_mem. The SPU
+ *            state pointers and the worker pointer are bound to it, the
+ *            config is copied out of it, and three layout values
+ *            (struct size and two offsets) are stored back into it.
+ * CCMD_DOIT: runs do_channel_work() and then writes back and invalidates
+ *            all DSP caches.
+ * Any other command is only reported through syscalls.printf().
+ *
+ * arg2, ret1 and ret2 are not used. Always returns 0.
+ */
+static unsigned int exec(dsp_component_cmd_t cmd,
+ unsigned int arg1, unsigned int arg2,
+ unsigned int *ret1, unsigned int *ret2)
+{
+ // only dereferenced for CCMD_INIT
+ struct region_mem *mem = (void *)arg1;
+
+ switch (cmd) {
+ case CCMD_INIT:
+  InitADSR();
+
+  spu.spuMemC = mem->spu_ram;
+  spu.sRVBStart = mem->RVB;
+  SSumLR = mem->SSumLR;
+  spu.SB = mem->SB;
+  spu.s_chan = mem->s_chan;
+  worker = &mem->worker;
+  memcpy(&spu_config, &mem->spu_config, sizeof(spu_config));
+
+  // layout values reported back through the shared region
+  mem->sizeof_region_mem = sizeof(*mem);
+  mem->offsetof_s_chan1 = offsetof(typeof(*mem), s_chan[1]);
+  mem->offsetof_worker_ram = offsetof(typeof(*mem), worker.ch[1]);
+  // seems to be unneeded, no write-alloc? but just in case..
+  syscalls.cache_wb(&mem->sizeof_region_mem, 3 * 4, 1);
+  break;
+
+ case CCMD_DOIT:
+  do_channel_work();
+  // c64_tools lib does BCACHE_wbInvAll() when it receives mailbox irq,
+  // so there is no benefit of syncing only what's needed.
+  // But call wbInvAll() anyway in case c64_tools is ever fixed..
+  //sync_caches();
+  syscalls.cache_wbInvAll();
+  break;
+
+ default:
+  syscalls.printf("bad cmd: %x\n", cmd);
+  break;
+ }
+
+ return 0;
+}
+
+#pragma DATA_SECTION(component_test_dsp, ".sec_com");
+dsp_component_t component_test_dsp = {
+ {
+ NULL, /* init */
+ exec, /* exec: handles CCMD_INIT and CCMD_DOIT */
+ NULL, /* exec fastcall RPC */
+ NULL, /* exit */
+ },
+
+ COMPONENT_NAME, /* the same name the ARM side passes to dsp_component_load() */
+};
+
+DSP_COMPONENT_MAIN
+
+// vim:shiftwidth=1:expandtab
static unsigned long timeGetTime_spu()
{
-#ifdef _WIN32
+#if defined(NO_OS)
+ return 0;
+#elif defined(_WIN32)
return GetTickCount();
#else
struct timeval tv;