X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fdfsound%2Fspu_c64x_dspcode.c;h=570da5eda44921d4ac9fbe93708e29f655619190;hp=117a29669ec24a5e66c06894cd9b85e9e2488a5e;hb=8a55ebcc07d4f860633db8c77bb9e16bcfa03313;hpb=de4a0279efefdd2e4595c8fc27f1564f4bff9341

diff --git a/plugins/dfsound/spu_c64x_dspcode.c b/plugins/dfsound/spu_c64x_dspcode.c
index 117a2966..570da5ed 100644
--- a/plugins/dfsound/spu_c64x_dspcode.c
+++ b/plugins/dfsound/spu_c64x_dspcode.c
@@ -30,57 +30,147 @@
 /* dummy deps, some bloat but avoids ifdef hell in SPU code.. */
 static void thread_work_start(void) {}
-static void thread_work_wait_sync(void) {}
+static void thread_work_wait_sync(struct work_item *work, int force) {}
 static void thread_sync_caches(void) {}
+static int thread_get_i_done(void) { return 0; }
 struct out_driver *out_current;
 void SetupSound(void) {}
 
-#if 0
-// no use, c64_tools does BCACHE_wbInvAll..
-static void sync_caches(void)
+
+static void enable_l2_cache(void)
+{
+  volatile uint32_t *L2CFG = (volatile uint32_t *)0x01840000;
+  uint32_t *MARi = (void *)0x01848000;
+  int i;
+
+  // program Memory Attribute Registers
+  // (old c64_tools has the defaults messed up)
+  // 00000000-0fffffff - not configurable
+  // 10000000-7fffffff - system
+  for (i = 0x10; i < 0x80; i++)
+    MARi[i] = 0;
+  // 80000000-9fffffff - RAM
+  for (     ; i < 0xa0; i++)
+    MARi[i] = 1;
+  // a0000000-ffffffff - reserved, etc
+  for (     ; i < 0x100; i++)
+    MARi[i] = 0;
+
+  // enable L2 (1 for 32k, 2 for 64k)
+  if (!(*L2CFG & 2)) {
+    *L2CFG = 2;
+    // wait for the write
+    *L2CFG;
+  }
+}
+
+static void invalidate_cache(struct work_item *work)
 {
-  int ns_to = worker->ns_to;
+  // see comment in writeout_cache()
+  //syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1);
+  syscalls.cache_inv(spu.s_chan, sizeof(spu.s_chan[0]) * 24, 1);
+  syscalls.cache_inv(work->SSumLR,
+    sizeof(work->SSumLR[0]) * 2 * work->ns_to, 1);
+}
 
-  syscalls.cache_wb(spu.sRVBStart, sizeof(spu.sRVBStart[0]) * 2 * ns_to, 1);
-  syscalls.cache_wb(SSumLR, sizeof(SSumLR[0]) * 2 * ns_to, 1);
+static void writeout_cache(struct work_item *work)
+{
+  int ns_to = work->ns_to;
 
-  syscalls.cache_wbInv(worker, sizeof(*worker), 1);
+  syscalls.cache_wb(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to, 1);
+  // have to invalidate now, otherwise there is a race between
+  // DSP evicting dirty lines and ARM writing new data to this area
+  syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1);
+}
+
+static void do_processing(void)
+{
+  int left, dirty = 0, had_rvb = 0;
+  struct work_item *work;
+
+  while (worker->active)
+  {
+    // i_ready is in first cacheline
+    syscalls.cache_inv(worker, 64, 1);
+
+    left = worker->i_ready - worker->i_done;
+    if (left > 0) {
+      dirty = 1;
+      worker->active = ACTIVE_CNT;
+      syscalls.cache_wb(&worker->active, 4, 1);
+
+      work = &worker->i[worker->i_done & WORK_I_MASK];
+      invalidate_cache(work);
+      had_rvb |= work->rvb_addr;
+      spu.spuCtrl = work->ctrl;
+      do_channel_work(work);
+      writeout_cache(work);
+
+      worker->i_done++;
+      syscalls.cache_wb(&worker->i_done, 4, 1);
+      continue;
+    }
+
+    // nothing to do? Write out non-critical caches
+    if (dirty) {
+      syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1);
+      syscalls.cache_wb(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24, 1);
+      if (had_rvb) {
+        left = 0x40000 - spu.rvb->StartAddr;
+        syscalls.cache_wb(spu.spuMem + spu.rvb->StartAddr, left * 2, 1);
+        had_rvb = 0;
+      }
+      dirty = 0;
+      continue;
+    }
+
+    // this ->active loop thing is to avoid a race where we miss
+    // new work and clear ->active just after ARM checks it
+    worker->active--;
+    syscalls.cache_wb(&worker->active, 4, 1);
+  }
 }
-#endif
 
 static unsigned int exec(dsp_component_cmd_t cmd,
   unsigned int arg1, unsigned int arg2,
   unsigned int *ret1, unsigned int *ret2)
 {
   struct region_mem *mem = (void *)arg1;
-  int i;
 
   switch (cmd) {
     case CCMD_INIT:
+      enable_l2_cache();
       InitADSR();
 
       spu.spuMemC = mem->spu_ram;
-      spu.sRVBStart = mem->RVB;
-      SSumLR = mem->SSumLR;
       spu.SB = mem->SB;
-      spu.s_chan = mem->s_chan;
+      spu.s_chan = mem->in.s_chan;
+      spu.rvb = &mem->in.rvb;
       worker = &mem->worker;
-      memcpy(&spu_config, &mem->spu_config, sizeof(spu_config));
+      memcpy(&spu_config, &mem->in.spu_config, sizeof(spu_config));
 
       mem->sizeof_region_mem = sizeof(*mem);
-      mem->offsetof_s_chan1 = offsetof(typeof(*mem), s_chan[1]);
-      mem->offsetof_worker_ram = offsetof(typeof(*mem), worker.ch[1]);
+      mem->offsetof_s_chan1 = offsetof(typeof(*mem), in.s_chan[1]);
+      mem->offsetof_spos_3_20 = offsetof(typeof(*mem), worker.i[3].ch[20]);
       // seems to be unneeded, no write-alloc? but just in case..
       syscalls.cache_wb(&mem->sizeof_region_mem, 3 * 4, 1);
       break;
 
     case CCMD_DOIT:
-      do_channel_work();
-      // c64_tools lib does BCACHE_wbInvAll() when it receives mailbox irq,
-      // so there is no benefit of syncing only what's needed.
-      // But call wbInvAll() anyway in case c64_tools is ever fixed..
-      //sync_caches();
-      syscalls.cache_wbInvAll();
+      worker->active = ACTIVE_CNT;
+      worker->boot_cnt++;
+      syscalls.cache_inv(worker, 128, 1);
+      syscalls.cache_wb(&worker->i_done, 128, 1);
+      memcpy(&spu_config, &mem->in.spu_config, sizeof(spu_config));
+
+      if (worker->ram_dirty)
+        // it's faster to do it all than just a 512k buffer
+        syscalls.cache_wbInvAll();
+
+      do_processing();
+
+      syscalls.cache_inv(&mem->SB, sizeof(mem->SB), 0);
+      syscalls.cache_inv(&mem->in, sizeof(mem->in), 0);
       break;
 
     default:
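As a reading aid for the enable_l2_cache() hunk above: on the C64x+ each Memory Attribute Register covers a 16 MB window, so MAR index n governs the region starting at n * 16 MB, which is why the loop bounds 0x10, 0x80, 0xa0 and 0x100 line up with the address ranges named in the comments. The short sketch below only illustrates that index math; mar_index_for() is an invented name and none of this is part of the plugin or of c64_tools.

/* Illustrative only: maps an address to the MAR index that the
 * enable_l2_cache() loops would touch (16 MB per MAR on C64x+). */
#include <stdint.h>
#include <assert.h>

static unsigned mar_index_for(uint32_t addr)
{
  return addr >> 24;                            /* 16 MB granularity */
}

int main(void)
{
  assert(mar_index_for(0x10000000u) == 0x10);   /* "system", marked uncacheable */
  assert(mar_index_for(0x80000000u) == 0x80);   /* start of the cacheable RAM range */
  assert(mar_index_for(0x9fffffffu) == 0x9f);   /* last cacheable 16 MB block */
  assert(mar_index_for(0xa0000000u) == 0xa0);   /* reserved space, left uncached */
  return 0;
}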
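The do_processing() hunk polls a small ring shared with the ARM side: i_ready is advanced by the producer, i_done by the DSP, and ->active is re-armed to ACTIVE_CNT whenever work is found, then counted down while idle so that work posted just after the ARM checks ->active is still picked up. Below is a minimal host-side sketch of that handshake, assuming C11 atomics and pthreads in place of the explicit cache_wb()/cache_inv() syscalls; struct fake_worker, the ring size and the timing values are invented for illustration and do not match the plugin's real layout.

/* Hypothetical sketch of the i_ready/i_done/active handshake.
 * Build with: cc -std=c11 -pthread sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define FAKE_WORK_I_MASK 3   /* 4-entry ring, loosely like worker->i[] */
#define FAKE_ACTIVE_CNT  4   /* idle polls before the consumer gives up */

struct fake_worker {
  atomic_int active;         /* >0: consumer running or about to re-poll */
  atomic_int i_ready;        /* advanced by the producer ("ARM side") */
  atomic_int i_done;         /* advanced by the consumer ("DSP side") */
  int payload[FAKE_WORK_I_MASK + 1];
};

static struct fake_worker w;

/* consumer: mirrors the i_ready/i_done/active countdown of do_processing() */
static void *consumer(void *arg)
{
  (void)arg;
  while (atomic_load(&w.active) > 0) {
    int done  = atomic_load(&w.i_done);
    int ready = atomic_load(&w.i_ready);

    if (ready - done > 0) {
      /* found work: re-arm ->active, process one item, publish i_done */
      atomic_store(&w.active, FAKE_ACTIVE_CNT);
      printf("consumed item %d: %d\n", done, w.payload[done & FAKE_WORK_I_MASK]);
      atomic_store(&w.i_done, done + 1);
      continue;
    }
    /* idle: count down instead of clearing ->active at once, so work
     * posted right after the producer saw active != 0 is not lost */
    atomic_fetch_sub(&w.active, 1);
    usleep(1000);
  }
  return NULL;
}

int main(void)
{
  pthread_t th;
  int i;

  atomic_store(&w.active, FAKE_ACTIVE_CNT);
  pthread_create(&th, NULL, consumer, NULL);

  for (i = 0; i < 8; i++) {
    int ready = atomic_load(&w.i_ready);
    w.payload[ready & FAKE_WORK_I_MASK] = i * 100;
    atomic_store(&w.i_ready, ready + 1);   /* publish the new item */
    usleep(500);
  }

  pthread_join(th, NULL);                  /* drains the ring, then times out */
  printf("final i_done = %d\n", atomic_load(&w.i_done));
  return 0;
}

The countdown reflects the comment in the diff: clearing ->active to zero as soon as the queue looks empty would let the ARM side see a live worker, enqueue an item, and then never have it processed.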