+ syscalls.cache_wb(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to, 1);
+ // have to invalidate now, otherwise there is a race between
+ // DSP evicting dirty lines and ARM writing new data to this area
+ syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1);
+}
+
+// Worker main loop: consume work items queued by the other (ARM) core
+// until ->active counts down to zero.  Shared state lives in cached
+// memory, so every cross-core read is preceded by cache_inv and every
+// cross-core write is followed by cache_wb.  Presumably runs on the
+// DSP side -- TODO confirm against the ARM-side queueing code.
+static void do_processing(void)
+{
+ int left, dirty = 0, had_rvb = 0;
+ struct work_item *work;
+
+ while (worker->active)
+ {
+ // i_ready is in first cacheline
+ syscalls.cache_inv(worker, 64, 1);
+
+ // count of queued-but-unprocessed items in the ring buffer
+ left = worker->i_ready - worker->i_done;
+ if (left > 0) {
+ dirty = 1;
+ // reset the countdown so the other core keeps seeing us as busy
+ // (see the race-avoidance comment at the bottom of the loop)
+ worker->active = ACTIVE_CNT;
+ syscalls.cache_wb(&worker->active, 4, 1);
+
+ // ring-buffer slot selection via power-of-two mask
+ work = &worker->i[worker->i_done & WORK_I_MASK];
+ invalidate_cache(work);
+ had_rvb |= work->rvb_addr;
+ spu.spuCtrl = work->ctrl;
+ do_channel_work(work);
+ writeout_cache(work);
+
+ // publish completion so the producer can reuse this slot
+ worker->i_done++;
+ syscalls.cache_wb(&worker->i_done, 4, 1);
+ continue;
+ }
+
+ // nothing to do? Write out non-critical caches
+ if (dirty) {
+ syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1);
+ syscalls.cache_wb(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24, 1);
+ if (had_rvb) {
+ // write back the reverb area: StartAddr up to the end of
+ // SPU RAM (0x40000 halfwords, hence the * 2 byte count)
+ left = 0x40000 - spu.rvb->StartAddr;
+ syscalls.cache_wb(spu.spuMem + spu.rvb->StartAddr, left * 2, 1);
+ had_rvb = 0;
+ }
+ dirty = 0;
+ continue;
+ }
+
+ // this ->active loop thing is to avoid a race where we miss
+ // new work and clear ->active just after ARM checks it
+ worker->active--;
+ syscalls.cache_wb(&worker->active, 4, 1);
+ }