--- /dev/null
+language: cpp
+compiler:
+ - gcc
+ - clang
+before_install:
+ - sudo apt-get update -qq
+ - sudo apt-get install -y libsdl1.2-dev libasound2-dev libpng-dev libz-dev
+script: ./configure && make
PCSX-ReARMed - yet another PCSX fork
--------------------------------
+====================================
+
+[![Build Status](https://travis-ci.org/notaz/pcsx_rearmed.svg?branch=master)](https://travis-ci.org/notaz/pcsx_rearmed)
+
+*see [readme.txt](readme.txt) for more complete documentation*
PCSX ReARMed is yet another PCSX fork based on the PCSX-Reloaded project,
which itself contains code from PCSX, PCSX-df and PCSX-Revolution. This
version is ARM architecture oriented and features MIPS->ARM recompiler by
Ari64, NEON GTE code and more performance improvements. It was created for
Pandora handheld, but should be usable on other devices after some code
-adjustments (N900, GPH Wiz/Caanoo versions are also available).
+adjustments (N900, GPH Wiz/Caanoo, PlayBook versions are also available).
PCSX ReARMed features ARM NEON GPU by Exophase, that in many cases produces
pixel perfect graphics at very high performance. There is also Una-i's GPU
PCSX-Reloaded
--------------------------------
+=============
PCSX-Reloaded is a forked version of the dead PCSX emulator, with a nicer
interface and several improvements to stability and functionality.
-Subproject commit 515ac0b9d2c4d45a465335d54b8c49830914fcea
+Subproject commit 9fec8a91c9b19856ac0b51de53b847b38ed8dc61
{
memset(info, 0, sizeof(*info));
info->library_name = "PCSX-ReARMed";
- info->library_version = "r21";
+ info->library_version = "r22";
info->valid_extensions = "bin|cue|img|mdf|pbp|toc|cbn|m3u";
info->need_fullpath = true;
}
cycle_multiplier = 200;
#endif
pl_rearmed_cbs.gpu_peops.iUseDither = 1;
+ spu_config.iUseFixedUpdates = 1;
McdDisable[0] = 0;
McdDisable[1] = 1;
}
}
- vout_fbdev_clear(layer_fb);
buf = vout_fbdev_resize(layer_fb, w, h, *bpp,
l, r, t, b, 3);
+ vout_fbdev_clear(layer_fb);
+
omap_enable_layer(1);
return buf;
#include "../libpcsxcore/psxmem_map.h"
#include "../plugins/dfinput/externals.h"
+#define HUD_HEIGHT 10
+
int in_type1, in_type2;
int in_a1[2] = { 127, 127 }, in_a2[2] = { 127, 127 };
int in_adev[2] = { -1, -1 }, in_adev_axis[2][2] = {{ 0, 1 }, { 0, 1 }};
static void print_msg(int h, int border)
{
- hud_print(pl_vout_buf, pl_vout_w, border + 2, h - 10, hud_msg);
+ hud_print(pl_vout_buf, pl_vout_w, border + 2, h - HUD_HEIGHT, hud_msg);
}
static void print_fps(int h, int border)
{
- hud_printf(pl_vout_buf, pl_vout_w, border + 2, h - 10,
+ hud_printf(pl_vout_buf, pl_vout_w, border + 2, h - HUD_HEIGHT,
"%2d %4.1f", pl_rearmed_cbs.flips_per_sec,
pl_rearmed_cbs.vsps_cur);
}
static void print_cpu_usage(int w, int h, int border)
{
- hud_printf(pl_vout_buf, pl_vout_w, pl_vout_w - border - 28, h - 10,
- "%3d", pl_rearmed_cbs.cpu_usage);
+ hud_printf(pl_vout_buf, pl_vout_w, pl_vout_w - border - 28,
+ h - HUD_HEIGHT, "%3d", pl_rearmed_cbs.cpu_usage);
}
// draw 192x8 status of 24 sound channels
static const unsigned short colors[2] = { 0x1fe3, 0x0700 };
unsigned short *dest = (unsigned short *)pl_vout_buf +
- vout_w * (vout_h - 10) + vout_w / 2 - 192/2;
+ vout_w * (vout_h - HUD_HEIGHT) + vout_w / 2 - 192/2;
unsigned short *d, p;
int c, x, y;
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
|| defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__)
+ || defined(__ARM_ARCH_7EM__) || defined(__ARM_ARCH_7S__)
#define HAVE_ARMV7
#define HAVE_ARMV6
psxCpu->Clear(madr, cdsize / 4);
pTransfer += cdsize;
-
- // burst vs normal
if( chcr == 0x11400100 ) {
+ HW_DMA3_MADR = SWAPu32(madr + cdsize);
CDRDMA_INT( (cdsize/4) / 4 );
}
else if( chcr == 0x11000000 ) {
- CDRDMA_INT( (cdsize/4) * 1 );
+ // CDRDMA_INT( (cdsize/4) * 1 );
+ // halted
+ psxRegs.cycle += (cdsize/4) * 24/2;
+ CDRDMA_INT(16);
}
return;
.align 2
.macro sgnxt16 rd rs
-#ifdef HAVE_ARMV7
+#ifdef HAVE_ARMV6
sxth \rd, \rs
#else
lsl \rd, \rs, #16
.endm
.macro ssatx rd wr bit
-#ifdef HAVE_ARMV7
+#ifdef HAVE_ARMV6
ssat \rd, #\bit, \rd
#else
cmp \rd, \wr
.endm
.macro usat16_ rd rs
-#ifdef HAVE_ARMV7
+#ifdef HAVE_ARMV6
usat \rd, #16, \rs
#else
subs \rd, \rs, #0
* 320x240x16@60Hz => 9.216 MB/s
* so 2.0 to 4.0 should be fine.
*/
-#define MDEC_BIAS 2.0f
+#define MDEC_BIAS 2
#define DSIZE 8
#define DSIZE2 (DSIZE * DSIZE)
int blk[DSIZE2 * 6];
u8 * image;
int size;
- int dmacnt;
+ u32 words;
if (chcr != 0x01000200) return;
- size = (bcr >> 16) * (bcr & 0xffff);
+ words = (bcr >> 16) * (bcr & 0xffff);
/* size in byte */
- size *= 4;
- /* I guess the memory speed is limitating */
- dmacnt = size;
+ size = words * 4;
if (!(mdec.reg1 & MDEC1_BUSY)) {
/* add to pending */
}
/* define the power of mdec */
- MDECOUTDMA_INT((int) ((dmacnt* MDEC_BIAS)));
+ MDECOUTDMA_INT(words * MDEC_BIAS);
}
}
assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
}else if(imm==65535) {
- #ifndef HAVE_ARMV7
+ #ifndef HAVE_ARMV6
assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
void emit_signextend16(int rs,int rt)
{
- #ifndef HAVE_ARMV7
+ #ifndef HAVE_ARMV6
emit_shlimm(rs,16,rt);
emit_sarimm(rt,16,rt);
#else
void emit_signextend8(int rs,int rt)
{
- #ifndef HAVE_ARMV7
+ #ifndef HAVE_ARMV6
emit_shlimm(rs,24,rt);
emit_sarimm(rt,24,rt);
#else
{
u32 start, end, main_ram;
- size *= 4; /* PCSX uses DMA units */
+ size *= 4; /* PCSX uses DMA units (words) */
evprintf("ari64_clear %08x %04x\n", addr, size);
return -1;
sbi_sectors = calloc(1, sector_count / 8);
- if (sbi_sectors == NULL)
+ if (sbi_sectors == NULL) {
+ fclose(sbihandle);
return -1;
+ }
// 4-byte SBI header
fread(buffer, 1, 4, sbihandle);
void psxDma4(u32 madr, u32 bcr, u32 chcr) { // SPU
u16 *ptr;
- u32 size;
+ u32 words;
switch (chcr) {
case 0x01000201: //cpu to spu transfer
#endif
break;
}
- SPU_writeDMAMem(ptr, (bcr >> 16) * (bcr & 0xffff) * 2, psxRegs.cycle);
- SPUDMA_INT((bcr >> 16) * (bcr & 0xffff) / 2);
+ words = (bcr >> 16) * (bcr & 0xffff);
+ SPU_writeDMAMem(ptr, words * 2, psxRegs.cycle);
+ HW_DMA4_MADR = SWAPu32(madr + words * 4);
+ SPUDMA_INT(words / 2);
return;
case 0x01000200: //spu to cpu transfer
#endif
break;
}
- size = (bcr >> 16) * (bcr & 0xffff) * 2;
- SPU_readDMAMem(ptr, size, psxRegs.cycle);
- psxCpu->Clear(madr, size);
- break;
+ words = (bcr >> 16) * (bcr & 0xffff);
+ SPU_readDMAMem(ptr, words * 2, psxRegs.cycle);
+ psxCpu->Clear(madr, words);
+
+ HW_DMA4_MADR = SWAPu32(madr + words * 4);
+ SPUDMA_INT(words / 2);
+ return;
#ifdef PSXDMA_LOG
default:
void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU
u32 *ptr;
+ u32 words;
u32 size;
switch (chcr) {
break;
}
// BA blocks * BS words (word = 32-bits)
- size = (bcr >> 16) * (bcr & 0xffff);
- GPU_readDataMem(ptr, size);
- psxCpu->Clear(madr, size);
+ words = (bcr >> 16) * (bcr & 0xffff);
+ GPU_readDataMem(ptr, words);
+ psxCpu->Clear(madr, words);
+
+ HW_DMA2_MADR = SWAPu32(madr + words * 4);
// already 32-bit word size ((size * 4) / 4)
- GPUDMA_INT(size / 4);
+ GPUDMA_INT(words / 4);
return;
case 0x01000201: // mem2vram
break;
}
// BA blocks * BS words (word = 32-bits)
- size = (bcr >> 16) * (bcr & 0xffff);
- GPU_writeDataMem(ptr, size);
+ words = (bcr >> 16) * (bcr & 0xffff);
+ GPU_writeDataMem(ptr, words);
+
+ HW_DMA2_MADR = SWAPu32(madr + words * 4);
// already 32-bit word size ((size * 4) / 4)
- GPUDMA_INT(size / 4);
+ GPUDMA_INT(words / 4);
return;
case 0x01000401: // dma chain
if ((int)size <= 0)
size = gpuDmaChainSize(madr);
HW_GPU_STATUS &= ~PSXGPU_nBUSY;
-
+
+ // we don't emulate progress, just busy flag and end irq,
+ // so pretend we're already at the last block
+ HW_DMA2_MADR = SWAPu32(0xffffff);
+
// Tekken 3 = use 1.0 only (not 1.5x)
// Einhander = parse linked list in pieces (todo)
}
void psxDma6(u32 madr, u32 bcr, u32 chcr) {
- u32 size;
+ u32 words;
u32 *mem = (u32 *)PSXM(madr);
#ifdef PSXDMA_LOG
}
// already 32-bit size
- size = bcr;
+ words = bcr;
while (bcr--) {
*mem-- = SWAP32((madr - 4) & 0xffffff);
}
mem++; *mem = 0xffffff;
- GPUOTCDMA_INT(size);
+ //GPUOTCDMA_INT(size);
+ // halted
+ psxRegs.cycle += words;
+ GPUOTCDMA_INT(16);
return;
}
#ifdef PSXDMA_LOG
include $(C64_TOOLS_DSP_ROOT)/install.mk
TARGET_BASENAME = pcsxr_spu
-OPTFLAGS += -O2
+OPTFLAGS += -o3
+CFLAGS += -mo
+#CFLAGS += -k -mw
CFLAGS += -DNO_OS -DWANT_THREAD_CODE
OBJ = \
\r
spu.spuAddr += 2;\r
spu.spuAddr &= 0x7fffe;\r
+ spu.bMemDirty = 1;\r
}\r
\r
////////////////////////////////////////////////////////////////////////\r
int i;\r
\r
do_samples_if_needed(cycles, 1);\r
+ spu.bMemDirty = 1;\r
\r
if(spu.spuAddr + iSize*2 < 0x80000)\r
{\r
\r
int dirty; // registers changed\r
\r
- // normalized offsets\r
- int nIIR_DEST_A0, nIIR_DEST_A1, nIIR_DEST_B0, nIIR_DEST_B1,\r
- nACC_SRC_A0, nACC_SRC_A1, nACC_SRC_B0, nACC_SRC_B1, \r
- nIIR_SRC_A0, nIIR_SRC_A1, nIIR_SRC_B0, nIIR_SRC_B1,\r
- nACC_SRC_C0, nACC_SRC_C1, nACC_SRC_D0, nACC_SRC_D1,\r
- nMIX_DEST_A0, nMIX_DEST_A1, nMIX_DEST_B0, nMIX_DEST_B1;\r
// MIX_DEST_xx - FB_SRC_x\r
- int nFB_SRC_A0, nFB_SRC_A1, nFB_SRC_B0, nFB_SRC_B1;\r
+ int FB_SRC_A0, FB_SRC_A1, FB_SRC_B0, FB_SRC_B1;\r
} REVERBInfo;\r
\r
///////////////////////////////////////////////////////////\r
int decode_dirty_ch;\r
unsigned int bSpuInit:1;\r
unsigned int bSPUIsOpen:1;\r
+ unsigned int bMemDirty:1; // had external write to SPU RAM\r
\r
unsigned int dwNoiseVal; // global noise generator\r
unsigned int dwNoiseCount;\r
\r
if(!pF) return 0; // first check\r
\r
+ do_samples(cycles, 1);\r
+\r
if(ulFreezeMode) // info or save?\r
{//--------------------------------------------------//\r
if(ulFreezeMode==1) \r
\r
if(ulFreezeMode==2) return 1; // info mode? ok, bye\r
// save mode:\r
- do_samples(cycles, 1);\r
-\r
memcpy(pF->cSPURam,spu.spuMem,0x80000); // copy common infos\r
memcpy(pF->cSPUPort,spu.regArea,0x200);\r
\r
\r
memcpy(spu.spuMem,pF->cSPURam,0x80000); // get ram\r
memcpy(spu.regArea,pF->cSPUPort,0x200);\r
+ spu.bMemDirty = 1;\r
\r
if(pF->xaS.nsamples<=4032) // start xa again\r
SPUplayADPCMchannel(&pF->xaS);\r
break;\r
//-------------------------------------------------//\r
case H_SPUReverbAddr:\r
- if(val==0xFFFF || val<=0x200)\r
- {spu.rvb->StartAddr=spu.rvb->CurrAddr=0;}\r
- else\r
- {\r
- const long iv=(unsigned long)val<<2;\r
- if(spu.rvb->StartAddr!=iv)\r
- {\r
- spu.rvb->StartAddr=(unsigned long)val<<2;\r
- spu.rvb->CurrAddr=spu.rvb->StartAddr;\r
- }\r
- }\r
goto rvbd;\r
//-------------------------------------------------//\r
case H_SPUirqAddr:\r
ReverbOn(16,24,val);\r
break;\r
//-------------------------------------------------//\r
- case H_Reverb+0 : spu.rvb->FB_SRC_A=val*4; goto rvbd;\r
- case H_Reverb+2 : spu.rvb->FB_SRC_B=val*4; goto rvbd;\r
- case H_Reverb+4 : spu.rvb->IIR_ALPHA=(short)val; goto rvbd;\r
- case H_Reverb+6 : spu.rvb->ACC_COEF_A=(short)val; goto rvbd;\r
- case H_Reverb+8 : spu.rvb->ACC_COEF_B=(short)val; goto rvbd;\r
- case H_Reverb+10 : spu.rvb->ACC_COEF_C=(short)val; goto rvbd;\r
- case H_Reverb+12 : spu.rvb->ACC_COEF_D=(short)val; goto rvbd;\r
- case H_Reverb+14 : spu.rvb->IIR_COEF=(short)val; goto rvbd;\r
- case H_Reverb+16 : spu.rvb->FB_ALPHA=(short)val; goto rvbd;\r
- case H_Reverb+18 : spu.rvb->FB_X=(short)val; goto rvbd;\r
- case H_Reverb+20 : spu.rvb->IIR_DEST_A0=val*4; goto rvbd;\r
- case H_Reverb+22 : spu.rvb->IIR_DEST_A1=val*4; goto rvbd;\r
- case H_Reverb+24 : spu.rvb->ACC_SRC_A0=val*4; goto rvbd;\r
- case H_Reverb+26 : spu.rvb->ACC_SRC_A1=val*4; goto rvbd;\r
- case H_Reverb+28 : spu.rvb->ACC_SRC_B0=val*4; goto rvbd;\r
- case H_Reverb+30 : spu.rvb->ACC_SRC_B1=val*4; goto rvbd;\r
- case H_Reverb+32 : spu.rvb->IIR_SRC_A0=val*4; goto rvbd;\r
- case H_Reverb+34 : spu.rvb->IIR_SRC_A1=val*4; goto rvbd;\r
- case H_Reverb+36 : spu.rvb->IIR_DEST_B0=val*4; goto rvbd;\r
- case H_Reverb+38 : spu.rvb->IIR_DEST_B1=val*4; goto rvbd;\r
- case H_Reverb+40 : spu.rvb->ACC_SRC_C0=val*4; goto rvbd;\r
- case H_Reverb+42 : spu.rvb->ACC_SRC_C1=val*4; goto rvbd;\r
- case H_Reverb+44 : spu.rvb->ACC_SRC_D0=val*4; goto rvbd;\r
- case H_Reverb+46 : spu.rvb->ACC_SRC_D1=val*4; goto rvbd;\r
- case H_Reverb+48 : spu.rvb->IIR_SRC_B1=val*4; goto rvbd;\r
- case H_Reverb+50 : spu.rvb->IIR_SRC_B0=val*4; goto rvbd;\r
- case H_Reverb+52 : spu.rvb->MIX_DEST_A0=val*4; goto rvbd;\r
- case H_Reverb+54 : spu.rvb->MIX_DEST_A1=val*4; goto rvbd;\r
- case H_Reverb+56 : spu.rvb->MIX_DEST_B0=val*4; goto rvbd;\r
- case H_Reverb+58 : spu.rvb->MIX_DEST_B1=val*4; goto rvbd;\r
- case H_Reverb+60 : spu.rvb->IN_COEF_L=(short)val; goto rvbd;\r
- case H_Reverb+62 : spu.rvb->IN_COEF_R=(short)val; goto rvbd;\r
+ case H_Reverb+0 : goto rvbd;\r
+ case H_Reverb+2 : goto rvbd;\r
+ case H_Reverb+4 : spu.rvb->IIR_ALPHA=(short)val; break;\r
+ case H_Reverb+6 : spu.rvb->ACC_COEF_A=(short)val; break;\r
+ case H_Reverb+8 : spu.rvb->ACC_COEF_B=(short)val; break;\r
+ case H_Reverb+10 : spu.rvb->ACC_COEF_C=(short)val; break;\r
+ case H_Reverb+12 : spu.rvb->ACC_COEF_D=(short)val; break;\r
+ case H_Reverb+14 : spu.rvb->IIR_COEF=(short)val; break;\r
+ case H_Reverb+16 : spu.rvb->FB_ALPHA=(short)val; break;\r
+ case H_Reverb+18 : spu.rvb->FB_X=(short)val; break;\r
+ case H_Reverb+20 : goto rvbd;\r
+ case H_Reverb+22 : goto rvbd;\r
+ case H_Reverb+24 : goto rvbd;\r
+ case H_Reverb+26 : goto rvbd;\r
+ case H_Reverb+28 : goto rvbd;\r
+ case H_Reverb+30 : goto rvbd;\r
+ case H_Reverb+32 : goto rvbd;\r
+ case H_Reverb+34 : goto rvbd;\r
+ case H_Reverb+36 : goto rvbd;\r
+ case H_Reverb+38 : goto rvbd;\r
+ case H_Reverb+40 : goto rvbd;\r
+ case H_Reverb+42 : goto rvbd;\r
+ case H_Reverb+44 : goto rvbd;\r
+ case H_Reverb+46 : goto rvbd;\r
+ case H_Reverb+48 : goto rvbd;\r
+ case H_Reverb+50 : goto rvbd;\r
+ case H_Reverb+52 : goto rvbd;\r
+ case H_Reverb+54 : goto rvbd;\r
+ case H_Reverb+56 : goto rvbd;\r
+ case H_Reverb+58 : goto rvbd;\r
+ case H_Reverb+60 : spu.rvb->IN_COEF_L=(short)val; break;\r
+ case H_Reverb+62 : spu.rvb->IN_COEF_R=(short)val; break;\r
}\r
return;\r
\r
\r
// get_buffer content helper: takes care about wraps\r
#define g_buffer(var) \\r
- ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)])\r
+ ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var)])\r
\r
// saturate iVal and store it as var\r
#define s_buffer(var, iVal) \\r
ssat32_to_16(iVal); \\r
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)] = iVal\r
+ spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var)] = iVal\r
\r
#define s_buffer1(var, iVal) \\r
ssat32_to_16(iVal); \\r
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var + 1)] = iVal\r
+ spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var + 1)] = iVal\r
\r
////////////////////////////////////////////////////////////////////////\r
\r
static void REVERBPrep(void)\r
{\r
REVERBInfo *rvb = spu.rvb;\r
- int space = 0x40000 - rvb->StartAddr;\r
- int t;\r
- #define prep_offs(v) \\r
- t = rvb->v; \\r
+ int space, t;\r
+\r
+ t = spu.regArea[(H_SPUReverbAddr - 0xc00) >> 1];\r
+ if (t == 0xFFFF || t <= 0x200)\r
+ spu.rvb->StartAddr = spu.rvb->CurrAddr = 0;\r
+ else if (spu.rvb->StartAddr != (t << 2))\r
+ spu.rvb->StartAddr = spu.rvb->CurrAddr = t << 2;\r
+\r
+ space = 0x40000 - rvb->StartAddr;\r
+\r
+ #define prep_offs(v, r) \\r
+ t = spu.regArea[(0x1c0 + r) >> 1] * 4; \\r
while (t >= space) \\r
t -= space; \\r
- rvb->n##v = t\r
- #define prep_offs2(d, v1, v2) \\r
- t = rvb->v1 - rvb->v2; \\r
+ rvb->v = t\r
+ #define prep_offs2(d, r1, r2) \\r
+ t = spu.regArea[(0x1c0 + r1) >> 1] * 4; \\r
+ t -= spu.regArea[(0x1c0 + r2) >> 1] * 4; \\r
+ while (t < 0) \\r
+ t += space; \\r
while (t >= space) \\r
t -= space; \\r
- rvb->n##d = t\r
-\r
- prep_offs(IIR_SRC_A0);\r
- prep_offs(IIR_SRC_A1);\r
- prep_offs(IIR_SRC_B0);\r
- prep_offs(IIR_SRC_B1);\r
- prep_offs(IIR_DEST_A0);\r
- prep_offs(IIR_DEST_A1);\r
- prep_offs(IIR_DEST_B0);\r
- prep_offs(IIR_DEST_B1);\r
- prep_offs(ACC_SRC_A0);\r
- prep_offs(ACC_SRC_A1);\r
- prep_offs(ACC_SRC_B0);\r
- prep_offs(ACC_SRC_B1);\r
- prep_offs(ACC_SRC_C0);\r
- prep_offs(ACC_SRC_C1);\r
- prep_offs(ACC_SRC_D0);\r
- prep_offs(ACC_SRC_D1);\r
- prep_offs(MIX_DEST_A0);\r
- prep_offs(MIX_DEST_A1);\r
- prep_offs(MIX_DEST_B0);\r
- prep_offs(MIX_DEST_B1);\r
- prep_offs2(FB_SRC_A0, MIX_DEST_A0, FB_SRC_A);\r
- prep_offs2(FB_SRC_A1, MIX_DEST_A1, FB_SRC_A);\r
- prep_offs2(FB_SRC_B0, MIX_DEST_B0, FB_SRC_B);\r
- prep_offs2(FB_SRC_B1, MIX_DEST_B1, FB_SRC_B);\r
+ rvb->d = t\r
+\r
+ prep_offs(IIR_SRC_A0, 32);\r
+ prep_offs(IIR_SRC_A1, 34);\r
+ prep_offs(IIR_SRC_B0, 36);\r
+ prep_offs(IIR_SRC_B1, 38);\r
+ prep_offs(IIR_DEST_A0, 20);\r
+ prep_offs(IIR_DEST_A1, 22);\r
+ prep_offs(IIR_DEST_B0, 36);\r
+ prep_offs(IIR_DEST_B1, 38);\r
+ prep_offs(ACC_SRC_A0, 24);\r
+ prep_offs(ACC_SRC_A1, 26);\r
+ prep_offs(ACC_SRC_B0, 28);\r
+ prep_offs(ACC_SRC_B1, 30);\r
+ prep_offs(ACC_SRC_C0, 40);\r
+ prep_offs(ACC_SRC_C1, 42);\r
+ prep_offs(ACC_SRC_D0, 44);\r
+ prep_offs(ACC_SRC_D1, 46);\r
+ prep_offs(MIX_DEST_A0, 52);\r
+ prep_offs(MIX_DEST_A1, 54);\r
+ prep_offs(MIX_DEST_B0, 56);\r
+ prep_offs(MIX_DEST_B1, 58);\r
+ prep_offs2(FB_SRC_A0, 52, 0);\r
+ prep_offs2(FB_SRC_A1, 54, 0);\r
+ prep_offs2(FB_SRC_B0, 56, 2);\r
+ prep_offs2(FB_SRC_B1, 58, 2);\r
\r
#undef prep_offs\r
#undef prep_offs2\r
}
if (spu.rvb->StartAddr) {
- if (do_rvb) {
- if (unlikely(spu.rvb->dirty))
- REVERBPrep();
-
+ if (do_rvb)
REVERBDo(spu.SSumLR, RVB, ns_to, spu.rvb->CurrAddr);
- }
spu.rvb->CurrAddr += ns_to / 2;
while (spu.rvb->CurrAddr >= 0x40000)
unsigned int i_ready;
unsigned int i_reaped;
unsigned int last_boot_cnt; // dsp
+ unsigned int ram_dirty;
};
// aligning for C64X_DSP
unsigned int _pad0[128/4];
work->rvb_addr = 0;
if (spu.rvb->StartAddr) {
- if (spu_config.iUseReverb) {
- if (unlikely(spu.rvb->dirty))
- REVERBPrep();
+ if (spu_config.iUseReverb)
work->rvb_addr = spu.rvb->CurrAddr;
- }
spu.rvb->CurrAddr += ns_to / 2;
while (spu.rvb->CurrAddr >= 0x40000)
{
unsigned int mask;
unsigned int decode_dirty_ch = 0;
+ const SPUCHAN *s_chan;
int *SB, sinc, spos, sbpos;
int d, ch, ns_to;
- SPUCHAN *s_chan;
ns_to = work->ns_to;
struct work_item *work;
int done, used_space;
+ // rvb offsets will change, thread may be using them
+ force |= spu.rvb->dirty && spu.rvb->StartAddr;
+
done = thread_get_i_done() - worker->i_reaped;
used_space = worker->i_ready - worker->i_reaped;
+
//printf("done: %d use: %d dsp: %u/%u\n", done, used_space,
// worker->boot_cnt, worker->i_done);
}
}
+ if (unlikely(spu.rvb->dirty))
+ REVERBPrep();
+
if (do_direct || worker == NULL || !spu_config.iUseThread) {
do_channels(ns_to);
do_samples_finish(spu.SSumLR, ns_to, silentch, spu.decode_pos);
void CALLBACK SPUasync(unsigned int cycle, unsigned int flags)
{
- do_samples(cycle, 0);
+ do_samples(cycle, spu_config.iUseFixedUpdates);
if (spu.spuCtrl & CTRL_IRQ)
schedule_next_irq();
unsigned int req_sent:1;
} f;
+static noinline void dsp_fault(void)
+{
+ dsp_msg_t msg;
+
+ f.dsp_cache_inv_virt(worker, sizeof(*worker));
+ printf("dsp crash/fault/corruption:\n");
+ printf("state rdy/reap/done: %u %u %u\n",
+ worker->i_ready, worker->i_reaped, worker->i_done);
+ printf("active/boot: %u %u\n",
+ worker->active, worker->boot_cnt);
+
+ if (f.req_sent) {
+ f.dsp_rpc_recv(&msg);
+ f.req_sent = 0;
+ }
+ f.dsp_logbuf_print();
+ spu_config.iUseThread = 0;
+}
+
static void thread_work_start(void)
{
struct region_mem *mem;
f.dsp_cache_inv_virt(&worker->i_done, 64);
worker->last_boot_cnt = worker->boot_cnt;
+ worker->ram_dirty = spu.bMemDirty;
+ spu.bMemDirty = 0;
mem = (void *)f.region.virt_addr;
- memcpy(&mem->spu_config, &spu_config, sizeof(mem->spu_config));
+ memcpy(&mem->in.spu_config, &spu_config, sizeof(mem->in.spu_config));
DSP_MSG_INIT(&msg, f.compid, CCMD_DOIT, f.region.phys_addr, 0);
ret = f.dsp_rpc_send(&msg);
return;
}
f.req_sent = 1;
+
+#if 0
+ f.dsp_rpc_recv(&msg);
+ f.req_sent = 0;
+#endif
}
static int thread_get_i_done(void)
int limit = 1000;
int ns_to;
+ if ((unsigned int)(worker->i_done - worker->i_reaped) > WORK_MAXCNT) {
+ dsp_fault();
+ return;
+ }
+
while (worker->i_done == worker->i_reaped && limit-- > 0) {
if (!f.req_sent) {
printf("dsp: req not sent?\n");
}
mem = (void *)f.region.virt_addr;
- memcpy(&mem->spu_config, &spu_config, sizeof(mem->spu_config));
+ memcpy(&mem->in.spu_config, &spu_config, sizeof(mem->in.spu_config));
DSP_MSG_INIT(&init_msg, f.compid, CCMD_INIT, f.region.phys_addr, 0);
ret = f.dsp_rpc(&init_msg, &msg_in);
worker = NULL;
}
+/* debug: "access" shared mem from gdb */
+#if 0
+struct region_mem *dbg_dsp_mem;
+
+void dbg_dsp_mem_update(void)
+{
+ struct region_mem *mem;
+
+ if (dbg_dsp_mem == NULL)
+ dbg_dsp_mem = malloc(sizeof(*dbg_dsp_mem));
+ if (dbg_dsp_mem == NULL)
+ return;
+
+ mem = (void *)f.region.virt_addr;
+ f.dsp_cache_inv_virt(mem, sizeof(*mem));
+ memcpy(dbg_dsp_mem, mem, sizeof(*dbg_dsp_mem));
+}
+#endif
+
// vim:shiftwidth=1:expandtab
// these are not to be modified by DSP
SPUCHAN s_chan[24 + 1];
REVERBInfo rvb;
+ SPUConfig spu_config;
} in;
int _pad2[128/4 - ((sizeof(struct spu_in) / 4) & (128/4 - 1))];
struct spu_worker worker;
- SPUConfig spu_config;
// init/debug
int sizeof_region_mem;
int offsetof_s_chan1;
void SetupSound(void) {}
+static void enable_l2_cache(void)
+{
+ volatile uint32_t *L2CFG = (volatile uint32_t *)0x01840000;
+ uint32_t *MARi = (void *)0x01848000;
+ int i;
+
+ // program Memory Attribute Registers
+ // (old c64_tools has the defaults messed up)
+ // 00000000-0fffffff - not configurable
+ // 10000000-7fffffff - system
+ for (i = 0x10; i < 0x80; i++)
+ MARi[i] = 0;
+ // 80000000-9fffffff - RAM
+ for ( ; i < 0xa0; i++)
+ MARi[i] = 1;
+ // a0000000-ffffffff - reserved, etc
+ for ( ; i < 0x100; i++)
+ MARi[i] = 0;
+
+ // enable L2 (1 for 32k, 2 for 64k)
+ if (!(*L2CFG & 2)) {
+ *L2CFG = 2;
+ // wait for the write
+ *L2CFG;
+ }
+}
+
static void invalidate_cache(struct work_item *work)
{
// see comment in writeout_cache()
//syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1);
- syscalls.cache_inv(spu.s_chan, sizeof(spu.s_chan[0]) * 24, 0);
+ syscalls.cache_inv(spu.s_chan, sizeof(spu.s_chan[0]) * 24, 1);
syscalls.cache_inv(work->SSumLR,
- sizeof(work->SSumLR[0]) * 2 * work->ns_to, 0);
+ sizeof(work->SSumLR[0]) * 2 * work->ns_to, 1);
}
static void writeout_cache(struct work_item *work)
syscalls.cache_wb(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to, 1);
// have to invalidate now, otherwise there is a race between
// DSP evicting dirty lines and ARM writing new data to this area
- syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 0);
+ syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1);
}
static void do_processing(void)
switch (cmd) {
case CCMD_INIT:
+ enable_l2_cache();
InitADSR();
spu.spuMemC = mem->spu_ram;
spu.s_chan = mem->in.s_chan;
spu.rvb = &mem->in.rvb;
worker = &mem->worker;
- memcpy(&spu_config, &mem->spu_config, sizeof(spu_config));
+ memcpy(&spu_config, &mem->in.spu_config, sizeof(spu_config));
mem->sizeof_region_mem = sizeof(*mem);
mem->offsetof_s_chan1 = offsetof(typeof(*mem), in.s_chan[1]);
case CCMD_DOIT:
worker->active = ACTIVE_CNT;
worker->boot_cnt++;
- syscalls.cache_wb(&worker->i_done, 64, 1);
- memcpy(&spu_config, &mem->spu_config, sizeof(spu_config));
+ syscalls.cache_inv(worker, 128, 1);
+ syscalls.cache_wb(&worker->i_done, 128, 1);
+ memcpy(&spu_config, &mem->in.spu_config, sizeof(spu_config));
+
+ if (worker->ram_dirty)
+ // it's faster to do it all than just a 512k buffer
+ syscalls.cache_wbInvAll();
do_processing();
- // c64_tools lib does BCACHE_wbInvAll() when it receives mailbox irq,
- // but invalidate anyway in case c64_tools is ever fixed..
- // XXX edit: don't bother as reverb is not handled, will fix if needed
- //syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0);
- //syscalls.cache_inv(&mem->in, sizeof(mem->in), 0);
+ syscalls.cache_inv(&mem->SB, sizeof(mem->SB), 0);
+ syscalls.cache_inv(&mem->in, sizeof(mem->in), 0);
break;
default:
int iUseInterpolation;
int iTempo;
int iUseThread;
+ int iUseFixedUpdates; // output fixed number of samples/frame
// status
int iThreadAvail;
{
if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
// lower 16 bit
- u32 data = (unsigned long)pvram[px];
+ u32 data = pvram[px];
if (++px>=x_end)
{
if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024;
// higher 16 bit (always, even if it's an odd width)
- data |= (unsigned long)(pvram[px])<<16;
+ data |= (u32)(pvram[px])<<16;
*dmaAddress++ = data;
///////////////////////////////////////////////////////////////////////////////
INLINE void gpuSetTexture(u16 tpage)
{
- long tp;
- long tx, ty;
- GPU_GP1 = (GPU_GP1 & ~0x7FF) | (tpage & 0x7FF);
+ u32 tp;
+ u32 tx, ty;
+ GPU_GP1 = (GPU_GP1 & ~0x1FF) | (tpage & 0x1FF);
TextureWindow[0]&= ~TextureWindow[2];
TextureWindow[1]&= ~TextureWindow[3];
tp = (tpage >> 7) & 3;
tx = (tpage & 0x0F) << 6;
ty = (tpage & 0x10) << 4;
+ if (tp == 3) tp = 2;
tx += (TextureWindow[0] >> (2 - tp));
ty += TextureWindow[1];
case 0xE5:
{
const u32 temp = PacketBuffer.U4[0];
- DrawingOffset[0] = ((long)temp<<(32-11))>>(32-11);
- DrawingOffset[1] = ((long)temp<<(32-22))>>(32-11);
+ DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11);
+ DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11);
//isSkip = false;
DO_LOG(("DrawingOffset(0x%x)\n",PRIM));
}
#define GPU_DIGITS 16
#define GPU_DIGITSC (GPU_DIGITS+3)
-INLINE long GPU_DIV(long rs, long rt)
+INLINE s32 GPU_DIV(s32 rs, s32 rt)
{
return rt ? (rs / rt) : (0);
}
}
case 0xE5: {
const u32 temp = PacketBuffer.U4[0];
- DrawingOffset[0] = ((long)temp<<(32-11))>>(32-11);
- DrawingOffset[1] = ((long)temp<<(32-22))>>(32-11);
+ DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11);
+ DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11);
gpu.ex_regs[5] = temp;
break;
}
#define unlikely(x)
#define preload(...)
#define noinline
-#error huh
#endif
#define gpu_log(fmt, ...) \
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr)
{
uint32_t addr, *list, ld_addr = 0;
- uint32_t *llist_entry = NULL;
int len, left, count;
long cpu_cycles = 0;
if (unlikely(gpu.cmd_len > 0))
flush_cmd_buffer();
- // ff7 sends it's main list twice, detect this
- if (*gpu.state.frame_count == gpu.state.last_list.frame &&
- *gpu.state.hcnt - gpu.state.last_list.hcnt <= 1 &&
- gpu.state.last_list.cycles > 2048)
- {
- llist_entry = rambase + (gpu.state.last_list.addr & 0x1fffff) / 4;
- *llist_entry |= 0x800000;
- }
-
log_io("gpu_dma_chain\n");
addr = start_addr & 0xffffff;
for (count = 0; (addr & 0x800000) == 0; count++)
}
}
- if (llist_entry)
- *llist_entry &= ~0x800000;
-
gpu.state.last_list.frame = *gpu.state.frame_count;
gpu.state.last_list.hcnt = *gpu.state.hcnt;
gpu.state.last_list.cycles = cpu_cycles;
{
int w = gpu.screen.hres;
int h = gpu.screen.h;
+
+ check_mode_change(0);
if (gpu.state.enhancement_active) {
w *= 2;
h *= 2;
}
- check_mode_change(0);
cbs->pl_vout_flip(NULL, 1024, gpu.status.rgb24, w, h);
}
Changelog
---------
+r22 (2015-02-05)
+* general: fixed a race condition/crash in threaded SPU mode
+* pandora: C64x: fixed compatibility with newer c64_tools, enabled L2 cache
+* frontend: fixed control config corruption on load for devices that are
+ disconnected on startup
+* some dma accuracy improvements, might fix occasional glitches in ff7
+* ARMv6 build and the dynarec now make use of available instructions (gizmo98)
+
r21 (2015-01-12)
+ general: added ability to run SPU emulation on a separate thread, enabled it
by default when multicore CPU is detected. Significant effort was made to