output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
}
+static void emit_negs(int rs, int rt) // emits "rsbs rt, rs, #0": rt = 0 - rs, with the condition flags updated
+{
+ assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
+ output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); // rsb-immediate word with bit 20 (the S bit) set; the immediate field is left at 0
+}
+
static void emit_sub(int rs1,int rs2,int rt)
{
assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
}
+static void emit_subs(int rs1,int rs2,int rt) // emits "subs rt, rs1, rs2": rt = rs1 - rs2, with the condition flags updated
+{
+ assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
+ output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); // emit_sub's word (0xe0400000) with bit 20 (the S bit) set
+}
+
static void emit_zeroreg(int rt)
{
assem_debug("mov %s,#0\n",regname[rt]);
output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
}
+static void emit_mvneq(int rs,int rt) // emits "mvneq rt, rs": rt = ~rs, executed only when the EQ condition holds
+{
+ assem_debug("mvneq %s,%s\n",regname[rt],regname[rs]);
+ output_w32(0x01e00000|rd_rn_rm(rt,0,rs)); // mvn word with condition nibble 0x0 (EQ) in place of 0xe (always)
+}
+
static void emit_and(u_int rs1,u_int rs2,u_int rt)
{
assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
emit_addimm(rs, imm, rt);
}
-static void emit_addimm_and_set_flags(int imm,int rt)
+static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt) // rt = rs + imm with flags set; three-operand form of the former in-place helper
{
assert(imm>-65536&&imm<65536);
u_int armval;
- if(genimm(imm,&armval)) {
- assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
- output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
- }else if(genimm(-imm,&armval)) {
- assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
- output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
- }else if(imm<0) {
- assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
+ if (genimm(imm, &armval)) { // genimm accepted imm: a single adds is enough
+ assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rs],imm);
+ output_w32(0xe2900000|rd_rn_rm(rt,rs,0)|armval);
+ } else if (genimm(-imm, &armval)) { // genimm accepted -imm: a single subs is enough
+ assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rs],imm);
+ output_w32(0xe2500000|rd_rn_rm(rt,rs,0)|armval);
+ } else if (rs != rt) { // distinct destination: use rt as scratch for imm, then one flag-setting add
+ emit_movimm(imm, rt);
+ emit_adds(rs, rt, rt);
+ } else if (imm < 0) { // in-place (rs == rt), negative: subtract the high byte, then the low byte; only the second instruction (subs) sets flags
+ assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
- output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
+ output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
- }else{
- assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
+ } else { // in-place (rs == rt), positive: add the high byte, then the low byte; only the second instruction (adds) sets flags
+ assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
- output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
+ output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
}
}
+static void emit_addimm_and_set_flags(int imm, u_int rt) // in-place variant kept for existing callers: rt = rt + imm, flags set
+{
+ emit_addimm_and_set_flags3(rt, imm, rt); // rt is passed as both source and destination
+}
+
static void emit_addnop(u_int r)
{
assert(r<16);
output_w32(0xaa000000|offset);
}
+static void emit_jo(const void *a_) // emits "bvs a_": branch taken when the overflow (V) flag is set
+{
+ int a = (int)a_;
+ assem_debug("bvs %x\n",a);
+ u_int offset=genjmp(a);
+ output_w32(0x6a000000|offset); // branch word with condition nibble 0x6 (VS), OR-ed with the offset field from genjmp
+}
+
static void emit_jno(const void *a_)
{
int a = (int)a_;
{
uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
assert(offset<4096);
- assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
+ assem_debug("ldr %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset));
output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
}
#define emit_readptr emit_readword
{
uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
assert(offset<4096);
- assem_debug("str %s,fp+%d\n",regname[rt],offset);
+ assem_debug("str %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset));
output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
}
u_int reglist=stubs[n].e;
const signed char *i_regmap=i_regs->regmap;
int rt;
- if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
+ if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
rt=get_reg(i_regmap,FTEMP);
}else{
rt=get_reg(i_regmap,dops[i].rt1);
emit_shrimm(rs,12,temp2);
emit_readword_dualindexedx4(temp,temp2,temp2);
emit_lsls_imm(temp2,1,temp2);
- if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
+ if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
switch(type) {
case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
emit_loadreg(CCREG,2);
emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
emit_far_call(handler);
- if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
+ if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
mov_loadtype_adj(type,0,rt);
}
if(restore_jump)
u_int reglist=stubs[n].e;
const signed char *i_regmap=i_regs->regmap;
int rt,r;
- if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
+ if(dops[i].itype==C2LS) {
rt=get_reg(i_regmap,r=FTEMP);
}else{
rt=get_reg(i_regmap,r=dops[i].rs2);
// case 0x19: MULTU
// case 0x1A: DIV
// case 0x1B: DIVU
- // case 0x1C: DMULT
- // case 0x1D: DMULTU
- // case 0x1E: DDIV
- // case 0x1F: DDIVU
if(dops[i].rs1&&dops[i].rs2)
{
- if((dops[i].opcode2&4)==0) // 32-bit
+ switch (dops[i].opcode2)
{
- if(dops[i].opcode2==0x18) // MULT
+ case 0x18: // MULT
{
signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
assert(lo>=0);
emit_smull(m1,m2,hi,lo);
}
- if(dops[i].opcode2==0x19) // MULTU
+ break;
+ case 0x19: // MULTU
{
signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
assert(lo>=0);
emit_umull(m1,m2,hi,lo);
}
- if(dops[i].opcode2==0x1A) // DIV
+ break;
+ case 0x1A: // DIV
{
signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
- assert(d1>=0);
- assert(d2>=0);
signed char quotient=get_reg(i_regs->regmap,LOREG);
signed char remainder=get_reg(i_regs->regmap,HIREG);
+ void *jaddr_div0;
+ assert(d1>=0);
+ assert(d2>=0);
assert(quotient>=0);
assert(remainder>=0);
emit_movs(d1,remainder);
emit_negmi(quotient,quotient); // .. quotient and ..
emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
emit_movs(d2,HOST_TEMPREG);
- emit_jeq(out+52); // Division by zero
+ jaddr_div0 = out;
+ emit_jeq(0); // Division by zero
emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
#ifdef HAVE_ARMV5
emit_clz(HOST_TEMPREG,quotient);
- emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
+ emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); // shifted divisor
#else
emit_movimm(0,quotient);
emit_addpl_imm(quotient,1,quotient);
emit_jcc(out-16); // -4
emit_teq(d1,d2);
emit_negmi(quotient,quotient);
+ set_jump_target(jaddr_div0, out);
emit_test(d1,d1);
emit_negmi(remainder,remainder);
}
- if(dops[i].opcode2==0x1B) // DIVU
+ break;
+ case 0x1B: // DIVU
{
signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
- assert(d1>=0);
- assert(d2>=0);
signed char quotient=get_reg(i_regs->regmap,LOREG);
signed char remainder=get_reg(i_regs->regmap,HIREG);
+ void *jaddr_div0;
+ assert(d1>=0);
+ assert(d2>=0);
assert(quotient>=0);
assert(remainder>=0);
emit_mov(d1,remainder);
emit_movimm(0xffffffff,quotient); // div0 case
emit_test(d2,d2);
- emit_jeq(out+40); // Division by zero
+ jaddr_div0 = out;
+ emit_jeq(0); // Division by zero
#ifdef HAVE_ARMV5
emit_clz(d2,HOST_TEMPREG);
emit_movimm(1<<31,quotient);
emit_adcs(quotient,quotient,quotient);
emit_shrcc_imm(d2,1,d2);
emit_jcc(out-16); // -4
+ set_jump_target(jaddr_div0, out);
}
+ break;
}
- else // 64-bit
- assert(0);
}
else
{
- // Multiply by zero is zero.
- // MIPS does not have a divide by zero exception.
- // The result is undefined, we return zero.
signed char hr=get_reg(i_regs->regmap,HIREG);
signed char lr=get_reg(i_regs->regmap,LOREG);
- if(hr>=0) emit_zeroreg(hr);
- if(lr>=0) emit_zeroreg(lr);
+ if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
+ {
+ if (dops[i].rs1) {
+ signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
+ assert(numerator >= 0);
+ if (hr < 0)
+ hr = HOST_TEMPREG;
+ emit_movs(numerator, hr);
+ if (lr >= 0) {
+ if (dops[i].opcode2 == 0x1A) { // DIV
+ emit_movimm(0xffffffff, lr);
+ emit_negmi(lr, lr);
+ }
+ else
+ emit_movimm(~0, lr);
+ }
+ }
+ else {
+ if (hr >= 0) emit_zeroreg(hr);
+ if (lr >= 0) emit_movimm(~0,lr);
+ }
+ }
+ else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
+ {
+ signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
+ assert(denominator >= 0);
+ if (hr >= 0) emit_zeroreg(hr);
+ if (lr >= 0) {
+ emit_zeroreg(lr);
+ emit_test(denominator, denominator);
+ emit_mvneq(lr, lr);
+ }
+ }
+ else
+ {
+ // Multiply by zero is zero.
+ if (hr >= 0) emit_zeroreg(hr);
+ if (lr >= 0) emit_zeroreg(lr);
+ }
}
}
#define multdiv_assemble multdiv_assemble_arm
output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
}
+static void emit_adds(u_int rs1, u_int rs2, u_int rt) // emits "adds rt, rs1, rs2" on the 32-bit register names: rt = rs1 + rs2, flags updated
+{
+ assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
+ output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt)); // the plain add word (0x0b000000) with bit 29 (the S bit) set
+}
+
static void emit_add64(u_int rs1, u_int rs2, u_int rt)
{
assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
}
#define emit_adds_ptr emit_adds64
+static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) // emits "add rt, rs1, rs2, lsr #shift": rt = rs1 + (rs2 logically shifted right by shift)
+{
+ assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
+ output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); // add word with the shift-type field set to LSR; shift is placed by rm_imm6_rn_rd
+}
+
static void emit_neg(u_int rs, u_int rt)
{
assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
}
+static void emit_negs(u_int rs, u_int rt) // emits "negs rt, rs": rt = 0 - rs, flags updated
+{
+ assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
+ output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt)); // encoded as a flag-setting subtract with WZR as the first operand
+}
+
static void emit_sub(u_int rs1, u_int rs2, u_int rt)
{
assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
}
-static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
+static void emit_subs(u_int rs1, u_int rs2, u_int rt) // emits "subs rt, rs1, rs2": rt = rs1 - rs2, flags updated
+{
+ assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
+ output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); // emit_sub's word (0x4b000000) with bit 29 (the S bit) set; shift amount 0
+}
+
+static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
{
assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
{
uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
if (!(offset & 3) && offset <= 16380) {
- assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
+ assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
}
else
{
uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
if (!(offset & 7) && offset <= 32760) {
- assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
+ assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
}
else
{
uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
if (!(offset & 3) && offset <= 16380) {
- assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
+ assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
}
else
{
uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
if (!(offset & 7) && offset <= 32760) {
- assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
+ assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
}
else
output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
}
+static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) // emits "orn rt, rs1, rs2, asr #shift": rt = rs1 | ~(rs2 arithmetically shifted right by shift)
+{
+ assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
+ output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); // rs2 is the shifted and inverted operand, rs1 the plain one
+}
+
static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
{
assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
}
- else if (imm < 16777216) {
- assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
- output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
- if ((imm & 0xfff) || s) {
- assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
- output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
+ else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
+ assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
+ output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
+ if (imm & 0xfff) {
+ assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
+ output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
}
}
- else if (-imm < 16777216) {
- assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
- output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
- if ((imm & 0xfff) || s) {
- assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
- output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
+ else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
+ assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
+ output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
+ if (-imm & 0xfff) {
+ assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
+ output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
}
}
- else
- abort();
+ else {
+ u_int tmp = rt;
+ assert(!is64);
+ if (rs == rt) {
+ host_tempreg_acquire();
+ tmp = HOST_TEMPREG;
+ }
+ emit_movimm(imm, tmp);
+ assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
+ output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
+ if (tmp == HOST_TEMPREG)
+ host_tempreg_release();
+ }
}
static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
emit_addimm_s(1, 0, rt, imm, rt);
}
+static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt) // arm64 counterpart of the ARM helper of the same name: rt = rs + imm via emit_addimm_s
+{
+ emit_addimm_s(1, 0, rs, imm, rt); // NOTE(review): the leading 1, 0 are presumably (set flags, not 64-bit) - confirm against emit_addimm_s's parameter list
+}
+
static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
{
const char *names[] = { "and", "orr", "eor", "ands" };
output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
}
+static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt) // emits "csinv rt, rs1, rs2, ne": rt = rs1 when NE holds, otherwise ~rs2
+{
+ assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
+ output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt)); // same base word as the COND_LE emitter above, with COND_NE in the condition field
+}
+
static void emit_slti32(u_int rs,int imm,u_int rt)
{
if(rs!=rt) emit_zeroreg(rt);
output_w32(0x54000000 | (offset << 5) | COND_GE);
}
+static void emit_jo(const void *a) // emits "bvs a": conditional branch taken when the overflow (V) flag is set
+{
+ assem_debug("bvs %p\n", a);
+ u_int offset = genjmpcc(a);
+ output_w32(0x54000000 | (offset << 5) | COND_VS); // same conditional-branch layout as the neighbouring emitters, with COND_VS as the condition
+}
+
static void emit_jno(const void *a)
{
assem_debug("bvc %p\n", a);
u_int reglist = stubs[n].e;
const signed char *i_regmap = i_regs->regmap;
int rt;
- if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
+ if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
rt=get_reg(i_regmap,FTEMP);
}else{
rt=get_reg(i_regmap,dops[i].rt1);
emit_adds64(temp2,temp2,temp2);
handler_jump=out;
emit_jc(0);
- if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
+ if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
switch(type) {
case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
emit_far_call(handler);
// (no cycle reload after read)
- if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
+ if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
loadstore_extend(type,0,rt);
}
if(restore_jump)
u_int reglist=stubs[n].e;
signed char *i_regmap=i_regs->regmap;
int rt,r;
- if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
+ if(dops[i].itype==C2LS) {
rt=get_reg(i_regmap,r=FTEMP);
}else{
rt=get_reg(i_regmap,r=dops[i].rs2);
// div 0 quotient (remainder is already correct)
host_tempreg_acquire();
- if (dops[i].opcode2 == 0x1A) // DIV
- emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
+ if (dops[i].opcode2 == 0x1A) { // DIV
+ emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
+ emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
+ }
else
emit_movimm(~0,HOST_TEMPREG);
emit_test(denominator,denominator);
if (hr >= 0)
emit_mov(numerator,hr);
if (lr >= 0) {
- if (dops[i].opcode2 == 0x1A) // DIV
- emit_sub_asrimm(0,numerator,31,lr);
+ if (dops[i].opcode2 == 0x1A) { // DIV
+ emit_add_lsrimm(WZR,numerator,31,lr);
+ emit_orn_asrimm(lr,numerator,31,lr);
+ }
else
emit_movimm(~0,lr);
}
if (lr >= 0) emit_movimm(~0,lr);
}
}
+ else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
+ {
+ signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
+ assert(denominator >= 0);
+ if (hr >= 0) emit_zeroreg(hr);
+ if (lr >= 0) {
+ emit_zeroreg(lr);
+ emit_test(denominator, denominator);
+ emit_csinvne_reg(lr, lr, lr);
+ }
+ }
else
{
// Multiply by zero is zero.
#include "../psxinterpreter.h"
#include "../gte.h"
#include "emu_if.h" // emulator interface
+#include "linkage_offsets.h"
+#include "compiler_features.h"
#include "arm_features.h"
-#define unused __attribute__((unused))
-#ifdef __clang__
-#define noinline __attribute__((noinline))
-#else
-#define noinline __attribute__((noinline,noclone))
-#endif
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#endif
//#define DISASM
//#define ASSEM_PRINT
+//#define REGMAP_PRINT // with DISASM only
//#define INV_DEBUG_W
//#define STAT_PRINT
// stubs
enum stub_type {
CC_STUB = 1,
- FP_STUB = 2,
+ //FP_STUB = 2,
LOADB_STUB = 3,
LOADH_STUB = 4,
LOADW_STUB = 5,
- LOADD_STUB = 6,
+ //LOADD_STUB = 6,
LOADBU_STUB = 7,
LOADHU_STUB = 8,
STOREB_STUB = 9,
STOREH_STUB = 10,
STOREW_STUB = 11,
- STORED_STUB = 12,
+ //STORED_STUB = 12,
STORELR_STUB = 13,
INVCODE_STUB = 14,
+ OVERFLOW_STUB = 15,
};
// regmap_pre[i] - regs before [i] insn starts; dirty things here that
uint64_t dirty;
uint64_t u;
u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true
- u_int isconst; // ... but isconst is false when r2 is known
+ u_int isconst; // ... but isconst is false when r2 is known (hr)
u_int loadedconst; // host regs that have constants loaded
//u_int waswritten; // MIPS regs that were used as store base before
};
static struct decoded_insn
{
u_char itype;
- u_char opcode;
- u_char opcode2;
+ u_char opcode; // bits 31-26
+ u_char opcode2; // (depends on opcode)
u_char rs1;
u_char rs2;
u_char rt1;
u_char is_ujump:1;
u_char is_load:1;
u_char is_store:1;
+ u_char is_delay_load:1; // is_load + MFC/CFC
+ u_char is_exception:1; // unconditional, also interp. fallback
+ u_char may_except:1; // might generate an exception
} dops[MAXBLOCK];
static u_char *out;
#define STORE 2 // Store
#define LOADLR 3 // Unaligned load
#define STORELR 4 // Unaligned store
-#define MOV 5 // Move
+#define MOV 5 // Move (hi/lo only)
#define ALU 6 // Arithmetic/logic
#define MULTDIV 7 // Multiply/divide
#define SHIFT 8 // Shift by register
#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
#define SJUMP 14 // Conditional branch (regimm format)
#define COP0 15 // Coprocessor 0
-#define COP1 16 // Coprocessor 1
-#define C1LS 17 // Coprocessor 1 load/store
-//#define FJUMP 18 // Conditional branch (floating point)
-//#define FLOAT 19 // Floating point unit
-//#define FCONV 20 // Convert integer to float
-//#define FCOMP 21 // Floating point compare (sets FSREG)
+#define RFE 16
#define SYSCALL 22// SYSCALL,BREAK
-#define OTHER 23 // Other
-//#define SPAN 24 // Branch/delay slot spans 2 pages
-#define NI 25 // Not implemented
+#define OTHER 23 // Other/unknown - do nothing
#define HLECALL 26// PCSX fake opcodes for HLE
#define COP2 27 // Coprocessor 2 move
#define C2LS 28 // Coprocessor 2 load/store
// asm linkage
void dyna_linker();
void cc_interrupt();
-void fp_exception();
-void fp_exception_ds();
void jump_syscall (u_int u0, u_int u1, u_int pc);
void jump_syscall_ds(u_int u0, u_int u1, u_int pc);
void jump_break (u_int u0, u_int u1, u_int pc);
void jump_break_ds(u_int u0, u_int u1, u_int pc);
+void jump_overflow (u_int u0, u_int u1, u_int pc);
+void jump_overflow_ds(u_int u0, u_int u1, u_int pc);
void jump_to_new_pc();
void call_gteStall();
void new_dyna_leave();
static int new_recompile_block(u_int addr);
static void invalidate_block(struct block_info *block);
+static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_);
// Needed by assembler
static void wb_register(signed char r, const signed char regmap[], uint64_t dirty);
return ndrc_get_addr_ht(vaddr);
// generate an address error
- psxRegs.CP0.n.SR |= 2;
- psxRegs.CP0.n.Cause = (vaddr<<31) | (4<<2);
- psxRegs.CP0.n.EPC = (vaddr&1) ? vaddr-5 : vaddr;
- psxRegs.CP0.n.BadVAddr = vaddr & ~1;
+ psxRegs.CP0.n.Cause &= 0x300;
+ psxRegs.CP0.n.Cause |= R3000E_AdEL << 2;
+ psxRegs.CP0.n.EPC = vaddr;
+ psxRegs.pc = 0x80000080;
return ndrc_get_addr_ht(0x80000080);
}
#endif
+// get reg suitable for writing
+static signed char get_reg_w(const signed char regmap[], signed char r) // like get_reg, except guest register 0 never yields a host register
+{
+ return r == 0 ? -1 : get_reg(regmap, r); // -1 fails the ">= 0" test callers apply to the result, so writes to r0 are skipped
+}
+
// get reg as mask bit (1 << hr)
static u_int get_regm(const signed char regmap[], signed char r)
{
j++;
break;
}
- if(dops[i+j].itype==SYSCALL||dops[i+j].itype==HLECALL||dops[i+j].itype==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
+ if (dops[i+j].is_exception)
{
break;
}
FUNCNAME(jump_break_ds),
FUNCNAME(jump_syscall),
FUNCNAME(jump_syscall_ds),
+ FUNCNAME(jump_overflow),
+ FUNCNAME(jump_overflow_ds),
FUNCNAME(call_gteStall),
FUNCNAME(new_dyna_leave),
FUNCNAME(pcsx_mtc0),
return function_names[i].name;
return "";
}
+
+static const char *fpofs_name(u_int ofs) // returns a " ; name" annotation for a byte offset from dynarec_local, used by the ldr/str debug prints; the #else branch below stubs it to ""
+{
+ u_int *p = (u_int *)&dynarec_local + ofs/sizeof(u_int); // address of the u_int-sized word containing the offset (ofs is rounded down to a word index)
+ static char buf[64]; // static storage: text returned from the psxRegs cases below is overwritten by the next call
+ switch (ofs) { // offsets equal to a LO_* constant return the field name as a string literal
+ #define ofscase(x) case LO_##x: return " ; " #x
+ ofscase(next_interupt);
+ ofscase(last_count);
+ ofscase(pending_exception);
+ ofscase(stop);
+ ofscase(address);
+ ofscase(lo);
+ ofscase(hi);
+ ofscase(PC);
+ ofscase(cycle);
+ ofscase(mem_rtab);
+ ofscase(mem_wtab);
+ ofscase(psxH_ptr);
+ ofscase(invc_ptr);
+ ofscase(ram_offset);
+ #undef ofscase
+ }
+ buf[0] = 0; // default result is the empty string when p matches none of the register arrays below
+ if (psxRegs.GPR.r <= p && p < &psxRegs.GPR.r[32]) // p inside the 32-entry GPR array: print its index
+ snprintf(buf, sizeof(buf), " ; r%d", (int)(p - psxRegs.GPR.r));
+ else if (psxRegs.CP0.r <= p && p < &psxRegs.CP0.r[32]) // p inside the 32-entry CP0 array
+ snprintf(buf, sizeof(buf), " ; cp0 $%d", (int)(p - psxRegs.CP0.r));
+ else if (psxRegs.CP2D.r <= p && p < &psxRegs.CP2D.r[32]) // p inside the 32-entry CP2D array
+ snprintf(buf, sizeof(buf), " ; cp2d $%d", (int)(p - psxRegs.CP2D.r));
+ else if (psxRegs.CP2C.r <= p && p < &psxRegs.CP2C.r[32]) // p inside the 32-entry CP2C array
+ snprintf(buf, sizeof(buf), " ; cp2c $%d", (int)(p - psxRegs.CP2C.r));
+ return buf;
+}
#else
#define func_name(x) ""
+#define fpofs_name(x) ""
#endif
#ifdef __i386__
}
alloc_reg(current,i,dops[i].rt1);
}
+ if (!(dops[i].opcode2 & 1)) {
+ alloc_cc(current,i); // for exceptions
+ dirty_reg(current,CCREG);
+ }
}
if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU
if(dops[i].rt1) {
alloc_reg(current,i,dops[i].rt1);
}
}
- if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU
- assert(0);
- }
clear_const(current,dops[i].rs1);
clear_const(current,dops[i].rs2);
clear_const(current,dops[i].rt1);
if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
else dops[i].use_lt1=!!dops[i].rs1;
if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1);
- if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU
- assert(0);
- }
- else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU
+ if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU
clear_const(current,dops[i].rs1);
clear_const(current,dops[i].rt1);
}
set_const(current,dops[i].rt1,v+imm[i]);
}
else clear_const(current,dops[i].rt1);
+ if (dops[i].opcode == 0x08) {
+ alloc_cc(current,i); // for exceptions
+ dirty_reg(current,CCREG);
+ if (dops[i].rt1 == 0) {
+ alloc_reg_temp(current,i,-1);
+ minimum_free_regs[i]=1;
+ }
+ }
}
else {
set_const(current,dops[i].rt1,imm[i]<<16); // LUI
alloc_reg(current, i, ROREG);
if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) {
alloc_reg(current,i,dops[i].rt1);
- assert(get_reg(current->regmap,dops[i].rt1)>=0);
- if(dops[i].opcode==0x27||dops[i].opcode==0x37) // LWU/LD
- {
- assert(0);
- }
- else if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR
- {
- assert(0);
- }
+ assert(get_reg_w(current->regmap, dops[i].rt1)>=0);
dirty_reg(current,dops[i].rt1);
// LWL/LWR need a temporary register for the old value
if(dops[i].opcode==0x22||dops[i].opcode==0x26)
}
alloc_reg_temp(current,i,-1);
minimum_free_regs[i]=1;
- if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR
- {
- assert(0);
- }
}
}
minimum_free_regs[i]=1;
}
-static void c1ls_alloc(struct regstat *current,int i)
-{
- clear_const(current,dops[i].rt1);
- alloc_reg(current,i,CSREG); // Status
-}
-
static void c2ls_alloc(struct regstat *current,int i)
{
clear_const(current,dops[i].rt1);
{
if(dops[i].rt1) {
clear_const(current,dops[i].rt1);
- alloc_all(current,i);
alloc_reg(current,i,dops[i].rt1);
dirty_reg(current,dops[i].rt1);
}
current->u&=~1LL;
alloc_reg(current,i,0);
}
+ minimum_free_regs[i] = HOST_REGS;
}
- else
- {
- // RFE
- assert(dops[i].opcode2==0x10);
- alloc_all(current,i);
- }
- minimum_free_regs[i]=HOST_REGS;
+}
+
+static void rfe_alloc(struct regstat *current, int i) // register allocation for instructions of the RFE itype
+{
+ alloc_all(current, i);
+ minimum_free_regs[i] = HOST_REGS; // alloc_all plus HOST_REGS: the same two steps cop0_alloc performed for RFE before it became its own itype
}
static void cop2_alloc(struct regstat *current,int i)
case COP0:
cop0_alloc(current,i);
break;
- case COP1:
+ case RFE:
+ rfe_alloc(current,i);
break;
case COP2:
cop2_alloc(current,i);
break;
- case C1LS:
- c1ls_alloc(current,i);
- break;
case C2LS:
c2ls_alloc(current,i);
break;
}
}
-static void alu_assemble(int i, const struct regstat *i_regs)
+static void alu_assemble(int i, const struct regstat *i_regs, int ccadj_)
{
if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU
- if(dops[i].rt1) {
- signed char s1,s2,t;
- t=get_reg(i_regs->regmap,dops[i].rt1);
- if(t>=0) {
- s1=get_reg(i_regs->regmap,dops[i].rs1);
- s2=get_reg(i_regs->regmap,dops[i].rs2);
- if(dops[i].rs1&&dops[i].rs2) {
+ int do_oflow = dops[i].may_except; // ADD/SUB with exceptions enabled
+ if (dops[i].rt1 || do_oflow) {
+ int do_exception_check = 0;
+ signed char s1, s2, t, tmp;
+ t = get_reg_w(i_regs->regmap, dops[i].rt1);
+ tmp = get_reg_temp(i_regs->regmap);
+ if (t < 0 && do_oflow)
+ t = tmp;
+ if (t >= 0) {
+ s1 = get_reg(i_regs->regmap, dops[i].rs1);
+ s2 = get_reg(i_regs->regmap, dops[i].rs2);
+ if (dops[i].rs1 && dops[i].rs2) {
assert(s1>=0);
assert(s2>=0);
- if(dops[i].opcode2&2) emit_sub(s1,s2,t);
- else emit_add(s1,s2,t);
+ if (dops[i].opcode2 & 2) {
+ if (do_oflow) {
+ emit_subs(s1, s2, tmp);
+ do_exception_check = 1;
+ }
+ else
+ emit_sub(s1,s2,t);
+ }
+ else {
+ if (do_oflow) {
+ emit_adds(s1, s2, tmp);
+ do_exception_check = 1;
+ }
+ else
+ emit_add(s1,s2,t);
+ }
}
else if(dops[i].rs1) {
if(s1>=0) emit_mov(s1,t);
else emit_loadreg(dops[i].rs1,t);
}
else if(dops[i].rs2) {
- if(s2>=0) {
- if(dops[i].opcode2&2) emit_neg(s2,t);
- else emit_mov(s2,t);
+ if (s2 < 0) {
+ emit_loadreg(dops[i].rs2, t);
+ s2 = t;
}
- else {
- emit_loadreg(dops[i].rs2,t);
- if(dops[i].opcode2&2) emit_neg(t,t);
+ if (dops[i].opcode2 & 2) {
+ if (do_oflow) {
+ emit_negs(s2, tmp);
+ do_exception_check = 1;
+ }
+ else
+ emit_neg(s2, t);
}
+ else if (s2 != t)
+ emit_mov(s2, t);
}
- else emit_zeroreg(t);
+ else
+ emit_zeroreg(t);
+ }
+ if (do_exception_check) {
+ void *jaddr = out;
+ emit_jo(0);
+ if (t >= 0 && tmp != t)
+ emit_mov(tmp, t);
+ add_stub_r(OVERFLOW_STUB, jaddr, out, i, 0, i_regs, ccadj_, 0);
}
}
}
- if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU
- assert(0);
- }
- if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU
+ else if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU
if(dops[i].rt1) {
signed char s1l,s2l,t;
{
- t=get_reg(i_regs->regmap,dops[i].rt1);
+ t=get_reg_w(i_regs->regmap, dops[i].rt1);
//assert(t>=0);
if(t>=0) {
s1l=get_reg(i_regs->regmap,dops[i].rs1);
}
}
}
- if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR
+ else if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR
if(dops[i].rt1) {
signed char s1l,s2l,tl;
- tl=get_reg(i_regs->regmap,dops[i].rt1);
+ tl=get_reg_w(i_regs->regmap, dops[i].rt1);
{
if(tl>=0) {
s1l=get_reg(i_regs->regmap,dops[i].rs1);
}
}
-static void imm16_assemble(int i, const struct regstat *i_regs)
+static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_)
{
if (dops[i].opcode==0x0f) { // LUI
if(dops[i].rt1) {
signed char t;
- t=get_reg(i_regs->regmap,dops[i].rt1);
+ t=get_reg_w(i_regs->regmap, dops[i].rt1);
//assert(t>=0);
if(t>=0) {
if(!((i_regs->isconst>>t)&1))
}
}
if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU
- if(dops[i].rt1) {
- signed char s,t;
- t=get_reg(i_regs->regmap,dops[i].rt1);
+ int is_addi = (dops[i].opcode == 0x08);
+ if (dops[i].rt1 || is_addi) {
+ signed char s, t, tmp;
+ t=get_reg_w(i_regs->regmap, dops[i].rt1);
s=get_reg(i_regs->regmap,dops[i].rs1);
if(dops[i].rs1) {
- //assert(t>=0);
- //assert(s>=0);
+ tmp = get_reg_temp(i_regs->regmap);
+ if (is_addi) {
+ assert(tmp >= 0);
+ if (t < 0) t = tmp;
+ }
if(t>=0) {
if(!((i_regs->isconst>>t)&1)) {
- if(s<0) {
+ int sum, do_exception_check = 0;
+ if (s < 0) {
if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t);
- emit_addimm(t,imm[i],t);
- }else{
- if(!((i_regs->wasconst>>s)&1))
- emit_addimm(s,imm[i],t);
+ if (is_addi) {
+ emit_addimm_and_set_flags3(t, imm[i], tmp);
+ do_exception_check = 1;
+ }
else
- emit_movimm(constmap[i][s]+imm[i],t);
+ emit_addimm(t, imm[i], t);
+ } else {
+ if (!((i_regs->wasconst >> s) & 1)) {
+ if (is_addi) {
+ emit_addimm_and_set_flags3(s, imm[i], tmp);
+ do_exception_check = 1;
+ }
+ else
+ emit_addimm(s, imm[i], t);
+ }
+ else {
+ int oflow = add_overflow(constmap[i][s], imm[i], sum);
+ if (is_addi && oflow)
+ do_exception_check = 2;
+ else
+ emit_movimm(sum, t);
+ }
+ }
+ if (do_exception_check) {
+ void *jaddr = out;
+ if (do_exception_check == 2)
+ emit_jmp(0);
+ else {
+ emit_jo(0);
+ if (tmp != t)
+ emit_mov(tmp, t);
+ }
+ add_stub_r(OVERFLOW_STUB, jaddr, out, i, 0, i_regs, ccadj_, 0);
}
}
}
}
}
}
- if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU
- if(dops[i].rt1) {
- signed char sl,tl;
- tl=get_reg(i_regs->regmap,dops[i].rt1);
- sl=get_reg(i_regs->regmap,dops[i].rs1);
- if(tl>=0) {
- if(dops[i].rs1) {
- assert(sl>=0);
- emit_addimm(sl,imm[i],tl);
- } else {
- emit_movimm(imm[i],tl);
- }
- }
- }
- }
else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU
if(dops[i].rt1) {
//assert(dops[i].rs1!=0); // r0 might be valid, but it's probably a bug
signed char sl,t;
- t=get_reg(i_regs->regmap,dops[i].rt1);
+ t=get_reg_w(i_regs->regmap, dops[i].rt1);
sl=get_reg(i_regs->regmap,dops[i].rs1);
//assert(t>=0);
if(t>=0) {
else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI
if(dops[i].rt1) {
signed char sl,tl;
- tl=get_reg(i_regs->regmap,dops[i].rt1);
+ tl=get_reg_w(i_regs->regmap, dops[i].rt1);
sl=get_reg(i_regs->regmap,dops[i].rs1);
if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
if(dops[i].opcode==0x0c) //ANDI
{
if(dops[i].rt1) {
signed char s,t;
- t=get_reg(i_regs->regmap,dops[i].rt1);
+ t=get_reg_w(i_regs->regmap, dops[i].rt1);
s=get_reg(i_regs->regmap,dops[i].rs1);
//assert(t>=0);
if(t>=0&&!((i_regs->isconst>>t)&1)){
int offset_reg = -1;
int fastio_reg_override = -1;
u_int reglist=get_host_reglist(i_regs->regmap);
- tl=get_reg(i_regs->regmap,dops[i].rt1);
+ tl=get_reg_w(i_regs->regmap, dops[i].rt1);
s=get_reg(i_regs->regmap,dops[i].rs1);
offset=imm[i];
if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
}
//printf("load_assemble: c=%d\n",c);
//if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
- // FIXME: Even if the load is a NOP, we should check for pagefaults...
- if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80))
- ||dops[i].rt1==0) {
+ if(tl<0 && ((!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) || dops[i].rt1==0)) {
// could be FIFO, must perform the read
// ||dummy read
assem_debug("(forced read)\n");
else if (ram_offset && memtarget) {
offset_reg = get_ro_reg(i_regs, 0);
}
- int dummy=(dops[i].rt1==0)||(tl!=get_reg(i_regs->regmap,dops[i].rt1)); // ignore loads to r0 and unneeded reg
+ int dummy=(dops[i].rt1==0)||(tl!=get_reg_w(i_regs->regmap, dops[i].rt1)); // ignore loads to r0 and unneeded reg
switch (dops[i].opcode) {
case 0x20: // LB
if(!c||memtarget) {
default:
assert(0);
}
- }
+ } // tl >= 0
if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG)
host_tempreg_release();
}
int offset_reg = -1;
int fastio_reg_override = -1;
u_int reglist=get_host_reglist(i_regs->regmap);
- tl=get_reg(i_regs->regmap,dops[i].rt1);
+ tl=get_reg_w(i_regs->regmap, dops[i].rt1);
s=get_reg(i_regs->regmap,dops[i].rs1);
temp=get_reg_temp(i_regs->regmap);
temp2=get_reg(i_regs->regmap,FTEMP);
{
if(dops[i].opcode2==0) // MFC0
{
- signed char t=get_reg(i_regs->regmap,dops[i].rt1);
+ signed char t=get_reg_w(i_regs->regmap, dops[i].rt1);
u_int copr=(source[i]>>11)&0x1f;
- //assert(t>=0); // Why does this happen? OOT is weird
if(t>=0&&dops[i].rt1!=0) {
emit_readword(®_cop0[copr],t);
}
}
emit_loadreg(dops[i].rs1,s);
}
- else
- {
- assert(dops[i].opcode2==0x10);
- //if((source[i]&0x3f)==0x10) // RFE
- {
- emit_readword(&psxRegs.CP0.n.SR,0);
- emit_andimm(0,0x3c,1);
- emit_andimm(0,~0xf,0);
- emit_orrshr_imm(1,2,0);
- emit_writeword(0,&psxRegs.CP0.n.SR);
- }
- }
-}
-
-static void cop1_unusable(int i, const struct regstat *i_regs)
-{
- // XXX: should just just do the exception instead
- //if(!cop1_usable)
- {
- void *jaddr=out;
- emit_jmp(0);
- add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
- }
}
-static void cop1_assemble(int i, const struct regstat *i_regs)
+static void rfe_assemble(int i, const struct regstat *i_regs, int ccadj_) // emits the code for an instruction classified as RFE (called from the RFE itype case); the parameters are not used by the body
{
- cop1_unusable(i, i_regs);
-}
-
-static void c1ls_assemble(int i, const struct regstat *i_regs)
-{
- cop1_unusable(i, i_regs);
-}
-
-// FP_STUB
-static void do_cop1stub(int n)
-{
- literal_pool(256);
- assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
- set_jump_target(stubs[n].addr, out);
- int i=stubs[n].a;
-// int rs=stubs[n].b;
- struct regstat *i_regs=(struct regstat *)stubs[n].c;
- int ds=stubs[n].d;
- if(!ds) {
- load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
- //if(i_regs!=®s[i]) printf("oops: regs[i]=%x i_regs=%x",(int)®s[i],(int)i_regs);
- }
- //else {printf("fp exception in delay slot\n");}
- wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty);
- if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
- emit_movimm(start+(i-ds)*4,0); // Get PC
- emit_addimm(HOST_CCREG,ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
- emit_far_jump(ds?fp_exception_ds:fp_exception);
+ emit_readword(&psxRegs.CP0.n.SR, 0); // load psxRegs.CP0.n.SR into host register 0
+ emit_andimm(0, 0x3c, 1); // host reg 1 = value & 0x3c (bits 2..5), assuming the (src, imm, dst) order used by emit_addimm
+ emit_andimm(0, ~0xf, 0); // clear bits 0..3 of the value in host reg 0
+ emit_orrshr_imm(1, 2, 0); // presumably reg0 |= reg1 >> 2, i.e. old bits 2..5 land in bits 0..3 - TODO confirm helper semantics
+ emit_writeword(0, &psxRegs.CP0.n.SR); // store the updated value back to psxRegs.CP0.n.SR
}
static int cop2_is_stalling_op(int i, int *cycles)
cop2_do_stall_check(0, i, i_regs, reglist);
}
if (dops[i].opcode2==0) { // MFC2
- signed char tl=get_reg(i_regs->regmap,dops[i].rt1);
+ signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1);
if(tl>=0&&dops[i].rt1!=0)
cop2_get_dreg(copr,tl,temp);
}
}
else if (dops[i].opcode2==2) // CFC2
{
- signed char tl=get_reg(i_regs->regmap,dops[i].rt1);
+ signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1);
if(tl>=0&&dops[i].rt1!=0)
emit_readword(®_cop2c[copr],tl);
}
emit_jmp(stubs[n].retaddr); // return address
}
+static void do_overflowstub(int n) // emits the out-of-line code for stub n; dispatched for OVERFLOW_STUB entries in the stub loop
+{
+ assem_debug("do_overflowstub %x\n", start + (u_int)stubs[n].a * 4);
+ literal_pool(24);
+ int i = stubs[n].a; // index of the instruction that registered the stub (used as start + a*4 above)
+ struct regstat *i_regs = (struct regstat *)stubs[n].c; // register state saved with the stub
+ int ccadj = stubs[n].d; // presumably the ccadj_ value passed to add_stub_r when the stub was queued
+ set_jump_target(stubs[n].addr, out); // presumably retargets the jump recorded at stubs[n].addr to the code emitted from here
+ wb_dirtys(regs[i].regmap, regs[i].dirty); // uses the state of instruction i (regs[i]), not i_regs
+ exception_assemble(i, i_regs, ccadj); // the shared exception emitter picks the overflow handler for ALU/IMM16 itypes
+}
+
#ifndef multdiv_assemble
void multdiv_assemble(int i,struct regstat *i_regs)
{
//if(dops[i].opcode2==0x11||dops[i].opcode2==0x13) { // MTHI/MTLO
if(dops[i].rt1) {
signed char sl,tl;
- tl=get_reg(i_regs->regmap,dops[i].rt1);
+ tl=get_reg_w(i_regs->regmap, dops[i].rt1);
//assert(tl>=0);
if(tl>=0) {
sl=get_reg(i_regs->regmap,dops[i].rs1);
emit_far_jump(jump_to_new_pc);
}
-static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_)
{
// 'break' tends to be littered around to catch things like
// division by 0 and is almost never executed, so don't emit much code here
- void *func = (dops[i].opcode2 == 0x0C)
- ? (is_delayslot ? jump_syscall_ds : jump_syscall)
- : (is_delayslot ? jump_break_ds : jump_break);
+ void *func;
+ if (dops[i].itype == ALU || dops[i].itype == IMM16)
+ func = is_delayslot ? jump_overflow_ds : jump_overflow;
+ else if (dops[i].opcode2 == 0x0C)
+ func = is_delayslot ? jump_syscall_ds : jump_syscall;
+ else
+ func = is_delayslot ? jump_break_ds : jump_break;
assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG);
emit_movimm(start + i*4, 2); // pc
emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG);
static void hlecall_bad()
{
- SysPrintf("bad hlecall\n");
+ assert(0); // reaching this handler is now a hard failure instead of a log line; note assert() compiles to nothing when NDEBUG is defined
}
static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_)
// fallthrough
case IMM16:
if(dops[i].rt1&&is_const(®s[i],dops[i].rt1)) {
- int value,hr=get_reg(regs[i].regmap,dops[i].rt1);
+ int value,hr=get_reg_w(regs[i].regmap, dops[i].rt1);
if(hr>=0) {
if(get_final_value(hr,i,&value))
smrv[dops[i].rt1]=value;
int ds = 0;
switch (dops[i].itype) {
case ALU:
- alu_assemble(i, i_regs);
+ alu_assemble(i, i_regs, ccadj_);
break;
case IMM16:
- imm16_assemble(i, i_regs);
+ imm16_assemble(i, i_regs, ccadj_);
break;
case SHIFT:
shift_assemble(i, i_regs);
case COP0:
cop0_assemble(i, i_regs, ccadj_);
break;
- case COP1:
- cop1_assemble(i, i_regs);
- break;
- case C1LS:
- c1ls_assemble(i, i_regs);
+ case RFE:
+ rfe_assemble(i, i_regs, ccadj_);
break;
case COP2:
cop2_assemble(i, i_regs);
mov_assemble(i, i_regs);
break;
case SYSCALL:
- syscall_assemble(i, i_regs, ccadj_);
+ exception_assemble(i, i_regs, ccadj_);
break;
case HLECALL:
hlecall_assemble(i, i_regs, ccadj_);
break;
case NOP:
case OTHER:
- case NI:
// not handled, just skip
break;
default:
int ra=-1;
int agr=AGEN1+(i&1);
if(dops[i].itype==LOAD) {
- ra=get_reg(i_regs->regmap,dops[i].rt1);
+ ra=get_reg_w(i_regs->regmap, dops[i].rt1);
if(ra<0) ra=get_reg_temp(i_regs->regmap);
assert(ra>=0);
}
int rt,return_address;
assert(dops[i+1].rt1!=dops[i].rt1);
assert(dops[i+1].rt2!=dops[i].rt1);
- rt=get_reg(branch_regs[i].regmap,dops[i].rt1);
+ rt=get_reg_w(branch_regs[i].regmap, dops[i].rt1);
assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
assert(rt>=0);
return_address=start+i*4+8;
//assert(adj==0);
emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG);
add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs);
- if(dops[i+1].itype==COP0 && dops[i+1].opcode2==0x10)
+ if (dops[i+1].itype == RFE)
// special case for RFE
emit_jmp(0);
else
printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC0
else printf (" %x: %s\n",start+i*4,insn[i]);
break;
- case COP1:
- if(dops[i].opcode2<3)
- printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC1
- else if(dops[i].opcode2>3)
- printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC1
- else printf (" %x: %s\n",start+i*4,insn[i]);
- break;
case COP2:
if(dops[i].opcode2<3)
printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC2
printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC2
else printf (" %x: %s\n",start+i*4,insn[i]);
break;
- case C1LS:
- printf (" %x: %s cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]);
- break;
case C2LS:
printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]);
break;
//printf (" %s %8x\n",insn[i],source[i]);
printf (" %x: %s\n",start+i*4,insn[i]);
}
+ #ifndef REGMAP_PRINT
return;
- printf("D: %"PRIu64" WD: %"PRIu64" U: %"PRIu64"\n",
- regs[i].dirty, regs[i].wasdirty, unneeded_reg[i]);
+ #endif
+ printf("D: %"PRIx64" WD: %"PRIx64" U: %"PRIx64" hC: %x hWC: %x hLC: %x\n",
+ regs[i].dirty, regs[i].wasdirty, unneeded_reg[i],
+ regs[i].isconst, regs[i].wasconst, regs[i].loadedconst);
print_regmap("pre: ", regmap_pre[i]);
print_regmap("entry: ", regs[i].regmap_entry);
print_regmap("map: ", regs[i].regmap);
static noinline void pass1_disassemble(u_int pagelimit)
{
int i, j, done = 0, ni_count = 0;
- unsigned int type,op,op2;
+ unsigned int type,op,op2,op3;
for (i = 0; !done; i++)
{
+ int force_prev_to_interpreter = 0;
memset(&dops[i], 0, sizeof(dops[i]));
- op2=0;
- minimum_free_regs[i]=0;
- dops[i].opcode=op=source[i]>>26;
+ op2 = 0;
+ minimum_free_regs[i] = 0;
+ dops[i].opcode = op = source[i] >> 26;
+ type = INTCALL;
+ set_mnemonic(i, "???");
switch(op)
{
- case 0x00: set_mnemonic(i, "special"); type=NI;
+ case 0x00: set_mnemonic(i, "special");
op2=source[i]&0x3f;
switch(op2)
{
case 0x27: set_mnemonic(i, "NOR"); type=ALU; break;
case 0x2A: set_mnemonic(i, "SLT"); type=ALU; break;
case 0x2B: set_mnemonic(i, "SLTU"); type=ALU; break;
- case 0x30: set_mnemonic(i, "TGE"); type=NI; break;
- case 0x31: set_mnemonic(i, "TGEU"); type=NI; break;
- case 0x32: set_mnemonic(i, "TLT"); type=NI; break;
- case 0x33: set_mnemonic(i, "TLTU"); type=NI; break;
- case 0x34: set_mnemonic(i, "TEQ"); type=NI; break;
- case 0x36: set_mnemonic(i, "TNE"); type=NI; break;
-#if 0
- case 0x14: set_mnemonic(i, "DSLLV"); type=SHIFT; break;
- case 0x16: set_mnemonic(i, "DSRLV"); type=SHIFT; break;
- case 0x17: set_mnemonic(i, "DSRAV"); type=SHIFT; break;
- case 0x1C: set_mnemonic(i, "DMULT"); type=MULTDIV; break;
- case 0x1D: set_mnemonic(i, "DMULTU"); type=MULTDIV; break;
- case 0x1E: set_mnemonic(i, "DDIV"); type=MULTDIV; break;
- case 0x1F: set_mnemonic(i, "DDIVU"); type=MULTDIV; break;
- case 0x2C: set_mnemonic(i, "DADD"); type=ALU; break;
- case 0x2D: set_mnemonic(i, "DADDU"); type=ALU; break;
- case 0x2E: set_mnemonic(i, "DSUB"); type=ALU; break;
- case 0x2F: set_mnemonic(i, "DSUBU"); type=ALU; break;
- case 0x38: set_mnemonic(i, "DSLL"); type=SHIFTIMM; break;
- case 0x3A: set_mnemonic(i, "DSRL"); type=SHIFTIMM; break;
- case 0x3B: set_mnemonic(i, "DSRA"); type=SHIFTIMM; break;
- case 0x3C: set_mnemonic(i, "DSLL32"); type=SHIFTIMM; break;
- case 0x3E: set_mnemonic(i, "DSRL32"); type=SHIFTIMM; break;
- case 0x3F: set_mnemonic(i, "DSRA32"); type=SHIFTIMM; break;
-#endif
}
break;
- case 0x01: set_mnemonic(i, "regimm"); type=NI;
- op2=(source[i]>>16)&0x1f;
+ case 0x01: set_mnemonic(i, "regimm");
+ type = SJUMP;
+ op2 = (source[i] >> 16) & 0x1f;
switch(op2)
{
- case 0x00: set_mnemonic(i, "BLTZ"); type=SJUMP; break;
- case 0x01: set_mnemonic(i, "BGEZ"); type=SJUMP; break;
- //case 0x02: set_mnemonic(i, "BLTZL"); type=SJUMP; break;
- //case 0x03: set_mnemonic(i, "BGEZL"); type=SJUMP; break;
- //case 0x08: set_mnemonic(i, "TGEI"); type=NI; break;
- //case 0x09: set_mnemonic(i, "TGEIU"); type=NI; break;
- //case 0x0A: set_mnemonic(i, "TLTI"); type=NI; break;
- //case 0x0B: set_mnemonic(i, "TLTIU"); type=NI; break;
- //case 0x0C: set_mnemonic(i, "TEQI"); type=NI; break;
- //case 0x0E: set_mnemonic(i, "TNEI"); type=NI; break;
- case 0x10: set_mnemonic(i, "BLTZAL"); type=SJUMP; break;
- case 0x11: set_mnemonic(i, "BGEZAL"); type=SJUMP; break;
- //case 0x12: set_mnemonic(i, "BLTZALL"); type=SJUMP; break;
- //case 0x13: set_mnemonic(i, "BGEZALL"); type=SJUMP; break;
+ case 0x10: set_mnemonic(i, "BLTZAL"); break;
+ case 0x11: set_mnemonic(i, "BGEZAL"); break;
+ default:
+ if (op2 & 1)
+ set_mnemonic(i, "BGEZ");
+ else
+ set_mnemonic(i, "BLTZ");
}
break;
case 0x02: set_mnemonic(i, "J"); type=UJUMP; break;
case 0x0D: set_mnemonic(i, "ORI"); type=IMM16; break;
case 0x0E: set_mnemonic(i, "XORI"); type=IMM16; break;
case 0x0F: set_mnemonic(i, "LUI"); type=IMM16; break;
- case 0x10: set_mnemonic(i, "cop0"); type=NI;
- op2=(source[i]>>21)&0x1f;
+ case 0x10: set_mnemonic(i, "COP0");
+ op2 = (source[i]>>21) & 0x1f;
+ if (op2 & 0x10) {
+ op3 = source[i] & 0x1f;
+ switch (op3)
+ {
+ case 0x01: case 0x02: case 0x06: case 0x08: type = INTCALL; break;
+ case 0x10: set_mnemonic(i, "RFE"); type=RFE; break;
+ default: type = OTHER; break;
+ }
+ break;
+ }
switch(op2)
{
- case 0x00: set_mnemonic(i, "MFC0"); type=COP0; break;
- case 0x02: set_mnemonic(i, "CFC0"); type=COP0; break;
+ u32 rd;
+ case 0x00:
+ set_mnemonic(i, "MFC0");
+ rd = (source[i] >> 11) & 0x1F;
+ if (!(0x00000417u & (1u << rd)))
+ type = COP0;
+ break;
case 0x04: set_mnemonic(i, "MTC0"); type=COP0; break;
- case 0x06: set_mnemonic(i, "CTC0"); type=COP0; break;
- case 0x10: set_mnemonic(i, "RFE"); type=COP0; break;
+ case 0x02:
+ case 0x06: type = INTCALL; break;
+ default: type = OTHER; break;
}
break;
- case 0x11: set_mnemonic(i, "cop1"); type=COP1;
+ case 0x11: set_mnemonic(i, "COP1");
op2=(source[i]>>21)&0x1f;
break;
-#if 0
- case 0x14: set_mnemonic(i, "BEQL"); type=CJUMP; break;
- case 0x15: set_mnemonic(i, "BNEL"); type=CJUMP; break;
- case 0x16: set_mnemonic(i, "BLEZL"); type=CJUMP; break;
- case 0x17: set_mnemonic(i, "BGTZL"); type=CJUMP; break;
- case 0x18: set_mnemonic(i, "DADDI"); type=IMM16; break;
- case 0x19: set_mnemonic(i, "DADDIU"); type=IMM16; break;
- case 0x1A: set_mnemonic(i, "LDL"); type=LOADLR; break;
- case 0x1B: set_mnemonic(i, "LDR"); type=LOADLR; break;
-#endif
- case 0x20: set_mnemonic(i, "LB"); type=LOAD; break;
- case 0x21: set_mnemonic(i, "LH"); type=LOAD; break;
- case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break;
- case 0x23: set_mnemonic(i, "LW"); type=LOAD; break;
- case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break;
- case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break;
- case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break;
-#if 0
- case 0x27: set_mnemonic(i, "LWU"); type=LOAD; break;
-#endif
- case 0x28: set_mnemonic(i, "SB"); type=STORE; break;
- case 0x29: set_mnemonic(i, "SH"); type=STORE; break;
- case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break;
- case 0x2B: set_mnemonic(i, "SW"); type=STORE; break;
-#if 0
- case 0x2C: set_mnemonic(i, "SDL"); type=STORELR; break;
- case 0x2D: set_mnemonic(i, "SDR"); type=STORELR; break;
-#endif
- case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break;
- case 0x2F: set_mnemonic(i, "CACHE"); type=NOP; break;
- case 0x30: set_mnemonic(i, "LL"); type=NI; break;
- case 0x31: set_mnemonic(i, "LWC1"); type=C1LS; break;
-#if 0
- case 0x34: set_mnemonic(i, "LLD"); type=NI; break;
- case 0x35: set_mnemonic(i, "LDC1"); type=C1LS; break;
- case 0x37: set_mnemonic(i, "LD"); type=LOAD; break;
-#endif
- case 0x38: set_mnemonic(i, "SC"); type=NI; break;
- case 0x39: set_mnemonic(i, "SWC1"); type=C1LS; break;
-#if 0
- case 0x3C: set_mnemonic(i, "SCD"); type=NI; break;
- case 0x3D: set_mnemonic(i, "SDC1"); type=C1LS; break;
- case 0x3F: set_mnemonic(i, "SD"); type=STORE; break;
-#endif
- case 0x12: set_mnemonic(i, "COP2"); type=NI;
+ case 0x12: set_mnemonic(i, "COP2");
op2=(source[i]>>21)&0x1f;
- //if (op2 & 0x10)
- if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns
+ if (op2 & 0x10) {
+ type = OTHER;
if (gte_handlers[source[i]&0x3f]!=NULL) {
#ifdef DISASM
if (gte_regnames[source[i]&0x3f]!=NULL)
else
snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
#endif
- type=C2OP;
+ type = C2OP;
}
}
else switch(op2)
case 0x06: set_mnemonic(i, "CTC2"); type=COP2; break;
}
break;
+ case 0x13: set_mnemonic(i, "COP3");
+ op2=(source[i]>>21)&0x1f;
+ break;
+ case 0x20: set_mnemonic(i, "LB"); type=LOAD; break;
+ case 0x21: set_mnemonic(i, "LH"); type=LOAD; break;
+ case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break;
+ case 0x23: set_mnemonic(i, "LW"); type=LOAD; break;
+ case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break;
+ case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break;
+ case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break;
+ case 0x28: set_mnemonic(i, "SB"); type=STORE; break;
+ case 0x29: set_mnemonic(i, "SH"); type=STORE; break;
+ case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break;
+ case 0x2B: set_mnemonic(i, "SW"); type=STORE; break;
+ case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break;
case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; break;
case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break;
- case 0x3B: set_mnemonic(i, "HLECALL"); type=HLECALL; break;
- default: set_mnemonic(i, "???"); type=NI;
- SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start);
+ case 0x3B:
+ if (Config.HLE && (source[i] & 0x03ffffff) < ARRAY_SIZE(psxHLEt)) {
+ set_mnemonic(i, "HLECALL");
+ type = HLECALL;
+ }
+ break;
+ default:
break;
}
+ if (type == INTCALL)
+ SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start);
dops[i].itype=type;
dops[i].opcode2=op2;
/* Get registers/immediates */
dops[i].use_lt1=0;
gte_rs[i]=gte_rt[i]=0;
+ dops[i].rs1 = 0;
+ dops[i].rs2 = 0;
+ dops[i].rt1 = 0;
+ dops[i].rt2 = 0;
switch(type) {
case LOAD:
dops[i].rs1=(source[i]>>21)&0x1f;
- dops[i].rs2=0;
dops[i].rt1=(source[i]>>16)&0x1f;
- dops[i].rt2=0;
imm[i]=(short)source[i];
break;
case STORE:
case STORELR:
dops[i].rs1=(source[i]>>21)&0x1f;
dops[i].rs2=(source[i]>>16)&0x1f;
- dops[i].rt1=0;
- dops[i].rt2=0;
imm[i]=(short)source[i];
break;
case LOADLR:
dops[i].rs1=(source[i]>>21)&0x1f;
dops[i].rs2=(source[i]>>16)&0x1f;
dops[i].rt1=(source[i]>>16)&0x1f;
- dops[i].rt2=0;
imm[i]=(short)source[i];
break;
case IMM16:
else dops[i].rs1=(source[i]>>21)&0x1f;
dops[i].rs2=0;
dops[i].rt1=(source[i]>>16)&0x1f;
- dops[i].rt2=0;
if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI
imm[i]=(unsigned short)source[i];
}else{
}
break;
case UJUMP:
- dops[i].rs1=0;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
// The JAL instruction writes to r31.
if (op&1) {
dops[i].rt1=31;
break;
case RJUMP:
dops[i].rs1=(source[i]>>21)&0x1f;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
// The JALR instruction writes to rd.
if (op2&1) {
dops[i].rt1=(source[i]>>11)&0x1f;
case CJUMP:
dops[i].rs1=(source[i]>>21)&0x1f;
dops[i].rs2=(source[i]>>16)&0x1f;
- dops[i].rt1=0;
- dops[i].rt2=0;
if(op&2) { // BGTZ/BLEZ
dops[i].rs2=0;
}
case SJUMP:
dops[i].rs1=(source[i]>>21)&0x1f;
dops[i].rs2=CCREG;
- dops[i].rt1=0;
- dops[i].rt2=0;
- if(op2&0x10) { // BxxAL
- dops[i].rt1=31;
+ if (op2 == 0x10 || op2 == 0x11) { // BxxAL
+ dops[i].rt1 = 31;
// NOTE: If the branch is not taken, r31 is still overwritten
}
break;
dops[i].rs1=(source[i]>>21)&0x1f; // source
dops[i].rs2=(source[i]>>16)&0x1f; // subtract amount
dops[i].rt1=(source[i]>>11)&0x1f; // destination
- dops[i].rt2=0;
break;
case MULTDIV:
dops[i].rs1=(source[i]>>21)&0x1f; // source
dops[i].rt2=LOREG;
break;
case MOV:
- dops[i].rs1=0;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
if(op2==0x10) dops[i].rs1=HIREG; // MFHI
if(op2==0x11) dops[i].rt1=HIREG; // MTHI
if(op2==0x12) dops[i].rs1=LOREG; // MFLO
dops[i].rs1=(source[i]>>16)&0x1f; // target of shift
dops[i].rs2=(source[i]>>21)&0x1f; // shift amount
dops[i].rt1=(source[i]>>11)&0x1f; // destination
- dops[i].rt2=0;
break;
case SHIFTIMM:
dops[i].rs1=(source[i]>>16)&0x1f;
dops[i].rs2=0;
dops[i].rt1=(source[i]>>11)&0x1f;
- dops[i].rt2=0;
imm[i]=(source[i]>>6)&0x1f;
- // DSxx32 instructions
- if(op2>=0x3c) imm[i]|=0x20;
break;
case COP0:
- dops[i].rs1=0;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
- if(op2==0||op2==2) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0/CFC0
- if(op2==4||op2==6) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0/CTC0
+ if(op2==0) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0
+ if(op2==4) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0
if(op2==4&&((source[i]>>11)&0x1f)==12) dops[i].rt2=CSREG; // Status
- if(op2==16) if((source[i]&0x3f)==0x18) dops[i].rs2=CCREG; // ERET
- break;
- case COP1:
- dops[i].rs1=0;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
- if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1
- if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1
- dops[i].rs2=CSREG;
break;
case COP2:
- dops[i].rs1=0;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC2/CFC2
if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC2/CTC2
dops[i].rs2=CSREG;
case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
}
break;
- case C1LS:
- dops[i].rs1=(source[i]>>21)&0x1F;
- dops[i].rs2=CSREG;
- dops[i].rt1=0;
- dops[i].rt2=0;
- imm[i]=(short)source[i];
- break;
case C2LS:
dops[i].rs1=(source[i]>>21)&0x1F;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
imm[i]=(short)source[i];
if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2
else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2
break;
case C2OP:
- dops[i].rs1=0;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
gte_rs[i]=gte_reg_reads[source[i]&0x3f];
gte_rt[i]=gte_reg_writes[source[i]&0x3f];
gte_rt[i]|=1ll<<63; // every op changes flags
case HLECALL:
case INTCALL:
dops[i].rs1=CCREG;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
break;
default:
- dops[i].rs1=0;
- dops[i].rs2=0;
- dops[i].rt1=0;
- dops[i].rt2=0;
+ break;
}
/* Calculate branch target addresses */
if(type==UJUMP)
dops[i].is_jump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP);
dops[i].is_ujump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP); // || (source[i] >> 16) == 0x1000 // beq r0,r0
dops[i].is_load = (dops[i].itype == LOAD || dops[i].itype == LOADLR || op == 0x32); // LWC2
+ dops[i].is_delay_load = (dops[i].is_load || (source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC
dops[i].is_store = (dops[i].itype == STORE || dops[i].itype == STORELR || op == 0x3a); // SWC2
+ dops[i].is_exception = (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL);
+ dops[i].may_except = dops[i].is_exception || (dops[i].itype == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8;
- /* messy cases to just pass over to the interpreter */
+ /* rare messy cases to just pass over to the interpreter */
if (i > 0 && dops[i-1].is_jump) {
- int do_in_intrp=0;
// branch in delay slot?
if (dops[i].is_jump) {
// don't handle first branch and call interpreter if it's hit
- SysPrintf("branch in delay slot @%08x (%08x)\n", start + i*4, start);
- do_in_intrp=1;
+ SysPrintf("branch in DS @%08x (%08x)\n", start + i*4, start);
+ force_prev_to_interpreter = 1;
}
- // basic load delay detection
- else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&dops[i].rt1!=0) {
+ // basic load delay detection through a branch
+ else if (dops[i].is_delay_load && dops[i].rt1 != 0) {
int t=(ba[i-1]-start)/4;
if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) {
// jump target wants DS result - potential load delay effect
- SysPrintf("load delay @%08x (%08x)\n", start + i*4, start);
- do_in_intrp=1;
+ SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start);
+ force_prev_to_interpreter = 1;
dops[t+1].bt=1; // expected return from interpreter
}
else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&&
!(i>=3&&dops[i-3].is_jump)) {
// v0 overwrite like this is a sign of trouble, bail out
SysPrintf("v0 overwrite @%08x (%08x)\n", start + i*4, start);
- do_in_intrp=1;
+ force_prev_to_interpreter = 1;
}
}
- if (do_in_intrp) {
- memset(&dops[i-1], 0, sizeof(dops[i-1]));
- dops[i-1].itype = INTCALL;
- dops[i-1].rs1 = CCREG;
- ba[i-1] = -1;
- done = 2;
- i--; // don't compile the DS
- }
+ }
+ else if (i > 0 && dops[i-1].is_delay_load && dops[i-1].rt1 != 0
+ && (dops[i].rs1 == dops[i-1].rt1 || dops[i].rs2 == dops[i-1].rt1)) {
+ SysPrintf("load delay @%08x (%08x)\n", start + i*4, start);
+ force_prev_to_interpreter = 1;
+ }
+ if (force_prev_to_interpreter) {
+ memset(&dops[i-1], 0, sizeof(dops[i-1]));
+ dops[i-1].itype = INTCALL;
+ dops[i-1].rs1 = CCREG;
+ ba[i-1] = -1;
+ done = 2;
+ i--; // don't compile the DS/problematic load/etc
}
/* Is this the end of the block? */
// Don't get too close to the limit
if(i>MAXBLOCK/2) done=1;
}
- if (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL)
+ // Exception-type instructions end the block. The result must go into 'done':
+ // it is the flag the disassembly loop runs on and the one the 'done == 2'
+ // check below tests. HLECALL always ends the block (1); INTCALL ends it
+ // unless an earlier branch targets the following code (2).
+ if (dops[i].itype == HLECALL)
+ done = 1;
+ else if (dops[i].itype == INTCALL)
+ done = 2;
+ else if (dops[i].is_exception)
done = stop_after_jal ? 1 : 2;
if (done == 2) {
// Does the block continue due to a branch?
assert(start+i*4<pagelimit);
if (i==MAXBLOCK-1) done=1;
// Stop if we're compiling junk
- if(dops[i].itype == NI && (++ni_count > 8 || dops[i].opcode == 0x11)) {
+ if (dops[i].itype == INTCALL && (++ni_count > 8 || dops[i].opcode == 0x11)) {
done=stop_after_jal=1;
SysPrintf("Disabled speculative precompilation\n");
}
}
}
}
- else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL)
+ else if(dops[i].may_except)
{
- // SYSCALL instruction (software interrupt)
+ // SYSCALL instruction, etc or conditional exception
u=1;
}
- else if(dops[i].itype==COP0 && dops[i].opcode2==0x10)
+ else if (dops[i].itype == RFE)
{
- // RFE
u=1;
}
//u=1; // DEBUG
delayslot_alloc(¤t,i+1);
//current.isconst=0; // DEBUG
ds=1;
- //printf("i=%d, isconst=%x\n",i,current.isconst);
break;
case RJUMP:
//current.isconst=0;
//current.isconst=0;
break;
case SJUMP:
- //current.isconst=0;
- //current.wasconst=0;
- //regs[i].wasconst=0;
clear_const(¤t,dops[i].rs1);
clear_const(¤t,dops[i].rt1);
- //if((dops[i].opcode2&0x1E)==0x0) // BLTZ/BGEZ
- if((dops[i].opcode2&0x0E)==0x0) // BLTZ/BGEZ
{
alloc_cc(¤t,i);
dirty_reg(¤t,CCREG);
if (dops[i].rt1==31) { // BLTZAL/BGEZAL
alloc_reg(¤t,i,31);
dirty_reg(¤t,31);
- //#ifdef REG_PREFETCH
- //alloc_reg(¤t,i,PTEMP);
- //#endif
}
if((dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) // The delay slot overwrites the branch condition.
||(dops[i].rt1==31&&(dops[i+1].rs1==31||dops[i+1].rs2==31||dops[i+1].rt1==31||dops[i+1].rt2==31))) { // DS touches $ra
delayslot_alloc(¤t,i+1);
}
}
- else
- // Don't alloc the delay slot yet because we might not execute it
- if((dops[i].opcode2&0x1E)==0x2) // BLTZL/BGEZL
- {
- current.isconst=0;
- current.wasconst=0;
- regs[i].wasconst=0;
- alloc_cc(¤t,i);
- dirty_reg(¤t,CCREG);
- alloc_reg(¤t,i,dops[i].rs1);
- }
ds=1;
//current.isconst=0;
break;
case COP0:
cop0_alloc(¤t,i);
break;
- case COP1:
+ case RFE:
+ rfe_alloc(¤t,i);
break;
case COP2:
cop2_alloc(¤t,i);
break;
- case C1LS:
- c1ls_alloc(¤t,i);
- break;
case C2LS:
c2ls_alloc(¤t,i);
break;
}
break;
case SJUMP:
- //if((dops[i-1].opcode2&0x1E)==0) // BLTZ/BGEZ
- if((dops[i-1].opcode2&0x0E)==0) // BLTZ/BGEZ
{
alloc_cc(¤t,i-1);
dirty_reg(¤t,CCREG);
memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap));
memcpy(constmap[i],constmap[i-1],sizeof(constmap[i]));
}
- else
- // Alloc the delay slot in case the branch is taken
- if((dops[i-1].opcode2&0x1E)==2) // BLTZL/BGEZL
- {
- memcpy(&branch_regs[i-1],¤t,sizeof(current));
- branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<dops[i].rs1)|(1LL<<dops[i].rs2)|(1LL<<dops[i].rt1)|(1LL<<dops[i].rt2)))|1;
- alloc_cc(&branch_regs[i-1],i);
- dirty_reg(&branch_regs[i-1],CCREG);
- delayslot_alloc(&branch_regs[i-1],i);
- branch_regs[i-1].isconst=0;
- alloc_reg(¤t,i,CCREG); // Not taken path
- dirty_reg(¤t,CCREG);
- memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
- }
// FIXME: BLTZAL/BGEZAL
- if(dops[i-1].opcode2&0x10) { // BxxZAL
+ if ((dops[i-1].opcode2 & 0x1e) == 0x10) { // BxxZAL
alloc_reg(&branch_regs[i-1],i-1,31);
dirty_reg(&branch_regs[i-1],31);
}
// Count cycles in between branches
ccadj[i] = CLOCK_ADJUST(cc);
- if (i > 0 && (dops[i-1].is_jump || dops[i].itype == SYSCALL || dops[i].itype == HLECALL))
+ if (i > 0 && (dops[i-1].is_jump || dops[i].is_exception))
{
cc=0;
}
nr |= get_regm(regs[i].regmap_entry, INVCP);
}
}
- else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL)
+ else if (dops[i].may_except)
{
- // SYSCALL instruction (software interrupt)
- nr=0;
- }
- else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18)
- {
- // ERET instruction (return from interrupt)
+ // SYSCALL instruction, etc or conditional exception
nr=0;
}
else // Non-branch
if(ba[i]>=start && ba[i]<(start+i*4))
if(dops[i+1].itype==NOP||dops[i+1].itype==MOV||dops[i+1].itype==ALU
||dops[i+1].itype==SHIFTIMM||dops[i+1].itype==IMM16||dops[i+1].itype==LOAD
- ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR||dops[i+1].itype==C1LS
- ||dops[i+1].itype==SHIFT||dops[i+1].itype==COP1
+ ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR
+ ||dops[i+1].itype==SHIFT
||dops[i+1].itype==COP2||dops[i+1].itype==C2LS||dops[i+1].itype==C2OP)
{
int t=(ba[i]-start)>>2;
}
}
}
- if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=C1LS&&dops[i].itype!=SHIFT&&
+ if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=SHIFT&&
dops[i].itype!=NOP&&dops[i].itype!=MOV&&dops[i].itype!=ALU&&dops[i].itype!=SHIFTIMM&&
- dops[i].itype!=IMM16&&dops[i].itype!=LOAD&&dops[i].itype!=COP1)
+ dops[i].itype!=IMM16&&dops[i].itype!=LOAD)
{
memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap));
}
if(!dops[i+1].bt)
{
if(dops[i].itype==ALU||dops[i].itype==MOV||dops[i].itype==LOAD||dops[i].itype==SHIFTIMM||dops[i].itype==IMM16
- ||((dops[i].itype==COP1||dops[i].itype==COP2)&&dops[i].opcode2<3))
+ ||(dops[i].itype==COP2&&dops[i].opcode2<3))
{
if(dops[i+1].rs1) {
if((hr=get_reg(regs[i+1].regmap,dops[i+1].rs1))>=0)
}
// Preload target address for load instruction (non-constant)
if(dops[i+1].itype==LOAD&&dops[i+1].rs1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) {
- if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0)
+ if((hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1))>=0)
{
if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0)
{
}
// Load source into target register
if(dops[i+1].use_lt1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) {
- if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0)
+ if((hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1))>=0)
{
if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0)
{
}
}
}
- if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR/*||dops[i+1].itype==C1LS||||dops[i+1].itype==C2LS*/) {
+ if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR/*||dops[i+1].itype==C2LS*/) {
hr = -1;
if(dops[i+1].itype==LOAD)
- hr=get_reg(regs[i+1].regmap,dops[i+1].rt1);
+ hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1);
if(dops[i+1].itype==LOADLR||(dops[i+1].opcode&0x3b)==0x31||(dops[i+1].opcode&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2
hr=get_reg(regs[i+1].regmap,FTEMP);
if(dops[i+1].itype==STORE||dops[i+1].itype==STORELR||(dops[i+1].opcode&0x3b)==0x39||(dops[i+1].opcode&0x3b)==0x3a) { // SWC1/SDC1/SWC2/SDC2
}
}
}
- else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL)
- {
- // SYSCALL instruction (software interrupt)
- will_dirty_i=0;
- wont_dirty_i=0;
- }
- else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18)
+ else if (dops[i].may_except)
{
- // ERET instruction (return from interrupt)
+ // SYSCALL instruction, etc or conditional exception
will_dirty_i=0;
wont_dirty_i=0;
}
assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out);
+ if (addr & 3) {
+ if (addr != hack_addr) {
+ SysPrintf("game crash @%08x, ra=%08x\n", addr, psxRegs.GPR.n.ra);
+ hack_addr = addr;
+ }
+ return -1;
+ }
+
// this is just for speculation
for (i = 1; i < 32; i++) {
if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000)
state_rflags |= 1 << i;
}
- assert(!(addr & 3));
- start = addr & ~3;
+ start = addr;
new_dynarec_did_compile=1;
if (Config.HLE && start == 0x80001000) // hlecall
{
emit_jmp(0);
}
- // TODO: delay slot stubs?
// Stubs
- for(i=0;i<stubcount;i++)
+ for(i = 0; i < stubcount; i++)
{
switch(stubs[i].type)
{
case LOADB_STUB:
case LOADH_STUB:
case LOADW_STUB:
- case LOADD_STUB:
case LOADBU_STUB:
case LOADHU_STUB:
do_readstub(i);break;
case STOREB_STUB:
case STOREH_STUB:
case STOREW_STUB:
- case STORED_STUB:
do_writestub(i);break;
case CC_STUB:
do_ccstub(i);break;
case INVCODE_STUB:
do_invstub(i);break;
- case FP_STUB:
- do_cop1stub(i);break;
case STORELR_STUB:
do_unalignedwritestub(i);break;
+ case OVERFLOW_STUB:
+ do_overflowstub(i); break;
+ default:
+ assert(0);
}
}
diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c
-index 10d99ba..1e097ae 100644
+index 89716fa0..02a8d7c5 100644
--- a/libpcsxcore/new_dynarec/emu_if.c
+++ b/libpcsxcore/new_dynarec/emu_if.c
-@@ -405,13 +407,17 @@ static void ari64_shutdown()
+@@ -320,13 +320,18 @@ static void ari64_shutdown()
{
new_dynarec_cleanup();
new_dyna_pcsx_mem_shutdown();
+ (void)ari64_execute;
++ (void)ari64_execute_block;
}
+extern void intExecuteT();
ari64_init,
ari64_reset,
- ari64_execute,
-- ari64_execute_until,
+- ari64_execute_block,
+ intExecuteT,
+ intExecuteBlockT,
ari64_clear,
ari64_notify,
ari64_apply_config,
-@@ -481,7 +487,7 @@ static u32 memcheck_read(u32 a)
+@@ -395,7 +400,7 @@ static u32 memcheck_read(u32 a)
return *(u32 *)(psxM + (a & 0x1ffffc));
}
{
static psxRegisters oldregs;
diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c
-index bb471b6..8f68a3b 100644
+index 190f8fc7..5feb7a02 100644
--- a/libpcsxcore/new_dynarec/pcsxmem.c
+++ b/libpcsxcore/new_dynarec/pcsxmem.c
-@@ -272,6 +272,8 @@ static void write_biu(u32 value)
- if (address != 0xfffe0130)
+@@ -289,6 +289,8 @@ static void write_biu(u32 value)
return;
+ }
+extern u32 handler_cycle;
+handler_cycle = psxRegs.cycle;
- switch (value) {
- case 0x800: case 0x804:
- unmap_ram_write();
+ memprintf("write_biu %08x @%08x %u\n", value, psxRegs.pc, psxRegs.cycle);
+ psxRegs.biuReg = value;
+ }
diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c
-index ff0efbc..4459644 100644
+index 18bd6a4e..bc2eb3f6 100644
--- a/libpcsxcore/psxcounters.c
+++ b/libpcsxcore/psxcounters.c
-@@ -379,9 +379,12 @@ void psxRcntUpdate()
+@@ -389,9 +389,12 @@ void psxRcntUpdate()
/******************************************************************************/
_psxRcntWcount( index, value );
psxRcntSet();
-@@ -390,6 +393,7 @@ void psxRcntWcount( u32 index, u32 value )
+@@ -400,6 +403,7 @@ void psxRcntWcount( u32 index, u32 value )
void psxRcntWmode( u32 index, u32 value )
{
verboseLog( 1, "[RCNT %i] wmode: %x\n", index, value );
_psxRcntWmode( index, value );
_psxRcntWcount( index, 0 );
-@@ -401,6 +405,7 @@ void psxRcntWmode( u32 index, u32 value )
+@@ -411,6 +415,7 @@ void psxRcntWmode( u32 index, u32 value )
void psxRcntWtarget( u32 index, u32 value )
{
verboseLog( 1, "[RCNT %i] wtarget: %x\n", index, value );
rcnts[index].target = value;
-@@ -413,6 +418,7 @@ void psxRcntWtarget( u32 index, u32 value )
+@@ -423,6 +428,7 @@ void psxRcntWtarget( u32 index, u32 value )
u32 psxRcntRcount( u32 index )
{
u32 count;
count = _psxRcntRcount( index );
diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c
-index dbcb989..0716f5e 100644
+index 27ddfeab..d7c6ff05 100644
--- a/libpcsxcore/psxhw.c
+++ b/libpcsxcore/psxhw.c
-@@ -373,13 +373,14 @@ void psxHwWrite8(u32 add, u8 value) {
+@@ -377,13 +377,14 @@ void psxHwWrite8(u32 add, u8 value) {
case 0x1f801803: cdrWrite3(value); break;
default:
#ifdef PSXHW_LOG
PSXHW_LOG("*Known 8bit write at address %x value %x\n", add, value);
#endif
-@@ -504,6 +505,7 @@ void psxHwWrite16(u32 add, u16 value) {
+@@ -506,6 +507,7 @@ void psxHwWrite16(u32 add, u16 value) {
return;
}
psxHu16ref(add) = SWAPu16(value);
#ifdef PSXHW_LOG
PSXHW_LOG("*Unknown 16bit write at address %x value %x\n", add, value);
-@@ -699,9 +701,9 @@ void psxHwWrite32(u32 add, u32 value) {
+@@ -701,9 +703,9 @@ void psxHwWrite32(u32 add, u32 value) {
return;
case 0x1f801820:
case 0x1f801100:
#ifdef PSXHW_LOG
-@@ -759,6 +761,7 @@ void psxHwWrite32(u32 add, u32 value) {
+@@ -761,6 +763,7 @@ void psxHwWrite32(u32 add, u32 value) {
return;
}
#ifdef PSXHW_LOG
PSXHW_LOG("*Unknown 32bit write at address %x value %x\n", add, value);
diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c
-index e7e3269..8f4004d 100644
+index be15f782..6f07478f 100644
--- a/libpcsxcore/psxinterpreter.c
+++ b/libpcsxcore/psxinterpreter.c
-@@ -467,6 +467,8 @@ static void doBranch(u32 tar) {
- psxRegs.pc += 4;
- psxRegs.cycle += BIAS;
-
-+ (void)tmp;
-+#if 0
- // check for load delay
- tmp = psxRegs.code >> 26;
- switch (tmp) {
-@@ -500,13 +502,15 @@ static void doBranch(u32 tar) {
- }
- break;
- }
--
-+#endif
- psxBSC[psxRegs.code >> 26]();
+@@ -237,7 +237,7 @@ static inline void addCycle(psxRegisters *regs)
+ {
+ assert(regs->subCycleStep >= 0x10000);
+ regs->subCycle += regs->subCycleStep;
+- regs->cycle += regs->subCycle >> 16;
++ regs->cycle += 2; //regs->subCycle >> 16;
+ regs->subCycle &= 0xffff;
+ }
- branch = 0;
- psxRegs.pc = branchPC;
+@@ -434,7 +434,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) {
+ regs->CP0.n.Target = pc_final;
+ regs->branching = 0;
-+ psxRegs.cycle += BIAS;
++ psxRegs.cycle += 2;
psxBranchTest();
-+ psxRegs.cycle -= BIAS;
++ psxRegs.cycle -= 2;
}
- /*********************************************************
-@@ -616,12 +620,13 @@ void psxMULTU_stall() {
- psxMULTU();
+ static void doBranchReg(psxRegisters *regs, u32 tar) {
+@@ -967,7 +969,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) {
+ }
}
-+#define doBranchNotTaken() do { psxRegs.cycle += BIAS; execI(); psxBranchTest(); psxRegs.cycle -= BIAS; } while(0)
- /*********************************************************
- * Register branch logic *
- * Format: OP rs, offset *
- *********************************************************/
--#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_);
--#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } }
-+#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); else doBranchNotTaken();
-+#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } else doBranchNotTaken(); }
-
- void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0
- void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link
-@@ -703,7 +708,7 @@ void psxRFE() {
- * Register branch logic *
- * Format: OP rs, rt, offset *
- *********************************************************/
--#define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_);
-+#define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_); else doBranchNotTaken();
-
- void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt
- void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt
-@@ -901,7 +907,7 @@ void MTC0(int reg, u32 val) {
- }
+-OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
++OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); psxBranchTest(); }
+
+ // no exception
+ static inline void psxNULLne(psxRegisters *regs) {
+@@ -1175,18 +1177,19 @@ static void intReset() {
+ static inline void execI_(u8 **memRLUT, psxRegisters *regs) {
+ u32 pc = regs->pc;
+
+- addCycle(regs);
++ //addCycle(regs);
+ dloadStep(regs);
+
+ regs->pc += 4;
+ regs->code = fetch(regs, memRLUT, pc);
+ psxBSC[regs->code >> 26](regs, regs->code);
++ psxRegs.cycle += 2;
}
--void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); }
-+void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); psxBranchTest(); }
- void psxCTC0() { MTC0(_Rd_, _u32(_rRt_)); }
+ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) {
+ u32 pc = regs->pc;
+
+- addCycle(regs);
++ //addCycle(regs);
+ dloadStep(regs);
+
+ if (execBreakCheck(regs, pc))
+@@ -1195,6 +1198,7 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) {
+ regs->pc += 4;
+ regs->code = fetch(regs, memRLUT, pc);
+ psxBSC[regs->code >> 26](regs, regs->code);
++ psxRegs.cycle += 2;
+ }
- /*********************************************************
-@@ -1028,6 +1034,23 @@ void intExecuteBlock() {
- while (!branch2) execI();
+ static void intExecute() {
+@@ -1224,6 +1228,30 @@ void intExecuteBlock(enum blockExecCaller caller) {
+ execI_(memRLUT, regs_);
}
+extern void do_insn_trace(void);
+
+void intExecuteT() {
-+ for (;;) {
++ psxRegisters *regs_ = &psxRegs;
++ u8 **memRLUT = psxMemRLUT;
++ extern int stop;
++
++ while (!stop) {
+ do_insn_trace();
-+ execI();
++ execIbp(memRLUT, regs_);
+ }
+}
+
+void intExecuteBlockT() {
-+ branch2 = 0;
-+ while (!branch2) {
++ psxRegisters *regs_ = &psxRegs;
++ u8 **memRLUT = psxMemRLUT;
++
++ branchSeen = 0;
++ while (!branchSeen) {
+ do_insn_trace();
-+ execI();
++ execIbp(memRLUT, regs_);
+ }
+}
+
static void intClear(u32 Addr, u32 Size) {
}
-@@ -1050,7 +1073,7 @@ void intApplyConfig() {
+@@ -1271,7 +1299,7 @@ void intApplyConfig() {
assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall);
assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall);
psxBSC[18] = psxCOP2;
psxBSC[50] = gteLWC2;
psxBSC[58] = gteSWC2;
-@@ -1092,9 +1115,10 @@ void execI() {
- if (Config.Debug) ProcessDebug();
-
- psxRegs.pc += 4;
-- psxRegs.cycle += BIAS;
-
- psxBSC[psxRegs.code >> 26]();
-+
-+ psxRegs.cycle += BIAS;
- }
-
- R3000Acpu psxInt = {
diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c
-index 46cee0c..c814587 100644
+index 54219ae0..41168ced 100644
--- a/libpcsxcore/psxmem.c
+++ b/libpcsxcore/psxmem.c
-@@ -218,11 +218,13 @@ void psxMemShutdown() {
+@@ -278,10 +278,13 @@ void psxMemOnIsolate(int enable)
+ : R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL);
}
- static int writeok = 1;
+extern u32 last_io_addr;
-
++
u8 psxMemRead8(u32 mem) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -248,6 +250,7 @@ u16 psxMemRead16(u32 mem) {
+@@ -307,6 +310,7 @@ u16 psxMemRead16(u32 mem) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -273,6 +276,7 @@ u32 psxMemRead32(u32 mem) {
+@@ -332,6 +336,7 @@ u32 psxMemRead32(u32 mem) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -298,6 +302,7 @@ void psxMemWrite8(u32 mem, u8 value) {
+@@ -359,6 +364,7 @@ void psxMemWrite8(u32 mem, u8 value) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -325,6 +330,7 @@ void psxMemWrite16(u32 mem, u16 value) {
+@@ -386,6 +392,7 @@ void psxMemWrite16(u32 mem, u16 value) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -352,6 +358,7 @@ void psxMemWrite32(u32 mem, u32 value) {
+@@ -413,6 +420,7 @@ void psxMemWrite32(u32 mem, u32 value) {
char *p;
u32 t;
// if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n");
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
-@@ -381,6 +388,8 @@ void psxMemWrite32(u32 mem, u32 value) {
- } else {
- int i;
-
+@@ -431,6 +439,8 @@ void psxMemWrite32(u32 mem, u32 value) {
+ #endif
+ } else {
+ if (mem == 0xfffe0130) {
+extern u32 handler_cycle;
+handler_cycle = psxRegs.cycle;
- switch (value) {
- case 0x800: case 0x804:
- if (writeok == 0) break;
+ psxRegs.biuReg = value;
+ return;
+ }
diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c
-index 7e6f16b..0114947 100644
+index dffbf6e7..0a3bdb65 100644
--- a/libpcsxcore/r3000a.c
+++ b/libpcsxcore/r3000a.c
-@@ -120,6 +120,8 @@ void psxException(u32 code, u32 bd) {
+@@ -124,6 +124,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) {
}
void psxBranchTest() {