#endif
#define RAM_SIZE 0x200000
-#define MAXBLOCK 4096
+#define MAXBLOCK 2048
#define MAX_OUTPUT_BLOCK_SIZE 262144
#define EXPIRITY_OFFSET (MAX_OUTPUT_BLOCK_SIZE * 2)
#define PAGE_COUNT 1024
{
signed char regmap_entry[HOST_REGS];
signed char regmap[HOST_REGS];
- uint64_t wasdirty;
- uint64_t dirty;
- uint64_t u;
+ u_int wasdirty;
+ u_int dirty;
u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true
u_int isconst; // ... but isconst is false when r2 is known (hr)
u_int loadedconst; // host regs that have constants loaded
u_int noevict; // can't evict this hr (alloced by current op)
//u_int waswritten; // MIPS regs that were used as store base before
+ uint64_t u;
};
struct ht_entry
u_char is_delay_load:1; // is_load + MFC/CFC
u_char is_exception:1; // unconditional, also interp. fallback
u_char may_except:1; // might generate an exception
+ u_char ls_type:2; // load/store type (ls_width_type)
} dops[MAXBLOCK];
+// Width/kind of a guest memory access, recorded in dops[].ls_type (2 bits).
+// LS_LR marks the unaligned-capable LWL/LWR/SWL/SWR forms.
+enum ls_width_type {
+ LS_8 = 0, LS_16, LS_32, LS_LR
+};
+
static struct compile_info
{
int imm;
static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_);
// Needed by assembler
-static void wb_register(signed char r, const signed char regmap[], uint64_t dirty);
-static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty);
-static void wb_needed_dirtys(const signed char i_regmap[], uint64_t i_dirty, int addr);
+static void wb_register(signed char r, const signed char regmap[], u_int dirty);
+static void wb_dirtys(const signed char i_regmap[], u_int i_dirty);
+static void wb_needed_dirtys(const signed char i_regmap[], u_int i_dirty, int addr);
static void load_all_regs(const signed char i_regmap[]);
static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[]);
static void load_regs_entry(int t);
clear_const(current,dops[i].rs2);
alloc_cc(current,i); // for stalls
dirty_reg(current,CCREG);
- if(dops[i].rs1&&dops[i].rs2)
- {
- current->u&=~(1LL<<HIREG);
- current->u&=~(1LL<<LOREG);
- alloc_reg(current,i,HIREG);
- alloc_reg(current,i,LOREG);
- alloc_reg(current,i,dops[i].rs1);
- alloc_reg(current,i,dops[i].rs2);
- dirty_reg(current,HIREG);
- dirty_reg(current,LOREG);
- }
- else
+ current->u &= ~(1ull << HIREG);
+ current->u &= ~(1ull << LOREG);
+ alloc_reg(current, i, HIREG);
+ alloc_reg(current, i, LOREG);
+ dirty_reg(current, HIREG);
+ dirty_reg(current, LOREG);
+ if ((dops[i].opcode2 & 0x3e) == 0x1a || (dops[i].rs1 && dops[i].rs2)) // div(u)
{
- // Multiply by zero is zero.
- // MIPS does not have a divide by zero exception.
- alloc_reg(current,i,HIREG);
- alloc_reg(current,i,LOREG);
- dirty_reg(current,HIREG);
- dirty_reg(current,LOREG);
- if (dops[i].rs1 && ((dops[i].opcode2 & 0x3e) == 0x1a)) // div(u) 0
- alloc_reg(current, i, dops[i].rs1);
+ alloc_reg(current, i, dops[i].rs1);
+ alloc_reg(current, i, dops[i].rs2);
}
+ // else multiply by zero is zero
}
#endif
}
// Write out a single register
-static void wb_register(signed char r, const signed char regmap[], uint64_t dirty)
+static void wb_register(signed char r, const signed char regmap[], u_int dirty)
{
int hr;
for(hr=0;hr<HOST_REGS;hr++) {
assert(regmap[hr]<64);
emit_storereg(r,hr);
}
+ break;
}
}
}
static void do_invstub(int n)
{
literal_pool(20);
- assem_debug("do_invstub\n");
+ assem_debug("do_invstub %x\n", start + stubs[n].e*4);
u_int reglist = stubs[n].a;
u_int addrr = stubs[n].b;
int ofs_start = stubs[n].c;
imm_min -= cinfo[i].imm;
imm_max -= cinfo[i].imm;
add_stub(INVCODE_STUB, jaddr, out, reglist|(1<<HOST_CCREG),
- addr, imm_min, imm_max, 0);
+ addr, imm_min, imm_max, i);
}
+// Determines only whether code-overwrite (SMC) checking is needed.
+// (Also matches the non-existent 0x20000000 mirror, but that shouldn't matter.)
+#define is_ram_addr(a) !((a) & 0x5f800000)
+
static void store_assemble(int i, const struct regstat *i_regs, int ccadj_)
{
int s,tl;
int memtarget=0,c=0;
int offset_reg = -1;
int fastio_reg_override = -1;
+ u_int addr_const = ~0;
u_int reglist=get_host_reglist(i_regs->regmap);
tl=get_reg(i_regs->regmap,dops[i].rs2);
s=get_reg(i_regs->regmap,dops[i].rs1);
offset=cinfo[i].imm;
if(s>=0) {
c=(i_regs->wasconst>>s)&1;
- if(c) {
- memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
+ if (c) {
+ addr_const = constmap[i][s] + offset;
+ memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE);
}
}
assert(tl>=0);
assert(addr >= 0);
if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
+ reglist |= 1u << addr;
if (!c) {
jaddr = emit_fastpath_cmp_jump(i, i_regs, addr,
&offset_reg, &fastio_reg_override, ccadj_);
}
if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG)
host_tempreg_release();
- if(jaddr) {
+ if (jaddr) {
// PCSX store handlers don't check invcode again
- reglist|=1<<addr;
- add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj_,reglist);
- jaddr=0;
- }
- {
- if(!c||memtarget) {
- do_store_smc_check(i, i_regs, reglist, addr);
- }
- }
- u_int addr_val=constmap[i][s]+offset;
- if(jaddr) {
add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj_,reglist);
- } else if(c&&!memtarget) {
- inline_writestub(type,i,addr_val,i_regs->regmap,dops[i].rs2,ccadj_,reglist);
}
+ if (!c || is_ram_addr(addr_const))
+ do_store_smc_check(i, i_regs, reglist, addr);
+ if (c && !memtarget)
+ inline_writestub(type, i, addr_const, i_regs->regmap, dops[i].rs2, ccadj_, reglist);
// basic current block modification detection..
// not looking back as that should be in mips cache already
// (see Spyro2 title->attract mode)
- if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
- SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
+ if (start + i*4 < addr_const && addr_const < start + slen*4) {
+ SysPrintf("write to %08x hits block %08x, pc=%08x\n", addr_const, start, start+i*4);
assert(i_regs->regmap==regs[i].regmap); // not delay slot
if(i_regs->regmap==regs[i].regmap) {
load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
void *done0, *done1, *done2;
int memtarget=0,c=0;
int offset_reg = -1;
- u_int reglist=get_host_reglist(i_regs->regmap);
+ u_int addr_const = ~0;
+ u_int reglist = get_host_reglist(i_regs->regmap);
tl=get_reg(i_regs->regmap,dops[i].rs2);
s=get_reg(i_regs->regmap,dops[i].rs1);
offset=cinfo[i].imm;
if(s>=0) {
- c=(i_regs->isconst>>s)&1;
- if(c) {
- memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
+ c = (i_regs->isconst >> s) & 1;
+ if (c) {
+ addr_const = constmap[i][s] + offset;
+ memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE);
}
}
assert(tl>=0);
assert(addr >= 0);
+ reglist |= 1u << addr;
if(!c) {
emit_cmpimm(addr, RAM_SIZE);
jaddr=out;
if (dops[i].opcode == 0x2A) { // SWL
// Write two msb into two least significant bytes
if (dops[i].rs2) emit_rorimm(tl, 16, tl);
- do_store_hword(addr, -1, tl, offset_reg, 0);
+ do_store_hword(addr, -1, tl, offset_reg, 1);
if (dops[i].rs2) emit_rorimm(tl, 16, tl);
}
else if (dops[i].opcode == 0x2E) { // SWR
// Write 3 lsb into three most significant bytes
do_store_byte(addr, tl, offset_reg);
if (dops[i].rs2) emit_rorimm(tl, 8, tl);
- do_store_hword(addr, 1, tl, offset_reg, 0);
+ do_store_hword(addr, 1, tl, offset_reg, 1);
if (dops[i].rs2) emit_rorimm(tl, 24, tl);
}
done1=out;
// 3
set_jump_target(case3, out);
if (dops[i].opcode == 0x2A) { // SWL
- do_store_word(addr, -3, tl, offset_reg, 0);
+ do_store_word(addr, -3, tl, offset_reg, 1);
}
else if (dops[i].opcode == 0x2E) { // SWR
do_store_byte(addr, tl, offset_reg);
set_jump_target(done2, out);
if (offset_reg == HOST_TEMPREG)
host_tempreg_release();
- if(!c||!memtarget)
+ if (!c || !memtarget)
add_stub_r(STORELR_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist);
- do_store_smc_check(i, i_regs, reglist, addr);
+ if (!c || is_ram_addr(addr_const))
+ do_store_smc_check(i, i_regs, reglist, addr);
}
static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_)
enum stub_type type;
int offset_reg = -1;
int fastio_reg_override = -1;
+ u_int addr_const = ~0;
u_int reglist=get_host_reglist(i_regs->regmap);
u_int copr=(source[i]>>16)&0x1f;
s=get_reg(i_regs->regmap,dops[i].rs1);
if (dops[i].opcode==0x3a) { // SWC2
reglist |= 1<<ar;
}
- if(s>=0) c=(i_regs->wasconst>>s)&1;
- memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE);
+ if (s >= 0) {
+ c = (i_regs->isconst >> s) & 1;
+ if (c) {
+ addr_const = constmap[i][s] + offset;
+ memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE);
+ }
+ }
cop2_do_stall_check(0, i, i_regs, reglist);
host_tempreg_release();
if(jaddr2)
add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj_,reglist);
- if(dops[i].opcode==0x3a) // SWC2
+ if (dops[i].opcode == 0x3a && (!c || is_ram_addr(addr_const))) // SWC2
do_store_smc_check(i, i_regs, reglist, ar);
- if (dops[i].opcode==0x32) { // LWC2
+ if (dops[i].opcode == 0x32) { // LWC2
host_tempreg_acquire();
cop2_put_dreg(copr,tl,HOST_TEMPREG);
host_tempreg_release();
cinfo[i].addr = rs;
add_offset = 0;
}
- else if (dops[i].itype == STORELR) { // overwrites addr
- assert(ra >= 0);
- assert(rs != ra);
- emit_mov(rs, ra);
- cinfo[i].addr = ra;
- }
else
cinfo[i].addr = rs;
if (add_offset) {
if(i==0||dops[i].bt)
regs[i].loadedconst=0;
else {
- for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG&®map[hr]>=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr]
- &®map[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1))
+ for (hr = 0; hr < HOST_REGS; hr++) {
+ if (hr == EXCLUDE_REG || regmap[hr] < 0 || pre[hr] != regmap[hr])
+ continue;
+ if ((((regs[i-1].isconst & regs[i-1].loadedconst) >> hr) & 1)
+ && regmap[hr] == regs[i-1].regmap[hr])
{
- regs[i].loadedconst|=1<<hr;
+ regs[i].loadedconst |= 1u << hr;
}
}
}
}
// Write out all dirty registers (except cycle count)
-static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty)
+#ifndef wb_dirtys
+static void wb_dirtys(const signed char i_regmap[], u_int i_dirty)
{
int hr;
for(hr=0;hr<HOST_REGS;hr++) {
}
}
}
+#endif
// Write out dirty registers that we need to reload (pair with load_needed_regs)
// This writes the registers not written by store_regs_bt
-static void wb_needed_dirtys(const signed char i_regmap[], uint64_t i_dirty, int addr)
+static void wb_needed_dirtys(const signed char i_regmap[], u_int i_dirty, int addr)
{
int hr;
int t=(addr-start)>>2;
}
// Load all registers (except cycle count)
+#ifndef load_all_regs
static void load_all_regs(const signed char i_regmap[])
{
int hr;
}
}
}
+#endif
// Load all current registers also needed by next instruction
static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[])
{
+ signed char regmap_sel[HOST_REGS];
int hr;
- for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG) {
- if(get_reg(next_regmap,i_regmap[hr])>=0) {
- if(i_regmap[hr]==0) {
- emit_zeroreg(hr);
- }
- else
- if(i_regmap[hr]>0 && i_regmap[hr]<TEMPREG && i_regmap[hr]!=CCREG)
- {
- emit_loadreg(i_regmap[hr],hr);
- }
- }
- }
+ // Build a selection map: keep an entry only where the next instruction
+ // still uses this register (same slot, or mapped to some other host reg);
+ // everything else is set to -1 so the load helper skips it.
+ for (hr = 0; hr < HOST_REGS; hr++) {
+ regmap_sel[hr] = -1;
+ if (hr != EXCLUDE_REG)
+ if (next_regmap[hr] == i_regmap[hr] || get_reg(next_regmap, i_regmap[hr]) >= 0)
+ regmap_sel[hr] = i_regmap[hr];
}
+ // Delegate the actual register loading to the shared helper.
+ load_all_regs(regmap_sel);
}
// Load all regs, storing cycle count if necessary
static void load_regs_entry(int t)
{
- int hr;
if(dops[t].is_ds) emit_addimm(HOST_CCREG,CLOCK_ADJUST(1),HOST_CCREG);
else if(cinfo[t].ccadj) emit_addimm(HOST_CCREG,-cinfo[t].ccadj,HOST_CCREG);
if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
emit_storereg(CCREG,HOST_CCREG);
}
- // Load 32-bit regs
- for(hr=0;hr<HOST_REGS;hr++) {
- if(regs[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]<TEMPREG) {
- if(regs[t].regmap_entry[hr]==0) {
- emit_zeroreg(hr);
- }
- else if(regs[t].regmap_entry[hr]!=CCREG)
- {
- emit_loadreg(regs[t].regmap_entry[hr],hr);
- }
- }
- }
+ // Load every register in this block entry's map via the shared helper
+ // (replaces the hand-rolled per-hr loop removed above).
+ load_all_regs(regs[t].regmap_entry);
}
// Store dirty registers prior to branch
static void disassemble_one(int i, u_int src)
{
unsigned int type, op, op2, op3;
+ enum ls_width_type ls_type = LS_32;
memset(&dops[i], 0, sizeof(dops[i]));
memset(&cinfo[i], 0, sizeof(cinfo[i]));
cinfo[i].ba = -1;
case 0x13: set_mnemonic(i, "COP3");
op2 = (src >> 21) & 0x1f;
break;
- case 0x20: set_mnemonic(i, "LB"); type=LOAD; break;
- case 0x21: set_mnemonic(i, "LH"); type=LOAD; break;
- case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break;
- case 0x23: set_mnemonic(i, "LW"); type=LOAD; break;
- case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break;
- case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break;
- case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break;
- case 0x28: set_mnemonic(i, "SB"); type=STORE; break;
- case 0x29: set_mnemonic(i, "SH"); type=STORE; break;
- case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break;
- case 0x2B: set_mnemonic(i, "SW"); type=STORE; break;
- case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break;
- case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; break;
- case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break;
+ case 0x20: set_mnemonic(i, "LB"); type=LOAD; ls_type = LS_8; break;
+ case 0x21: set_mnemonic(i, "LH"); type=LOAD; ls_type = LS_16; break;
+ case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; ls_type = LS_LR; break;
+ case 0x23: set_mnemonic(i, "LW"); type=LOAD; ls_type = LS_32; break;
+ case 0x24: set_mnemonic(i, "LBU"); type=LOAD; ls_type = LS_8; break;
+ case 0x25: set_mnemonic(i, "LHU"); type=LOAD; ls_type = LS_16; break;
+ case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; ls_type = LS_LR; break;
+ case 0x28: set_mnemonic(i, "SB"); type=STORE; ls_type = LS_8; break;
+ case 0x29: set_mnemonic(i, "SH"); type=STORE; ls_type = LS_16; break;
+ case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; ls_type = LS_LR; break;
+ case 0x2B: set_mnemonic(i, "SW"); type=STORE; ls_type = LS_32; break;
+ case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; ls_type = LS_LR; break;
+ case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; ls_type = LS_32; break;
+ case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; ls_type = LS_32; break;
case 0x3B:
if (Config.HLE && (src & 0x03ffffff) < ARRAY_SIZE(psxHLEt)) {
set_mnemonic(i, "HLECALL");
}
if (type == INTCALL)
SysPrintf("NI %08x @%08x (%08x)\n", src, start + i*4, start);
- dops[i].itype=type;
- dops[i].opcode2=op2;
+ dops[i].itype = type;
+ dops[i].opcode2 = op2;
+ dops[i].ls_type = ls_type;
/* Get registers/immediates */
dops[i].use_lt1=0;
gte_rs[i]=gte_rt[i]=0;
static noinline void pass1_disassemble(u_int pagelimit)
{
int i, j, done = 0, ni_count = 0;
+ int ds_next = 0;
for (i = 0; !done; i++)
{
unsigned int type, op, op2;
disassemble_one(i, source[i]);
+ dops[i].is_ds = ds_next; ds_next = 0;
type = dops[i].itype;
op = dops[i].opcode;
op2 = dops[i].opcode2;
dops[i].is_store = type == STORE || type == STORELR || op == 0x3a; // SWC2
dops[i].is_exception = type == SYSCALL || type == HLECALL || type == INTCALL;
dops[i].may_except = dops[i].is_exception || (type == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8;
+ ds_next = dops[i].is_jump;
if (((op & 0x37) == 0x21 || op == 0x25) // LH/SH/LHU
&& ((cinfo[i].imm & 1) || Config.PreciseExceptions))
// Don't recompile stuff that's already compiled
if(check_addr(start+i*4+4)) done=1;
// Don't get too close to the limit
- if(i>MAXBLOCK/2) done=1;
+ if (i > MAXBLOCK - 64)
+ done = 1;
}
if (dops[i].itype == HLECALL)
- stop = 1;
+ done = 1;
else if (dops[i].itype == INTCALL)
- stop = 2;
+ done = 2;
else if (dops[i].is_exception)
done = stop_after_jal ? 1 : 2;
if (done == 2) {
//assert(i<MAXBLOCK-1);
if(start+i*4==pagelimit-4) done=1;
assert(start+i*4<pagelimit);
- if (i==MAXBLOCK-1) done=1;
+ if (i == MAXBLOCK - 2)
+ done = 1;
// Stop if we're compiling junk
if (dops[i].itype == INTCALL && (++ni_count > 8 || dops[i].opcode == 0x11)) {
done=stop_after_jal=1;
}
}
+// Pass 2a: clear may_except on later loads/stores whose address alignment
+// check is made redundant by an earlier access in the same basic block that
+// uses the same base register, the same access width, and the same low
+// address bits (so it cannot newly fault on alignment).
+static noinline void pass2a_unneeded_other(void)
+{
+ int i, j;
+ for (i = 0; i < slen; i++)
+ {
+ // remove redundant alignment checks
+ // (skip if this op clobbers its own base (rt1 == rs1) or sits in a
+ // branch delay slot, where the following-ops scan would be unsafe)
+ if (dops[i].may_except && (dops[i].is_load || dops[i].is_store)
+ && dops[i].rt1 != dops[i].rs1 && !dops[i].is_ds)
+ {
+ int base = dops[i].rs1, lsb = cinfo[i].imm, ls_type = dops[i].ls_type;
+ int mask = ls_type == LS_32 ? 3 : 1; // word: 4-byte align, else 2-byte
+ lsb &= mask;
+ for (j = i + 1; j < slen; j++) {
+ if (dops[j].bt || dops[j].is_jump)
+ break; // branch target or control flow: basic block ends here
+ if ((dops[j].is_load || dops[j].is_store) && dops[j].rs1 == base
+ && dops[j].ls_type == ls_type && (cinfo[j].imm & mask) == lsb)
+ dops[j].may_except = 0;
+ if (dops[j].rt1 == base)
+ break; // base register overwritten; later addresses may differ
+ }
+ }
+ }
+}
+
static noinline void pass3_register_alloc(u_int addr)
{
struct regstat current; // Current register allocations/status
abort();
}
}
- dops[i].is_ds=ds;
+ assert(dops[i].is_ds == ds);
if(ds) {
ds=0; // Skip delay slot, already allocated as part of branch
// ...but we need to alloc it in case something jumps here
regs[i+2].wasdirty&=~(1<<hr);
}
assert(hr>=0);
+ #if 0 // what is this for? double allocs $0 in ps1_rom.bin
if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0)
{
regs[i].regmap[hr]=dops[i+1].rs1;
regs[i+1].wasdirty&=~(1<<hr);
regs[i].dirty&=~(1<<hr);
}
+ #endif
}
}
if (dops[i+1].itype == LOADLR || dops[i+1].opcode == 0x32) { // LWC2
new_dynarec_did_compile=1;
if (Config.HLE && start == 0x80001000) // hlecall
{
- // XXX: is this enough? Maybe check hleSoftCall?
void *beginning = start_block();
emit_movimm(start,0);
pass2_unneeded_regs(0,slen-1,0);
+ pass2a_unneeded_other();
+
/* Pass 3 - Register allocation */
pass3_register_alloc(addr);