#include "new_dynarec_config.h"
#include "../psxhle.h"
#include "../psxinterpreter.h"
+#include "../psxcounters.h"
#include "../gte.h"
#include "emu_if.h" // emulator interface
#include "linkage_offsets.h"
return &hash_table[((vaddr>>16)^vaddr)&0xFFFF];
}
+#define HASH_TABLE_BAD 0xbac // placeholder code-address value stored in empty/removed hash_table and mini_ht slots
+
+static void hash_table_clear(void) // reset every hash_table bin to the empty state
+{
+ struct ht_entry *ht_bin;
+ int i, j;
+ for (i = 0; i < ARRAY_SIZE(hash_table); i++) {
+ for (j = 0; j < ARRAY_SIZE(hash_table[i].vaddr); j++) {
+ hash_table[i].vaddr[j] = ~0; // empty slots are tagged with vaddr ~0
+ hash_table[i].tcaddr[j] = (void *)(uintptr_t)HASH_TABLE_BAD; // and point at the placeholder value
+ }
+ }
+ // don't allow a lookup of vaddr ~0 to hit an empty slot
+ ht_bin = hash_table_get(~0); // the bin that vaddr ~0 maps to
+ for (j = 0; j < ARRAY_SIZE(ht_bin->vaddr); j++)
+ ht_bin->vaddr[j] = 1; // in this one bin the empty tag is 1, so comparing against ~0 fails
+}
+
static void hash_table_add(u_int vaddr, void *tcaddr)
{
struct ht_entry *ht_bin = hash_table_get(vaddr);
//printf("remove hash: %x\n",vaddr);
struct ht_entry *ht_bin = hash_table_get(vaddr);
if (ht_bin->vaddr[1] == vaddr) {
- ht_bin->vaddr[1] = -1;
- ht_bin->tcaddr[1] = NULL;
+ ht_bin->vaddr[1] = ~0;
+ ht_bin->tcaddr[1] = (void *)(uintptr_t)HASH_TABLE_BAD;
}
if (ht_bin->vaddr[0] == vaddr) {
ht_bin->vaddr[0] = ht_bin->vaddr[1];
ht_bin->tcaddr[0] = ht_bin->tcaddr[1];
- ht_bin->vaddr[1] = -1;
- ht_bin->tcaddr[1] = NULL;
+ ht_bin->vaddr[1] = ~0;
+ ht_bin->tcaddr[1] = (void *)(uintptr_t)HASH_TABLE_BAD;
}
}
+static void mini_ht_clear(void) // reset all mini_ht entries; compiles to an empty function unless USE_MINI_HT is defined
+{
+#ifdef USE_MINI_HT
+ int i;
+ for (i = 0; i < ARRAY_SIZE(mini_ht) - 1; i++) { // every entry except the last one
+ mini_ht[i][0] = ~0;
+ mini_ht[i][1] = HASH_TABLE_BAD;
+ }
+ mini_ht[i][0] = 1; // last entry gets 1 rather than ~0; presumably so a lookup of ~0 cannot hit, as in hash_table_clear() - TODO confirm against the mini_ht index function
+ mini_ht[i][1] = HASH_TABLE_BAD;
+#endif
+}
+
static void mark_invalid_code(u_int vaddr, u_int len, char invalid)
{
u_int vaddr_m = vaddr & 0x1fffffff;
}
if (hit) {
do_clear_cache();
-#ifdef USE_MINI_HT
- memset(mini_ht, -1, sizeof(mini_ht));
-#endif
+ mini_ht_clear();
}
if (inv_start <= (start_m & ~0xfff) && inv_end >= (start_m | 0xfff))
}
}
- #ifdef USE_MINI_HT
- memset(mini_ht, -1, sizeof(mini_ht));
- #endif
do_clear_cache();
+ mini_ht_clear();
}
// Add an entry to jump_out after making a link
clear_const(current,dops[i].rs2);
alloc_cc(current,i); // for stalls
dirty_reg(current,CCREG);
- if(dops[i].rs1&&dops[i].rs2)
- {
- current->u&=~(1LL<<HIREG);
- current->u&=~(1LL<<LOREG);
- alloc_reg(current,i,HIREG);
- alloc_reg(current,i,LOREG);
- alloc_reg(current,i,dops[i].rs1);
- alloc_reg(current,i,dops[i].rs2);
- dirty_reg(current,HIREG);
- dirty_reg(current,LOREG);
- }
- else
+ current->u &= ~(1ull << HIREG);
+ current->u &= ~(1ull << LOREG);
+ alloc_reg(current, i, HIREG);
+ alloc_reg(current, i, LOREG);
+ dirty_reg(current, HIREG);
+ dirty_reg(current, LOREG);
+ if ((dops[i].opcode2 & 0x3e) == 0x1a || (dops[i].rs1 && dops[i].rs2)) // div(u)
{
- // Multiply by zero is zero.
- // MIPS does not have a divide by zero exception.
- alloc_reg(current,i,HIREG);
- alloc_reg(current,i,LOREG);
- dirty_reg(current,HIREG);
- dirty_reg(current,LOREG);
- if (dops[i].rs1 && ((dops[i].opcode2 & 0x3e) == 0x1a)) // div(u) 0
- alloc_reg(current, i, dops[i].rs1);
+ alloc_reg(current, i, dops[i].rs1);
+ alloc_reg(current, i, dops[i].rs2);
}
+ // else multiply by zero is zero
}
#endif
static void do_invstub(int n)
{
literal_pool(20);
- assem_debug("do_invstub\n");
+ assem_debug("do_invstub %x\n", start + stubs[n].e*4);
u_int reglist = stubs[n].a;
u_int addrr = stubs[n].b;
int ofs_start = stubs[n].c;
imm_min -= cinfo[i].imm;
imm_max -= cinfo[i].imm;
add_stub(INVCODE_STUB, jaddr, out, reglist|(1<<HOST_CCREG),
- addr, imm_min, imm_max, 0);
+ addr, imm_min, imm_max, i);
}
+// used only to decide whether a store needs the code-overwrite check
+// (also true for the non-existent 0x20000000 mirror, which shouldn't matter)
+#define is_ram_addr(a) !((a) & 0x5f800000) // true when none of the 0x5f800000 bits are set in a
+
static void store_assemble(int i, const struct regstat *i_regs, int ccadj_)
{
int s,tl;
int memtarget=0,c=0;
int offset_reg = -1;
int fastio_reg_override = -1;
+ u_int addr_const = ~0;
u_int reglist=get_host_reglist(i_regs->regmap);
tl=get_reg(i_regs->regmap,dops[i].rs2);
s=get_reg(i_regs->regmap,dops[i].rs1);
offset=cinfo[i].imm;
if(s>=0) {
c=(i_regs->wasconst>>s)&1;
- if(c) {
- memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
+ if (c) {
+ addr_const = constmap[i][s] + offset;
+ memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE);
}
}
assert(tl>=0);
assert(addr >= 0);
if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
+ reglist |= 1u << addr;
if (!c) {
jaddr = emit_fastpath_cmp_jump(i, i_regs, addr,
&offset_reg, &fastio_reg_override, ccadj_);
}
if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG)
host_tempreg_release();
- if(jaddr) {
+ if (jaddr) {
// PCSX store handlers don't check invcode again
- reglist|=1<<addr;
- add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj_,reglist);
- jaddr=0;
- }
- {
- if(!c||memtarget) {
- do_store_smc_check(i, i_regs, reglist, addr);
- }
- }
- u_int addr_val=constmap[i][s]+offset;
- if(jaddr) {
add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj_,reglist);
- } else if(c&&!memtarget) {
- inline_writestub(type,i,addr_val,i_regs->regmap,dops[i].rs2,ccadj_,reglist);
}
+ if (!c || is_ram_addr(addr_const))
+ do_store_smc_check(i, i_regs, reglist, addr);
+ if (c && !memtarget)
+ inline_writestub(type, i, addr_const, i_regs->regmap, dops[i].rs2, ccadj_, reglist);
// basic current block modification detection..
// not looking back as that should be in mips cache already
// (see Spyro2 title->attract mode)
- if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
- SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
+ if (start + i*4 < addr_const && addr_const < start + slen*4) {
+ SysPrintf("write to %08x hits block %08x, pc=%08x\n", addr_const, start, start+i*4);
assert(i_regs->regmap==regs[i].regmap); // not delay slot
if(i_regs->regmap==regs[i].regmap) {
load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
void *done0, *done1, *done2;
int memtarget=0,c=0;
int offset_reg = -1;
- u_int reglist=get_host_reglist(i_regs->regmap);
+ u_int addr_const = ~0;
+ u_int reglist = get_host_reglist(i_regs->regmap);
tl=get_reg(i_regs->regmap,dops[i].rs2);
s=get_reg(i_regs->regmap,dops[i].rs1);
offset=cinfo[i].imm;
if(s>=0) {
- c=(i_regs->isconst>>s)&1;
- if(c) {
- memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
+ c = (i_regs->isconst >> s) & 1;
+ if (c) {
+ addr_const = constmap[i][s] + offset;
+ memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE);
}
}
assert(tl>=0);
assert(addr >= 0);
+ reglist |= 1u << addr;
if(!c) {
emit_cmpimm(addr, RAM_SIZE);
jaddr=out;
if (dops[i].opcode == 0x2A) { // SWL
// Write two msb into two least significant bytes
if (dops[i].rs2) emit_rorimm(tl, 16, tl);
- do_store_hword(addr, -1, tl, offset_reg, 0);
+ do_store_hword(addr, -1, tl, offset_reg, 1);
if (dops[i].rs2) emit_rorimm(tl, 16, tl);
}
else if (dops[i].opcode == 0x2E) { // SWR
// Write 3 lsb into three most significant bytes
do_store_byte(addr, tl, offset_reg);
if (dops[i].rs2) emit_rorimm(tl, 8, tl);
- do_store_hword(addr, 1, tl, offset_reg, 0);
+ do_store_hword(addr, 1, tl, offset_reg, 1);
if (dops[i].rs2) emit_rorimm(tl, 24, tl);
}
done1=out;
// 3
set_jump_target(case3, out);
if (dops[i].opcode == 0x2A) { // SWL
- do_store_word(addr, -3, tl, offset_reg, 0);
+ do_store_word(addr, -3, tl, offset_reg, 1);
}
else if (dops[i].opcode == 0x2E) { // SWR
do_store_byte(addr, tl, offset_reg);
set_jump_target(done2, out);
if (offset_reg == HOST_TEMPREG)
host_tempreg_release();
- if(!c||!memtarget)
+ if (!c || !memtarget)
add_stub_r(STORELR_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist);
- do_store_smc_check(i, i_regs, reglist, addr);
+ if (!c || is_ram_addr(addr_const))
+ do_store_smc_check(i, i_regs, reglist, addr);
}
static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_)
enum stub_type type;
int offset_reg = -1;
int fastio_reg_override = -1;
+ u_int addr_const = ~0;
u_int reglist=get_host_reglist(i_regs->regmap);
u_int copr=(source[i]>>16)&0x1f;
s=get_reg(i_regs->regmap,dops[i].rs1);
if (dops[i].opcode==0x3a) { // SWC2
reglist |= 1<<ar;
}
- if(s>=0) c=(i_regs->wasconst>>s)&1;
- memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE);
+ if (s >= 0) {
+ c = (i_regs->isconst >> s) & 1;
+ if (c) {
+ addr_const = constmap[i][s] + offset;
+ memtarget = ((signed int)addr_const) < (signed int)(0x80000000 + RAM_SIZE);
+ }
+ }
cop2_do_stall_check(0, i, i_regs, reglist);
host_tempreg_release();
if(jaddr2)
add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj_,reglist);
- if(dops[i].opcode==0x3a) // SWC2
+ if (dops[i].opcode == 0x3a && (!c || is_ram_addr(addr_const))) // SWC2
do_store_smc_check(i, i_regs, reglist, ar);
- if (dops[i].opcode==0x32) { // LWC2
+ if (dops[i].opcode == 0x32) { // LWC2
host_tempreg_acquire();
cop2_put_dreg(copr,tl,HOST_TEMPREG);
host_tempreg_release();
cinfo[i].addr = rs;
add_offset = 0;
}
- else if (dops[i].itype == STORELR) { // overwrites addr
- assert(ra >= 0);
- assert(rs != ra);
- emit_mov(rs, ra);
- cinfo[i].addr = ra;
- }
else
cinfo[i].addr = rs;
if (add_offset) {
if(i==0||dops[i].bt)
regs[i].loadedconst=0;
else {
- for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr]
- &&regmap[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1))
+ for (hr = 0; hr < HOST_REGS; hr++) {
+ if (hr == EXCLUDE_REG || regmap[hr] < 0 || pre[hr] != regmap[hr])
+ continue;
+ if ((((regs[i-1].isconst & regs[i-1].loadedconst) >> hr) & 1)
+ && regmap[hr] == regs[i-1].regmap[hr])
{
- regs[i].loadedconst|=1<<hr;
+ regs[i].loadedconst |= 1u << hr;
}
}
}
emit_storereg(reg, 0);
}
}
+ if (dops[i].opcode == 0x0f) { // LUI
+ emit_movimm(cinfo[i].imm << 16, 0);
+ emit_storereg(dops[i].rt1, 0);
+ }
emit_movimm(start+i*4,0);
emit_writeword(0,&pcaddr);
int cc = get_reg(regs[i].regmap_entry, CCREG);
restore_regs(reglist);
assem_debug("\\\\do_insn_cmp\n");
}
+static void drc_dbg_emit_wb_dirtys(int i, const struct regstat *i_regs) // debug helper: emit write-back of dirty non-const registers; i is not used in the body
+{
+ // write-out non-consts, consts are likely different because of get_final_value()
+ if (i_regs->dirty & ~i_regs->loadedconst) { // emit nothing when no register is dirty without also being flagged in loadedconst
+ assem_debug("/ drc_dbg_wb\n");
+ wb_dirtys(i_regs->regmap, i_regs->dirty & ~i_regs->loadedconst); // same mask as the test above
+ assem_debug("\\ drc_dbg_wb\n");
+ }
+}
#else
#define drc_dbg_emit_do_cmp(x,y)
+#define drc_dbg_emit_wb_dirtys(x,y)
#endif
// Used when a branch jumps into the delay slot of another branch
load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
ds_assemble(i+1,&branch_regs[i]);
+ drc_dbg_emit_wb_dirtys(i+1, &branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
if(cc==-1) {
emit_loadreg(CCREG,cc=HOST_CCREG);
out = ndrc->translation_cache;
}
+static int get_cycle_multiplier(void) // returns the cycle multiplier selected from the Config fields
+{
+ return Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT // override is nonzero and the regular setting equals the default
+ ? Config.cycle_multiplier_override : Config.cycle_multiplier; // then the override is returned, otherwise the regular setting
+}
+
// clear the state completely, instead of just marking
// things invalid like invalidate_all_pages() does
void new_dynarec_clear_full(void)
int n;
out = ndrc->translation_cache;
memset(invalid_code,1,sizeof(invalid_code));
- memset(hash_table,0xff,sizeof(hash_table));
- memset(mini_ht,-1,sizeof(mini_ht));
memset(shadow,0,sizeof(shadow));
+ hash_table_clear();
+ mini_ht_clear();
copy=shadow;
expirep = EXPIRITY_OFFSET;
pending_exception=0;
stat_clear(stat_blocks);
stat_clear(stat_links);
+ if (cycle_multiplier_old != Config.cycle_multiplier
+ || new_dynarec_hacks_old != new_dynarec_hacks)
+ {
+ SysPrintf("ndrc config: mul=%d, ha=%x, pex=%d\n",
+ get_cycle_multiplier(), new_dynarec_hacks, Config.PreciseExceptions);
+ }
cycle_multiplier_old = Config.cycle_multiplier;
new_dynarec_hacks_old = new_dynarec_hacks;
}
*limit = (addr & 0xa0600000) + 0x00200000;
return (u_int *)(psxM + (addr & 0x1fffff));
}
- else if (!Config.HLE && (
+ else if (
/* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/
- (0xbfc00000 <= addr && addr < 0xbfc80000)))
+ (0xbfc00000 <= addr && addr < 0xbfc80000))
{
// BIOS. The multiplier should be much higher as it's uncached 8bit mem,
// but timings in PCSX are too tied to the interpreter's 2-per-insn assumption
#endif
}
+static void force_intcall(int i) // overwrite decoded instruction i with an INTCALL entry
+{
+ memset(&dops[i], 0, sizeof(dops[i])); // discard everything previously decoded for this slot
+ dops[i].itype = INTCALL;
+ dops[i].rs1 = CCREG; // CCREG is recorded as the only source register
+ dops[i].is_exception = 1;
+ cinfo[i].ba = -1; // no branch address for this slot
+}
+
static int apply_hacks(void)
{
int i;
return 1;
}
}
+ if (Config.HLE)
+ {
+ if (start <= psxRegs.biosBranchCheck && psxRegs.biosBranchCheck < start + i*4)
+ {
+ i = (psxRegs.biosBranchCheck - start) / 4u + 23;
+ if (dops[i].is_jump && !dops[i+1].bt)
+ {
+ force_intcall(i);
+ dops[i+1].is_ds = 0;
+ }
+ }
+ }
return 0;
}
-static int is_ld_use_hazard(int ld_rt, const struct decoded_insn *op)
-{
- return ld_rt != 0 && (ld_rt == op->rs1 || ld_rt == op->rs2)
- && op->itype != LOADLR && op->itype != CJUMP && op->itype != SJUMP;
-}
-
-static void force_intcall(int i)
+static int is_ld_use_hazard(const struct decoded_insn *op_ld, // op_ld: the load; returns nonzero if op reading its result counts as a hazard
+ const struct decoded_insn *op) // op: the instruction being tested against the load
 {
- memset(&dops[i], 0, sizeof(dops[i]));
- dops[i].itype = INTCALL;
- dops[i].rs1 = CCREG;
- dops[i].is_exception = 1;
- cinfo[i].ba = -1;
+ if (op_ld->rt1 == 0 || (op_ld->rt1 != op->rs1 && op_ld->rt1 != op->rs2)) // load target is register 0, or op reads it through neither rs1 nor rs2
+ return 0;
+ if (op_ld->itype == LOADLR && op->itype == LOADLR) // both instructions are LOADLR
+ return op_ld->rt1 == op_ld->rs1; // then a hazard only when the load's rt1 equals its own rs1
+ return op->itype != CJUMP && op->itype != SJUMP; // otherwise a hazard unless op is a CJUMP or SJUMP
 }
static void disassemble_one(int i, u_int src)
else
dop = &dops[t];
}
- if ((dop && is_ld_use_hazard(dops[i].rt1, dop))
+ if ((dop && is_ld_use_hazard(&dops[i], dop))
|| (!dop && Config.PreciseExceptions)) {
// jump target wants DS result - potential load delay effect
SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start);
}
}
else if (i > 0 && dops[i-1].is_delay_load
- && is_ld_use_hazard(dops[i-1].rt1, &dops[i])
+ && is_ld_use_hazard(&dops[i-1], &dops[i])
&& (i < 2 || !dops[i-2].is_ujump)) {
SysPrintf("load delay @%08x (%08x)\n", start + i*4, start);
for (j = i - 1; j > 0 && dops[j-1].is_delay_load; j--)
done = 1;
}
if (dops[i].itype == HLECALL)
- stop = 1;
+ done = 1;
else if (dops[i].itype == INTCALL)
- stop = 2;
+ done = 2;
else if (dops[i].is_exception)
done = stop_after_jal ? 1 : 2;
if (done == 2) {
regs[i+2].wasdirty&=~(1<<hr);
}
assert(hr>=0);
+ #if 0 // what is this for? double allocs $0 in ps1_rom.bin
if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
{
regs[i].regmap[hr]=dops[i+1].rs1;
regs[i+1].wasdirty&=~(1<<hr);
regs[i].dirty&=~(1<<hr);
}
+ #endif
}
}
if (dops[i+1].itype == LOADLR || dops[i+1].opcode == 0x32) { // LWC2
hit = blocks_remove_matching_addrs(&blocks[block_i], base_offs, base_shift);
if (hit) {
do_clear_cache();
- #ifdef USE_MINI_HT
- memset(mini_ht, -1, sizeof(mini_ht));
- #endif
+ mini_ht_clear();
}
}
else
new_dynarec_did_compile=1;
if (Config.HLE && start == 0x80001000) // hlecall
{
- // XXX: is this enough? Maybe check hleSoftCall?
void *beginning = start_block();
emit_movimm(start,0);
return 0;
}
- cycle_multiplier_active = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT
- ? Config.cycle_multiplier_override : Config.cycle_multiplier;
+ cycle_multiplier_active = get_cycle_multiplier();
source = get_source_start(start, &pagelimit);
if (source == NULL) {
if (addr != hack_addr) {
- SysPrintf("Compile at bogus memory address: %08x\n", addr);
+ SysPrintf("Compile at bogus memory address: %08x, ra=%x\n",
+ addr, psxRegs.GPR.n.ra);
hack_addr = addr;
}
//abort();
void *instr_addr0_override = NULL;
int ds = 0;
- if (start == 0x80030000) {
- // nasty hack for the fastbios thing
- // override block entry to this code
+ if ((Config.HLE && start == 0x80000080) || start == 0x80030000) {
instr_addr0_override = out;
- emit_movimm(start,0);
- // abuse io address var as a flag that we
- // have already returned here once
- emit_readword(&address,1);
- emit_writeword(0,&pcaddr);
- emit_writeword(0,&address);
- emit_cmp(0,1);
+ emit_movimm(start, 0);
+ if (start == 0x80030000) {
+ // for BiosBootBypass() to work
+ // io address var abused as a "already been here" flag
+ emit_readword(&address, 1);
+ emit_writeword(0, &pcaddr);
+ emit_writeword(0, &address);
+ emit_cmp(0, 1);
+ }
+ else {
+ emit_readword(&psxRegs.cpuInRecursion, 1);
+ emit_writeword(0, &pcaddr);
+ emit_test(1, 1);
+ }
#ifdef __aarch64__
emit_jeq(out + 4*2);
emit_far_jump(new_dyna_leave);
ds = assemble(i, &regs[i], cinfo[i].ccadj);
+ drc_dbg_emit_wb_dirtys(i, &regs[i]);
if (dops[i].is_ujump)
literal_pool(1024);
else