//#define DISASM
//#define ASSEM_PRINT
+//#define STAT_PRINT
#ifdef ASSEM_PRINT
#define assem_debug printf
} dops[MAXBLOCK];
// used by asm:
- u_char *out;
struct ht_entry hash_table[65536] __attribute__((aligned(16)));
struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
- struct ll_entry *jump_dirty[4096];
+ static u_char *out;
+ static struct ll_entry *jump_dirty[4096];
static struct ll_entry *jump_out[4096];
static u_int start;
static u_int *source;
static struct regstat regs[MAXBLOCK];
static struct regstat branch_regs[MAXBLOCK];
static signed char minimum_free_regs[MAXBLOCK];
- static u_int needed_reg[MAXBLOCK];
- static u_int wont_dirty[MAXBLOCK];
- static u_int will_dirty[MAXBLOCK];
static int ccadj[MAXBLOCK];
static int slen;
static void *instr_addr[MAXBLOCK];
static int expirep;
static u_int stop_after_jal;
static u_int f1_hack;
+#ifdef STAT_PRINT
+ static int stat_bc_direct;
+ static int stat_bc_pre;
+ static int stat_bc_restore;
+ static int stat_jump_in_lookups;
+ static int stat_restore_tries;
+ static int stat_restore_compares;
+ static int stat_inv_addr_calls;
+ static int stat_inv_hits;
+ #define stat_inc(s) s++
+#else
+ #define stat_inc(s)
+#endif
int new_dynarec_hacks;
int new_dynarec_hacks_pergame;
extern int branch_target;
extern uintptr_t ram_offset;
extern uintptr_t mini_ht[32][2];
- extern u_char restore_candidate[512];
/* registers that may be allocated */
/* 1-31 gpr */
void *get_addr_ht(u_int vaddr);
void invalidate_block(u_int block);
void invalidate_addr(u_int addr);
-void remove_hash(int vaddr);
void dyna_linker();
void dyna_linker_ds();
void verify_code();
void jump_break_ds(u_int u0, u_int u1, u_int pc);
void jump_to_new_pc();
void call_gteStall();
-void clean_blocks(u_int page);
void add_jump_out(u_int vaddr, void *src);
void new_dyna_leave();
+static void *get_clean_addr(void *addr);
+static void get_bounds(void *addr, u_char **start, u_char **end);
+static void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr);
+
// Needed by assembler
static void wb_register(signed char r, const signed char regmap[], uint64_t dirty);
static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty);
ht_bin->tcaddr[0] = tcaddr;
}
+// Mark the 4K pages covering [vaddr, vaddr+len) as holding valid compiled
+// code by clearing their invalid_code flags. Flags are cleared for the 2MB
+// RAM mirrors and for the 0x80000000/0xa0000000 segment mirrors as well.
+static void mark_valid_code(u_int vaddr, u_int len)
+{
+ u_int i, j;
+ vaddr &= 0x1fffffff; // strip segment bits -> physical address
+ for (i = vaddr & ~0xfff; i < vaddr + len; i += 0x1000) {
+ // ram mirrors, but should not hurt bios
+ for (j = 0; j < 0x800000; j += 0x200000) {
+ invalid_code[(i|j) >> 12] =
+ invalid_code[(i|j|0x80000000u) >> 12] =
+ invalid_code[(i|j|0xa0000000u) >> 12] = 0;
+ }
+ }
+ // validity changed, so drop the cached invalidation range
+ inv_code_start = inv_code_end = ~0;
+}
+
// some messy ari64's code, seems to rely on unsigned 32bit overflow
static int doesnt_expire_soon(void *tcaddr)
{
return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2)));
}
+// Try to revive a previously-compiled block for vaddr without recompiling:
+// scan jump_dirty[] for an entry whose source code still matches RAM
+// (verify_dirty), then re-register all of that block's entry points on the
+// clean jump_in[] list and in the hash table.
+// Returns the clean code address for vaddr, or NULL if nothing restorable.
+void *ndrc_try_restore_block(u_int vaddr)
+{
+ u_char *source_start = NULL, *source_end = NULL;
+ void *found_stub = NULL, *found_clean = NULL;
+ u_int len, page = get_page(vaddr);
+ const struct ll_entry *head;
+ int ep_count = 0;
+
+ stat_inc(stat_restore_tries);
+ for (head = jump_dirty[page]; head != NULL; head = head->next)
+ {
+ if (head->vaddr != vaddr)
+ continue;
+ // don't restore blocks which are about to expire from the cache
+ if (!doesnt_expire_soon(head->addr))
+ continue;
+ stat_inc(stat_restore_compares);
+ if (!verify_dirty(head->addr))
+ continue;
+
+ found_stub = head->addr;
+ break;
+ }
+ if (!found_stub)
+ return NULL;
+
+ // translate the dirty-check stub to the real (clean) entry and find the
+ // guest source range this block was compiled from
+ found_clean = get_clean_addr(found_stub);
+ get_bounds(found_stub, &source_start, &source_end);
+ assert(source_start < source_end);
+ len = source_end - source_start;
+ mark_valid_code(vaddr, len);
+
+ // restore all entry points
+ for (head = jump_dirty[page]; head != NULL; head = head->next)
+ {
+ if (head->vaddr < vaddr || head->vaddr >= vaddr + len)
+ continue;
+
+ // only take entries belonging to the same compiled block (same bounds)
+ u_char *start = NULL, *end = NULL;
+ get_bounds(head->addr, &start, &end);
+ if (start != source_start || end != source_end)
+ continue;
+
+ void *clean_addr = get_clean_addr(head->addr);
+ ll_add_flags(jump_in + page, head->vaddr, head->reg_sv_flags, clean_addr);
+
+ // update (or add) the hash table entry to point at the clean code
+ int in_ht = 0;
+ struct ht_entry *ht_bin = hash_table_get(head->vaddr);
+ if (ht_bin->vaddr[0] == head->vaddr) {
+ ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
+ in_ht = 1;
+ }
+ if (ht_bin->vaddr[1] == head->vaddr) {
+ ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
+ in_ht = 1;
+ }
+ if (!in_ht)
+ hash_table_add(ht_bin, head->vaddr, clean_addr);
+ ep_count++;
+ }
+ inv_debug("INV: Restored %08x %p (%d)\n", vaddr, found_stub, ep_count);
+ stat_inc(stat_bc_restore);
+ return found_clean;
+}
+
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
void noinline *get_addr(u_int vaddr)
{
- u_int page=get_page(vaddr);
- u_int vpage=get_vpage(vaddr);
+ u_int page = get_page(vaddr);
struct ll_entry *head;
- //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
- head=jump_in[page];
- while(head!=NULL) {
- if(head->vaddr==vaddr) {
- //printf("TRACE: count=%d next=%d (get_addr match %x: %p)\n",Count,next_interupt,vaddr,head->addr);
+ void *code;
+
+ stat_inc(stat_jump_in_lookups);
+ for (head = jump_in[page]; head != NULL; head = head->next) {
+ if (head->vaddr == vaddr) {
hash_table_add(hash_table_get(vaddr), vaddr, head->addr);
return head->addr;
}
- head=head->next;
}
- head=jump_dirty[vpage];
- while(head!=NULL) {
- if(head->vaddr==vaddr) {
- //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %p)\n",Count,next_interupt,vaddr,head->addr);
- // Don't restore blocks which are about to expire from the cache
- if (doesnt_expire_soon(head->addr))
- if (verify_dirty(head->addr)) {
- //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
- invalid_code[vaddr>>12]=0;
- inv_code_start=inv_code_end=~0;
- if(vpage<2048) {
- restore_candidate[vpage>>3]|=1<<(vpage&7);
- }
- else restore_candidate[page>>3]|=1<<(page&7);
- struct ht_entry *ht_bin = hash_table_get(vaddr);
- if (ht_bin->vaddr[0] == vaddr)
- ht_bin->tcaddr[0] = head->addr; // Replace existing entry
- else
- hash_table_add(ht_bin, vaddr, head->addr);
+ code = ndrc_try_restore_block(vaddr);
+ if (code)
+ return code;
+
+ int r = new_recompile_block(vaddr);
+ if (r == 0)
+ return get_addr(vaddr);
- return head->addr;
- }
- }
- head=head->next;
- }
- //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
- int r=new_recompile_block(vaddr);
- if(r==0) return get_addr(vaddr);
// generate an address error
Status|=2;
Cause=(vaddr<<31)|(4<<2);
memset(regmap, -1, sizeof(regmap[0]) * HOST_REGS);
}
+// get_reg: get allocated host reg from mips reg
+// returns -1 if no such mips reg was allocated
#if defined(__arm__) && defined(HAVE_ARMV6) && HOST_REGS == 13 && EXCLUDE_REG == 11
extern signed char get_reg(const signed char regmap[], signed char r);
#endif
+// get reg as mask bit (1 << hr)
+// Safely yields 0 when r is unallocated: get_reg() returns -1, so
+// (-1 & 31) == 31 selects bit 31, which the final mask then clears.
+static u_int get_regm(const signed char regmap[], signed char r)
+{
+ return (1u << (get_reg(regmap, r) & 31)) & ~(1u << 31);
+}
+
static signed char get_reg_temp(const signed char regmap[])
{
int hr;
FUNCNAME(jump_syscall),
FUNCNAME(jump_syscall_ds),
FUNCNAME(call_gteStall),
- FUNCNAME(clean_blocks),
FUNCNAME(new_dyna_leave),
FUNCNAME(pcsx_mtc0),
FUNCNAME(pcsx_mtc0_ds),
return 0;
}
-void remove_hash(int vaddr)
+static void remove_hash(int vaddr)
{
//printf("remove hash: %x\n",vaddr);
struct ht_entry *ht_bin = hash_table_get(vaddr);
struct ll_entry *head;
struct ll_entry *next;
head=jump_in[page];
+ if (head) stat_inc(stat_inv_hits);
jump_in[page]=0;
while(head!=NULL) {
inv_debug("INVALIDATE: %x\n",head->vaddr);
//static int rhits;
// this check is done by the caller
//if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
+ stat_inc(stat_inv_addr_calls);
u_int page=get_vpage(addr);
if(page<2048) { // RAM
struct ll_entry *head;
u_int page;
for(page=0;page<4096;page++)
invalidate_page(page);
- for(page=0;page<1048576;page++)
- if(!invalid_code[page]) {
- restore_candidate[(page&2047)>>3]|=1<<(page&7);
- restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
- }
#ifdef USE_MINI_HT
memset(mini_ht,-1,sizeof(mini_ht));
#endif
//inv_debug("add_jump_out: to %p\n",get_pointer(src));
}
-// If a code block was found to be unmodified (bit was set in
-// restore_candidate) and it remains unmodified (bit is clear
-// in invalid_code) then move the entries for that 4K page from
-// the dirty list to the clean list.
-void clean_blocks(u_int page)
-{
- struct ll_entry *head;
- inv_debug("INV: clean_blocks page=%d\n",page);
- head=jump_dirty[page];
- while(head!=NULL) {
- if(!invalid_code[head->vaddr>>12]) {
- // Don't restore blocks which are about to expire from the cache
- if (doesnt_expire_soon(head->addr)) {
- if(verify_dirty(head->addr)) {
- u_char *start, *end;
- //printf("Possibly Restore %x (%p)\n",head->vaddr, head->addr);
- u_int i;
- u_int inv=0;
- get_bounds(head->addr, &start, &end);
- if (start - rdram < RAM_SIZE) {
- for (i = (start-rdram+0x80000000)>>12; i <= (end-1-rdram+0x80000000)>>12; i++) {
- inv|=invalid_code[i];
- }
- }
- else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
- inv=1;
- }
- if(!inv) {
- void *clean_addr = get_clean_addr(head->addr);
- if (doesnt_expire_soon(clean_addr)) {
- u_int ppage=page;
- inv_debug("INV: Restored %x (%p/%p)\n",head->vaddr, head->addr, clean_addr);
- //printf("page=%x, addr=%x\n",page,head->vaddr);
- //assert(head->vaddr>>12==(page|0x80000));
- ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
- struct ht_entry *ht_bin = hash_table_get(head->vaddr);
- if (ht_bin->vaddr[0] == head->vaddr)
- ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
- if (ht_bin->vaddr[1] == head->vaddr)
- ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
- }
- }
- }
- }
- }
- head=head->next;
- }
-}
-
/* Register allocation */
// Note: registers are allocated clean (unmodified state)
if (reg == CCREG) preferred_reg = HOST_CCREG;
if (reg == PTEMP || reg == FTEMP) preferred_reg = 12;
assert(PREFERRED_REG_FIRST != EXCLUDE_REG && EXCLUDE_REG != HOST_REGS);
+ assert(reg >= 0);
// Don't allocate unused registers
if((cur->u>>reg)&1) return;
// see if it's already allocated
- for(hr=0;hr<HOST_REGS;hr++)
- {
- if(cur->regmap[hr]==reg) return;
- }
+ if (get_reg(cur->regmap, reg) >= 0)
+ return;
// Keep the same mapping if the register was already allocated in a loop
preferred_reg = loop_reg(i,reg,preferred_reg);
// Write back host regs whose guest reg was dirty before this point
// (dirty_pre) but whose dirty bit is being dropped (not in dirty), so the
// in-memory guest register copies become valid again.
static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u)
{
//if(dirty_pre==dirty) return;
- int hr,reg;
- for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG) {
- reg=pre[hr];
- if(((~u)>>reg)&1) {
- if(reg>0) {
- if(((dirty_pre&~dirty)>>hr)&1) {
- if(reg>0&®<34) {
- emit_storereg(reg,hr);
- }
- else if(reg>=64) {
- assert(0);
- }
- }
- }
- }
- }
+ int hr, r;
+ for (hr = 0; hr < HOST_REGS; hr++) {
+ r = pre[hr];
+ // skip unallocated (r < 0), the zero reg, regs above 33 (1..31 GPRs
+ // plus presumably hi/lo -- matches the old reg<34 check), and regs
+ // marked unneeded in u
+ if (r < 1 || r > 33 || ((u >> r) & 1))
+ continue;
+ // dirty before, not dirty after -> store it back to memory
+ if (((dirty_pre & ~dirty) >> hr) & 1)
+ emit_storereg(r, hr);
}
}
// Load the specified registers
// This only loads the registers given as arguments because
// we don't want to load things that will be overwritten
-static void load_regs(signed char entry[],signed char regmap[],int rs1,int rs2)
+// Load a single guest reg rs into its allocated host reg, unless it is
+// unallocated or already live at block entry (entry[hr] == regmap[hr]).
+static inline void load_reg(signed char entry[], signed char regmap[], int rs)
{
- int hr;
- // Load 32-bit regs
- for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG&®map[hr]>=0) {
- if(entry[hr]!=regmap[hr]) {
- if(regmap[hr]==rs1||regmap[hr]==rs2)
- {
- if(regmap[hr]==0) {
- emit_zeroreg(hr);
- }
- else
- {
- emit_loadreg(regmap[hr],hr);
- }
- }
- }
- }
- }
+ int hr = get_reg(regmap, rs);
+ if (hr >= 0 && entry[hr] != regmap[hr])
+ emit_loadreg(regmap[hr], hr);
+}
+
+// Load the two source regs of an instruction (the rs1 != rs2 check just
+// avoids a redundant second lookup when both operands are the same reg).
+static void load_regs(signed char entry[], signed char regmap[], int rs1, int rs2)
+{
+ load_reg(entry, regmap, rs1);
+ if (rs1 != rs2)
+ load_reg(entry, regmap, rs2);
}
// Load registers prior to the start of a loop
// Preload host regs that the loop body expects at entry[] but that are not
// yet mapped in pre[] (and whose guest reg is not held in any other host
// reg), so the loop does not reload them on every iteration.
static void loop_preload(signed char pre[],signed char entry[])
{
int hr;
- for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG) {
- if(pre[hr]!=entry[hr]) {
- if(entry[hr]>=0) {
- if(get_reg(pre,entry[hr])<0) {
- assem_debug("loop preload:\n");
- //printf("loop preload: %d\n",hr);
- if(entry[hr]==0) {
- emit_zeroreg(hr);
- }
- else if(entry[hr]<TEMPREG)
- {
- emit_loadreg(entry[hr],hr);
- }
- else if(entry[hr]-64<TEMPREG)
- {
- emit_loadreg(entry[hr],hr);
- }
- }
- }
- }
+ for (hr = 0; hr < HOST_REGS; hr++) {
+ int r = entry[hr];
+ if (r >= 0 && pre[hr] != r && get_reg(pre, r) < 0) {
+ assem_debug("loop preload:\n");
+ // temporaries (>= TEMPREG) have no memory copy to load from
+ if (r < TEMPREG)
+ emit_loadreg(r, hr);
}
}
}
load_regs(regs[t].regmap_entry,regs[t].regmap,dops[t].rs1,dops[t].rs2);
address_generation(t,®s[t],regs[t].regmap_entry);
if (ram_offset && (dops[t].is_load || dops[t].is_store))
- load_regs(regs[t].regmap_entry,regs[t].regmap,ROREG,ROREG);
+ load_reg(regs[t].regmap_entry,regs[t].regmap,ROREG);
if (dops[t].is_store)
- load_regs(regs[t].regmap_entry,regs[t].regmap,INVCP,INVCP);
+ load_reg(regs[t].regmap_entry,regs[t].regmap,INVCP);
is_delayslot=0;
switch (dops[t].itype) {
case SYSCALL:
uint64_t bc_unneeded=branch_regs[i].u;
bc_unneeded|=1|(1LL<<dops[i].rt1);
wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,bc_unneeded);
- load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG);
+ load_reg(regs[i].regmap,branch_regs[i].regmap,CCREG);
if(!ra_done&&dops[i].rt1==31)
ujump_assemble_write_ra(i);
int cc,adj;
bc_unneeded|=1;
wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,bc_unneeded);
load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i].rs1,dops[i].rs2);
- load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG);
+ load_reg(regs[i].regmap,branch_regs[i].regmap,CCREG);
cc=get_reg(branch_regs[i].regmap,CCREG);
assert(cc==HOST_CCREG);
if(unconditional)
load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2);
address_generation(i+1,&branch_regs[i],0);
if (ram_offset)
- load_regs(regs[i].regmap,branch_regs[i].regmap,ROREG,ROREG);
+ load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
ds_assemble(i+1,&branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2);
address_generation(i+1,&branch_regs[i],0);
if (ram_offset)
- load_regs(regs[i].regmap,branch_regs[i].regmap,ROREG,ROREG);
+ load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
ds_assemble(i+1,&branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
bc_unneeded|=1;
wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,bc_unneeded);
load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i].rs1,dops[i].rs1);
- load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,CCREG);
+ load_reg(regs[i].regmap,branch_regs[i].regmap,CCREG);
if(dops[i].rt1==31) {
int rt,return_address;
rt=get_reg(branch_regs[i].regmap,31);
load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2);
address_generation(i+1,&branch_regs[i],0);
if (ram_offset)
- load_regs(regs[i].regmap,branch_regs[i].regmap,ROREG,ROREG);
+ load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
ds_assemble(i+1,&branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,dops[i+1].rs1,dops[i+1].rs2);
address_generation(i+1,&branch_regs[i],0);
if (ram_offset)
- load_regs(regs[i].regmap,branch_regs[i].regmap,ROREG,ROREG);
+ load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
ds_assemble(i+1,&branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
}
assert(hr<HOST_REGS);
if((dops[i].opcode&0x2e)==4||dops[i].opcode==0x11) { // BEQ/BNE/BEQL/BNEL/BC1
- load_regs(regs[i].regmap_entry,regs[i].regmap,CCREG,CCREG);
+ load_reg(regs[i].regmap_entry,regs[i].regmap,CCREG);
}
emit_addimm(HOST_CCREG, ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG);
if(dops[i].opcode==2) // J
load_regs(regs[0].regmap_entry,regs[0].regmap,dops[0].rs1,dops[0].rs2);
address_generation(0,®s[0],regs[0].regmap_entry);
if (ram_offset && (dops[0].is_load || dops[0].is_store))
- load_regs(regs[0].regmap_entry,regs[0].regmap,ROREG,ROREG);
+ load_reg(regs[0].regmap_entry,regs[0].regmap,ROREG);
if (dops[0].is_store)
- load_regs(regs[0].regmap_entry,regs[0].regmap,INVCP,INVCP);
+ load_reg(regs[0].regmap_entry,regs[0].regmap,INVCP);
is_delayslot=0;
switch (dops[0].itype) {
case SYSCALL:
memset(invalid_code,1,sizeof(invalid_code));
memset(hash_table,0xff,sizeof(hash_table));
memset(mini_ht,-1,sizeof(mini_ht));
- memset(restore_candidate,0,sizeof(restore_candidate));
memset(shadow,0,sizeof(shadow));
copy=shadow;
expirep=16384; // Expiry pointer, +2 blocks
ram_offset=(uintptr_t)rdram-0x80000000;
if (ram_offset!=0)
SysPrintf("warning: RAM is not directly mapped, performance will suffer\n");
+ SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n");
+ SysPrintf("%p/%p/%p/%p/%p\n", psxM, psxH, psxR, mem_rtab, out);
}
void new_dynarec_cleanup(void)
#ifdef ROM_COPY
if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");}
#endif
+ new_dynarec_print_stats();
}
static u_int *get_source_start(u_int addr, u_int *limit)
memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save));
}
+// Print the dynarec's internal counters (compile/lookup/restore/invalidate
+// activity and translation-cache usage), then reset them.
+// Compiles to a no-op unless STAT_PRINT is defined.
+void new_dynarec_print_stats(void)
+{
+#ifdef STAT_PRINT
+ printf("cc %3d,%3d,%3d lu%3d,%3d c%3d inv%3d,%3d tc_offs %zu\n",
+ stat_bc_pre, stat_bc_direct, stat_bc_restore,
+ stat_jump_in_lookups, stat_restore_tries, stat_restore_compares,
+ stat_inv_addr_calls, stat_inv_hits,
+ out - ndrc->translation_cache);
+ stat_bc_direct = stat_bc_pre = stat_bc_restore =
+ stat_jump_in_lookups = stat_restore_tries = stat_restore_compares =
+ stat_inv_addr_calls = stat_inv_hits = 0;
+#endif
+}
+
static int apply_hacks(void)
{
int i;
/* Is this the end of the block? */
if (i > 0 && dops[i-1].is_ujump) {
- if(dops[i-1].rt1==0) { // Continue past subroutine call (JAL)
- done=2;
+ if (dops[i-1].rt1 == 0) { // not jal
+ int found_bbranch = 0, t = (ba[i-1] - start) / 4;
+ if ((u_int)(t - i) < 64 && start + (t+64)*4 < pagelimit) {
+ // scan for a branch back to i+1
+ for (j = t; j < t + 64; j++) {
+ int tmpop = source[j] >> 26;
+ if (tmpop == 1 || ((tmpop & ~3) == 4)) {
+ int t2 = j + 1 + (int)(signed short)source[j];
+ if (t2 == i + 1) {
+ //printf("blk expand %08x<-%08x\n", start + (i+1)*4, start + j*4);
+ found_bbranch = 1;
+ break;
+ }
+ }
+ }
+ }
+ if (!found_bbranch)
+ done = 2;
}
else {
if(stop_after_jal) done=1;
static noinline void pass4_cull_unused_regs(void)
{
+ u_int last_needed_regs[4] = {0,0,0,0};
u_int nr=0;
int i;
for (i=slen-1;i>=0;i--)
{
int hr;
+ __builtin_prefetch(regs[i-2].regmap);
if(dops[i].is_jump)
{
if(ba[i]<start || ba[i]>=(start+slen*4))
if (!dops[i].is_ujump)
{
if(i<slen-2) {
- nr|=needed_reg[i+2];
+ nr |= last_needed_regs[(i+2) & 3];
for(hr=0;hr<HOST_REGS;hr++)
{
if(regmap_pre[i+2][hr]>=0&&get_reg(regs[i+2].regmap_entry,regmap_pre[i+2][hr])<0) nr&=~(1<<hr);
//if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
//if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
// Merge in delay slot
- for(hr=0;hr<HOST_REGS;hr++)
- {
- if(dops[i+1].rt1&&dops[i+1].rt1==regs[i].regmap[hr]) nr&=~(1<<hr);
- if(dops[i+1].rt2&&dops[i+1].rt2==regs[i].regmap[hr]) nr&=~(1<<hr);
- if(dops[i+1].rs1==regmap_pre[i][hr]) nr|=1<<hr;
- if(dops[i+1].rs2==regmap_pre[i][hr]) nr|=1<<hr;
- if(dops[i+1].rs1==regs[i].regmap_entry[hr]) nr|=1<<hr;
- if(dops[i+1].rs2==regs[i].regmap_entry[hr]) nr|=1<<hr;
- if(ram_offset && (dops[i+1].is_load || dops[i+1].is_store)) {
- if(regmap_pre[i][hr]==ROREG) nr|=1<<hr;
- if(regs[i].regmap_entry[hr]==ROREG) nr|=1<<hr;
- }
- if(dops[i+1].is_store) {
- if(regmap_pre[i][hr]==INVCP) nr|=1<<hr;
- if(regs[i].regmap_entry[hr]==INVCP) nr|=1<<hr;
- }
+ if (dops[i+1].rt1) nr &= ~get_regm(regs[i].regmap, dops[i+1].rt1);
+ if (dops[i+1].rt2) nr &= ~get_regm(regs[i].regmap, dops[i+1].rt2);
+ nr |= get_regm(regmap_pre[i], dops[i+1].rs1);
+ nr |= get_regm(regmap_pre[i], dops[i+1].rs2);
+ nr |= get_regm(regs[i].regmap_entry, dops[i+1].rs1);
+ nr |= get_regm(regs[i].regmap_entry, dops[i+1].rs2);
+ if (ram_offset && (dops[i+1].is_load || dops[i+1].is_store)) {
+ nr |= get_regm(regmap_pre[i], ROREG);
+ nr |= get_regm(regs[i].regmap_entry, ROREG);
+ }
+ if (dops[i+1].is_store) {
+ nr |= get_regm(regmap_pre[i], INVCP);
+ nr |= get_regm(regs[i].regmap_entry, INVCP);
}
}
else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL)
}
}
}
+ // Overwritten registers are not needed
+ if (dops[i].rt1) nr &= ~get_regm(regs[i].regmap, dops[i].rt1);
+ if (dops[i].rt2) nr &= ~get_regm(regs[i].regmap, dops[i].rt2);
+ nr &= ~get_regm(regs[i].regmap, FTEMP);
+ // Source registers are needed
+ nr |= get_regm(regmap_pre[i], dops[i].rs1);
+ nr |= get_regm(regmap_pre[i], dops[i].rs2);
+ nr |= get_regm(regs[i].regmap_entry, dops[i].rs1);
+ nr |= get_regm(regs[i].regmap_entry, dops[i].rs2);
+ if (ram_offset && (dops[i].is_load || dops[i].is_store)) {
+ nr |= get_regm(regmap_pre[i], ROREG);
+ nr |= get_regm(regs[i].regmap_entry, ROREG);
+ }
+ if (dops[i].is_store) {
+ nr |= get_regm(regmap_pre[i], INVCP);
+ nr |= get_regm(regs[i].regmap_entry, INVCP);
+ }
+
+ if (i > 0 && !dops[i].bt && regs[i].wasdirty)
for(hr=0;hr<HOST_REGS;hr++)
{
- // Overwritten registers are not needed
- if(dops[i].rt1&&dops[i].rt1==regs[i].regmap[hr]) nr&=~(1<<hr);
- if(dops[i].rt2&&dops[i].rt2==regs[i].regmap[hr]) nr&=~(1<<hr);
- if(FTEMP==regs[i].regmap[hr]) nr&=~(1<<hr);
- // Source registers are needed
- if(dops[i].rs1==regmap_pre[i][hr]) nr|=1<<hr;
- if(dops[i].rs2==regmap_pre[i][hr]) nr|=1<<hr;
- if(dops[i].rs1==regs[i].regmap_entry[hr]) nr|=1<<hr;
- if(dops[i].rs2==regs[i].regmap_entry[hr]) nr|=1<<hr;
- if(ram_offset && (dops[i].is_load || dops[i].is_store)) {
- if(regmap_pre[i][hr]==ROREG) nr|=1<<hr;
- if(regs[i].regmap_entry[hr]==ROREG) nr|=1<<hr;
- }
- if(dops[i].is_store) {
- if(regmap_pre[i][hr]==INVCP) nr|=1<<hr;
- if(regs[i].regmap_entry[hr]==INVCP) nr|=1<<hr;
- }
// Don't store a register immediately after writing it,
// may prevent dual-issue.
// But do so if this is a branch target, otherwise we
// might have to load the register before the branch.
- if(i>0&&!dops[i].bt&&((regs[i].wasdirty>>hr)&1)) {
+ if((regs[i].wasdirty>>hr)&1) {
if((regmap_pre[i][hr]>0&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) {
if(dops[i-1].rt1==regmap_pre[i][hr]) nr|=1<<hr;
if(dops[i-1].rt2==regmap_pre[i][hr]) nr|=1<<hr;
if(regs[i].regmap_entry[HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
}
// Save it
- needed_reg[i]=nr;
+ last_needed_regs[i & 3] = nr;
// Deallocate unneeded registers
for(hr=0;hr<HOST_REGS;hr++)
// so that we don't end up with lots of writes at the branches.
static noinline void pass6_clean_registers(int istart, int iend, int wr)
{
+ static u_int wont_dirty[MAXBLOCK];
+ static u_int will_dirty[MAXBLOCK];
int i;
int r;
u_int will_dirty_i,will_dirty_next,temp_will_dirty;
if(dops[i+1].rs2!=dops[i+1].rs1&&dops[i+1].rs2!=dops[i].rs1&&dops[i+1].rs2!=dops[i].rs2&&(dops[i+1].rs2!=dops[i].rt1||dops[i].rt1==0))
load_regs(regs[i].regmap_entry,regs[i].regmap,dops[i+1].rs2,dops[i+1].rs2);
if (ram_offset && (dops[i+1].is_load || dops[i+1].is_store))
- load_regs(regs[i].regmap_entry,regs[i].regmap,ROREG,ROREG);
+ load_reg(regs[i].regmap_entry,regs[i].regmap,ROREG);
if (dops[i+1].is_store)
- load_regs(regs[i].regmap_entry,regs[i].regmap,INVCP,INVCP);
+ load_reg(regs[i].regmap_entry,regs[i].regmap,INVCP);
}
else if(i+1<slen)
{
}
// TODO: if(is_ooo(i)) address_generation(i+1);
if (!dops[i].is_jump || dops[i].itype == CJUMP)
- load_regs(regs[i].regmap_entry,regs[i].regmap,CCREG,CCREG);
+ load_reg(regs[i].regmap_entry,regs[i].regmap,CCREG);
if (ram_offset && (dops[i].is_load || dops[i].is_store))
- load_regs(regs[i].regmap_entry,regs[i].regmap,ROREG,ROREG);
+ load_reg(regs[i].regmap_entry,regs[i].regmap,ROREG);
if (dops[i].is_store)
- load_regs(regs[i].regmap_entry,regs[i].regmap,INVCP,INVCP);
+ load_reg(regs[i].regmap_entry,regs[i].regmap,INVCP);
ds = assemble(i, ®s[i], ccadj[i]);
out = ndrc->translation_cache;
// Trap writes to any of the pages we compiled
- for(i=start>>12;i<=(start+slen*4)>>12;i++) {
- invalid_code[i]=0;
- }
- inv_code_start=inv_code_end=~0;
-
- // for PCSX we need to mark all mirrors too
- if(get_page(start)<(RAM_SIZE>>12))
- for(i=start>>12;i<=(start+slen*4)>>12;i++)
- invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
- invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
- invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
+ mark_valid_code(start, slen*4);
/* Pass 10 - Free memory by expiring oldest blocks */
#ifdef ASSEM_PRINT
fflush(stdout);
#endif
+ stat_inc(stat_bc_direct);
return 0;
}