struct ll_entry *next;
};
- u_int start;
- u_int *source;
- char insn[MAXBLOCK][10];
- u_char itype[MAXBLOCK];
- u_char opcode[MAXBLOCK];
- u_char opcode2[MAXBLOCK];
- u_char bt[MAXBLOCK];
- u_char rs1[MAXBLOCK];
- u_char rs2[MAXBLOCK];
- u_char rt1[MAXBLOCK];
- u_char rt2[MAXBLOCK];
- u_char us1[MAXBLOCK];
- u_char us2[MAXBLOCK];
- u_char dep1[MAXBLOCK];
- u_char dep2[MAXBLOCK];
- u_char lt1[MAXBLOCK];
+ // used by asm:
+ u_char *out;
+ u_int hash_table[65536][4] __attribute__((aligned(16)));
+ struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
+ struct ll_entry *jump_dirty[4096];
+
+ static struct ll_entry *jump_out[4096];
+ static u_int start;
+ static u_int *source;
+ static char insn[MAXBLOCK][10];
+ static u_char itype[MAXBLOCK];
+ static u_char opcode[MAXBLOCK];
+ static u_char opcode2[MAXBLOCK];
+ static u_char bt[MAXBLOCK];
+ static u_char rs1[MAXBLOCK];
+ static u_char rs2[MAXBLOCK];
+ static u_char rt1[MAXBLOCK];
+ static u_char rt2[MAXBLOCK];
+ static u_char us1[MAXBLOCK];
+ static u_char us2[MAXBLOCK];
+ static u_char dep1[MAXBLOCK];
+ static u_char dep2[MAXBLOCK];
+ static u_char lt1[MAXBLOCK];
static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
static uint64_t gte_rt[MAXBLOCK];
static uint64_t gte_unneeded[MAXBLOCK];
static u_int smrv_weak; // same, but somewhat less likely
static u_int smrv_strong_next; // same, but after current insn executes
static u_int smrv_weak_next;
- int imm[MAXBLOCK];
- u_int ba[MAXBLOCK];
- char likely[MAXBLOCK];
- char is_ds[MAXBLOCK];
- char ooo[MAXBLOCK];
- uint64_t unneeded_reg[MAXBLOCK];
- uint64_t unneeded_reg_upper[MAXBLOCK];
- uint64_t branch_unneeded_reg[MAXBLOCK];
- uint64_t branch_unneeded_reg_upper[MAXBLOCK];
- uint64_t p32[MAXBLOCK];
- uint64_t pr32[MAXBLOCK];
- signed char regmap_pre[MAXBLOCK][HOST_REGS];
+ static int imm[MAXBLOCK];
+ static u_int ba[MAXBLOCK];
+ static char likely[MAXBLOCK];
+ static char is_ds[MAXBLOCK];
+ static char ooo[MAXBLOCK];
+ static uint64_t unneeded_reg[MAXBLOCK];
+ static uint64_t unneeded_reg_upper[MAXBLOCK];
+ static uint64_t branch_unneeded_reg[MAXBLOCK];
+ static uint64_t branch_unneeded_reg_upper[MAXBLOCK];
+ static signed char regmap_pre[MAXBLOCK][HOST_REGS];
static uint64_t current_constmap[HOST_REGS];
static uint64_t constmap[MAXBLOCK][HOST_REGS];
static struct regstat regs[MAXBLOCK];
static struct regstat branch_regs[MAXBLOCK];
- signed char minimum_free_regs[MAXBLOCK];
- u_int needed_reg[MAXBLOCK];
- uint64_t requires_32bit[MAXBLOCK];
- u_int wont_dirty[MAXBLOCK];
- u_int will_dirty[MAXBLOCK];
- int ccadj[MAXBLOCK];
- int slen;
- u_int instr_addr[MAXBLOCK];
- u_int link_addr[MAXBLOCK][3];
- int linkcount;
- u_int stubs[MAXBLOCK*3][8];
- int stubcount;
- u_int literals[1024][2];
- int literalcount;
- int is_delayslot;
- int cop1_usable;
- u_char *out;
- struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
- struct ll_entry *jump_out[4096];
- struct ll_entry *jump_dirty[4096];
- u_int hash_table[65536][4] __attribute__((aligned(16)));
- char shadow[1048576] __attribute__((aligned(16)));
- void *copy;
- int expirep;
- static const u_int using_tlb=0;
- int new_dynarec_did_compile;
- int new_dynarec_hacks;
- u_int stop_after_jal;
+ static signed char minimum_free_regs[MAXBLOCK];
+ static u_int needed_reg[MAXBLOCK];
+ static u_int wont_dirty[MAXBLOCK];
+ static u_int will_dirty[MAXBLOCK];
+ static int ccadj[MAXBLOCK];
+ static int slen;
+ static u_int instr_addr[MAXBLOCK];
+ static u_int link_addr[MAXBLOCK][3];
+ static int linkcount;
+ static u_int stubs[MAXBLOCK*3][8];
+ static int stubcount;
+ static u_int literals[1024][2];
+ static int literalcount;
+ static int is_delayslot;
+ static int cop1_usable;
+ static char shadow[1048576] __attribute__((aligned(16)));
+ static void *copy;
+ static int expirep;
+ static u_int stop_after_jal;
#ifndef RAM_FIXED
static u_int ram_offset;
#else
static const u_int ram_offset=0;
#endif
+
+ int new_dynarec_hacks;
+ int new_dynarec_did_compile;
extern u_char restore_candidate[512];
extern int cycle_count;
#define CSREG 35 // Coprocessor status
#define CCREG 36 // Cycle count
#define INVCP 37 // Pointer to invalid_code
-#define MMREG 38 // Pointer to memory_map
+//#define MMREG 38 // Pointer to memory_map
#define ROREG 39 // ram offset (if rdram!=0x80000000)
#define TEMPREG 40
#define FTEMP 40 // FPU temporary register
#define PTEMP 41 // Prefetch temporary register
-#define TLREG 42 // TLB mapping offset
+//#define TLREG 42 // TLB mapping offset
#define RHASH 43 // Return address hash
#define RHTBL 44 // Return address hash table address
#define RTEMP 45 // JR/JALR address register
#define MAXREG 45
#define AGEN1 46 // Address generation temporary register
-#define AGEN2 47 // Address generation temporary register
-#define MGEN1 48 // Maptable address generation temporary register
-#define MGEN2 49 // Maptable address generation temporary register
+//#define AGEN2 47 // Address generation temporary register
+//#define MGEN1 48 // Maptable address generation temporary register
+//#define MGEN2 49 // Maptable address generation temporary register
#define BTREG 50 // Branch target temporary register
/* instruction types */
#define STORE 2 // Store
#define LOADLR 3 // Unaligned load
#define STORELR 4 // Unaligned store
-#define MOV 5 // Move
+#define MOV 5 // Move
#define ALU 6 // Arithmetic/logic
#define MULTDIV 7 // Multiply/divide
#define SHIFT 8 // Shift by register
void invalidate_block(u_int block);
void invalidate_addr(u_int addr);
void remove_hash(int vaddr);
-void jump_vaddr();
void dyna_linker();
void dyna_linker_ds();
void verify_code();
void cc_interrupt();
void fp_exception();
void fp_exception_ds();
-void jump_syscall();
void jump_syscall_hle();
-void jump_eret();
void jump_hlecall();
void jump_intcall();
void new_dyna_leave();
-// TLB
-void TLBWI_new();
-void TLBWR_new();
-void read_nomem_new();
-void read_nomemb_new();
-void read_nomemh_new();
-void read_nomemd_new();
-void write_nomem_new();
-void write_nomemb_new();
-void write_nomemh_new();
-void write_nomemd_new();
-void write_rdram_new();
-void write_rdramb_new();
-void write_rdramh_new();
-void write_rdramd_new();
-extern u_int memory_map[1048576];
-
// Needed by assembler
-void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
-void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
-void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
-void load_all_regs(signed char i_regmap[]);
-void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
-void load_regs_entry(int t);
-void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
+static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
+static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
+static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
+static void load_all_regs(signed char i_regmap[]);
+static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
+static void load_regs_entry(int t);
+static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
-int tracedebug=0;
+static int verify_dirty(u_int *ptr);
+static int get_final_value(int hr, int i, int *value);
+static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e);
+static void add_to_linker(int addr,int target,int ext);
+
+static int tracedebug=0;
//#define DEBUG_CYCLE_COUNT 1
for (hr=0;hr<HOST_REGS;hr++) {
if((cur->dirty>>hr)&1) {
reg=cur->regmap[hr];
- if(reg>=64)
+ if(reg>=64)
if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
}
}
if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
hsn[FTEMP]=0;
}
- // Don't remove the TLB registers either
- if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS || itype[i]==C2LS) {
- hsn[TLREG]=0;
- }
// Don't remove the miniht registers
if(itype[i]==UJUMP||itype[i]==RJUMP)
{
int j;
int b=-1;
int rn=10;
-
+
if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
{
if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
void alloc_all(struct regstat *cur,int i)
{
int hr;
-
+
for(hr=0;hr<HOST_REGS;hr++) {
if(hr!=EXCLUDE_REG) {
if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
{
struct ll_entry *next;
while(*head) {
- if(((u_int)((*head)->addr)>>shift)==(addr>>shift) ||
+ if(((u_int)((*head)->addr)>>shift)==(addr>>shift) ||
((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
{
inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
#ifdef __arm__
do_clear_cache();
#endif
-
+
// Don't trap writes
invalid_code[block]=1;
}
else current->is32|=1LL<<rt1[i];
dirty_reg(current,rt1[i]);
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
// LWL/LWR need a temporary register for the old value
if(opcode[i]==0x22||opcode[i]==0x26)
{
{
alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
}
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
alloc_reg_temp(current,i,-1);
minimum_free_regs[i]=1;
if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
alloc_reg64(current,i,rs2[i]);
if(rs2[i]) alloc_reg(current,i,FTEMP);
}
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
#if defined(HOST_IMM8)
// On CPUs without 32-bit immediates we need a pointer to invalid_code
else alloc_reg(current,i,INVCP);
if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
alloc_reg64(current,i,FTEMP);
}
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
#if defined(HOST_IMM8)
// On CPUs without 32-bit immediates we need a pointer to invalid_code
else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
clear_const(current,rt1[i]);
if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
alloc_reg(current,i,FTEMP);
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
#if defined(HOST_IMM8)
// On CPUs without 32-bit immediates we need a pointer to invalid_code
- else if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
+ if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
alloc_reg(current,i,INVCP);
#endif
// We need a temporary register for address generation
//else ...
}
-add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
+static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
{
stubs[stubcount][0]=type;
stubs[stubcount][1]=addr;
//printf("TRACE: %x\n",(&i)[-1]);
}
-void tlb_debug(u_int cause, u_int addr, u_int iaddr)
-{
- printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
-}
-
void alu_assemble(int i,struct regstat *i_regs)
{
if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
c=(i_regs->wasconst>>s)&1;
if (c) {
memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
- if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
}
}
//printf("load_assemble: c=%d\n",c);
assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
reglist&=~(1<<tl);
if(th>=0) reglist&=~(1<<th);
- if(!using_tlb) {
- if(!c) {
- #ifdef RAM_OFFSET
- map=get_reg(i_regs->regmap,ROREG);
- if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
- #endif
-//#define R29_HACK 1
- #ifdef R29_HACK
- // Strmnnrmn's speed hack
- if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
- #endif
- {
- jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
- }
+ if(!c) {
+ #ifdef RAM_OFFSET
+ map=get_reg(i_regs->regmap,ROREG);
+ if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
+ #endif
+ #ifdef R29_HACK
+ // Strmnnrmn's speed hack
+ if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
+ #endif
+ {
+ jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
}
- else if(ram_offset&&memtarget) {
- emit_addimm(addr,ram_offset,HOST_TEMPREG);
- fastload_reg_override=HOST_TEMPREG;
- }
- }else{ // using tlb
- int x=0;
- if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
- if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
- map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- reglist&=~(1<<map);
- map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
- do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
+ }
+ else if(ram_offset&&memtarget) {
+ emit_addimm(addr,ram_offset,HOST_TEMPREG);
+ fastload_reg_override=HOST_TEMPREG;
}
int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
if (opcode[i]==0x20) { // LB
#endif
{
//emit_xorimm(addr,3,tl);
- //gen_tlb_addr_r(tl,map);
//emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
//emit_movswl_indexed_tlb(x,tl,map,tl);
//else
if(map>=0) {
- gen_tlb_addr_r(a,map);
emit_movswl_indexed(x,a,tl);
}else{
#if 1 //def RAM_OFFSET
#endif
{
//emit_xorimm(addr,3,tl);
- //gen_tlb_addr_r(tl,map);
//emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
//emit_movzwl_indexed_tlb(x,tl,map,tl);
//#else
if(map>=0) {
- gen_tlb_addr_r(a,map);
emit_movzwl_indexed(x,a,tl);
}else{
#if 1 //def RAM_OFFSET
if(!dummy) {
int a=addr;
if(fastload_reg_override) a=fastload_reg_override;
- //gen_tlb_addr_r(tl,map);
//if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
//emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
#ifdef HOST_IMM_ADDR32
c=(i_regs->wasconst>>s)&1;
if(c) {
memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
- if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
}
}
assert(tl>=0);
if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
if(offset||s<0||c) addr=temp;
else addr=s;
- if(!using_tlb) {
- if(!c) {
- jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
- }
- else if(ram_offset&&memtarget) {
- emit_addimm(addr,ram_offset,HOST_TEMPREG);
- faststore_reg_override=HOST_TEMPREG;
- }
- }else{ // using tlb
- int x=0;
- if (opcode[i]==0x28) x=3; // SB
- if (opcode[i]==0x29) x=2; // SH
- map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- reglist&=~(1<<map);
- map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
- do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
+ if(!c) {
+ jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
+ }
+ else if(ram_offset&&memtarget) {
+ emit_addimm(addr,ram_offset,HOST_TEMPREG);
+ faststore_reg_override=HOST_TEMPREG;
}
if (opcode[i]==0x28) { // SB
if(!c) a=addr;
#endif
if(faststore_reg_override) a=faststore_reg_override;
- //gen_tlb_addr_w(temp,map);
//emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
emit_writebyte_indexed_tlb(tl,x,a,map,a);
}
//emit_writehword_indexed_tlb(tl,x,temp,map,temp);
//#else
if(map>=0) {
- gen_tlb_addr_w(a,map);
emit_writehword_indexed(tl,x,a);
}else
//emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
jaddr=0;
}
- if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
+ if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
if(!c||memtarget) {
#ifdef DESTRUCTIVE_SHIFT
// The x86 shift operation is 'destructive'; it overwrites the
c=(i_regs->isconst>>s)&1;
if(c) {
memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
- if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
}
}
assert(tl>=0);
if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
}
assert(temp>=0);
- if(!using_tlb) {
- if(!c) {
- emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
- if(!offset&&s!=temp) emit_mov(s,temp);
- jaddr=(int)out;
- emit_jno(0);
- }
- else
- {
- if(!memtarget||!rs1[i]) {
- jaddr=(int)out;
- emit_jmp(0);
- }
- }
- #ifdef RAM_OFFSET
- int map=get_reg(i_regs->regmap,ROREG);
- if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
- gen_tlb_addr_w(temp,map);
- #else
- if((u_int)rdram!=0x80000000)
- emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
- #endif
- }else{ // using tlb
- int map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- reglist&=~(1<<map);
- map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
- if(!c&&!offset&&s>=0) emit_mov(s,temp);
- do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
- if(!jaddr&&!memtarget) {
+ if(!c) {
+ emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
+ if(!offset&&s!=temp) emit_mov(s,temp);
+ jaddr=(int)out;
+ emit_jno(0);
+ }
+ else
+ {
+ if(!memtarget||!rs1[i]) {
jaddr=(int)out;
emit_jmp(0);
}
- gen_tlb_addr_w(temp,map);
}
+ #ifdef RAM_OFFSET
+ int map=get_reg(i_regs->regmap,ROREG);
+ if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
+ #else
+ if((u_int)rdram!=0x80000000)
+ emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
+ #endif
if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
temp2=get_reg(i_regs->regmap,FTEMP);
}
if(!c||!memtarget)
add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
- if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
+ if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
#ifdef RAM_OFFSET
int map=get_reg(i_regs->regmap,ROREG);
if(map<0) map=HOST_TEMPREG;
offset=imm[i];
assert(rs1[i]>0);
assert(tl>=0);
- assert(!using_tlb);
for(hr=0;hr<HOST_REGS;hr++) {
if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
if(itype[i]==LOAD||itype[i]==LOADLR||itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS||itype[i]==C2LS) {
int ra=-1;
int agr=AGEN1+(i&1);
- int mgr=MGEN1+(i&1);
if(itype[i]==LOAD) {
ra=get_reg(i_regs->regmap,rt1[i]);
- if(ra<0) ra=get_reg(i_regs->regmap,-1);
+ if(ra<0) ra=get_reg(i_regs->regmap,-1);
assert(ra>=0);
}
if(itype[i]==LOADLR) {
}
}
int rs=get_reg(i_regs->regmap,rs1[i]);
- int rm=get_reg(i_regs->regmap,TLREG);
if(ra>=0) {
int offset=imm[i];
int c=(i_regs->wasconst>>rs)&1;
if(rs1[i]==0) {
// Using r0 as a base address
- /*if(rm>=0) {
- if(!entry||entry[rm]!=mgr) {
- generate_map_const(offset,rm);
- } // else did it in the previous cycle
- }*/
if(!entry||entry[ra]!=agr) {
if (opcode[i]==0x22||opcode[i]==0x26) {
emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
}else{
#ifdef HOST_IMM_ADDR32
- if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32) || // LWC1/LDC1/LWC2/LDC2
- (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000))
+ if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2
#endif
emit_movimm(constmap[i][rs]+offset,ra);
regs[i].loadedconst|=1<<ra;
emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
}else{
#ifdef HOST_IMM_ADDR32
- if((itype[i+1]!=LOAD&&(opcode[i+1]&0x3b)!=0x31&&(opcode[i+1]&0x3b)!=0x32) || // LWC1/LDC1/LWC2/LDC2
- (using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000))
+ if((itype[i+1]!=LOAD&&(opcode[i+1]&0x3b)!=0x31&&(opcode[i+1]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2
#endif
emit_movimm(constmap[i+1][rs]+offset,ra);
regs[i+1].loadedconst|=1<<ra;
}
}
-int get_final_value(int hr, int i, int *value)
+static int get_final_value(int hr, int i, int *value)
{
int reg=regs[i].regmap[hr];
while(i<slen-1) {
// Load in delay slot, out-of-order execution
if(itype[i+2]==LOAD&&rs1[i+2]==reg&&rt1[i+2]==reg&&((regs[i+1].wasconst>>hr)&1))
{
- #ifdef HOST_IMM_ADDR32
- if(!using_tlb||((signed int)constmap[i][hr]+imm[i+2])<(signed int)0xC0000000) return 0;
- #endif
// Precompute load address
*value=constmap[i][hr]+imm[i+2];
return 1;
}
if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg)
{
- #ifdef HOST_IMM_ADDR32
- if(!using_tlb||((signed int)constmap[i][hr]+imm[i+1])<(signed int)0xC0000000) return 0;
- #endif
// Precompute load address
*value=constmap[i][hr]+imm[i+1];
//printf("c=%x imm=%x\n",(int)constmap[i][hr],imm[i+1]);
{
return 0;
}
- else
+ else
if((i_dirty>>hr)&1)
{
if(i_regmap[hr]<TEMPREG)
if(rs1[i]) {
if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs1[i])&1)
emit_loadreg(rs1[i],s1l);
- }
+ }
else {
if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1)
emit_loadreg(rs2[i],s1l);
load_all_regs(branch_regs[i].regmap);
}
emit_jmp(stubs[n][2]); // return address
-
+
/* This works but uses a lot of memory...
emit_readword((int)&last_count,ECX);
emit_add(HOST_CCREG,ECX,EAX);
emit_jmpreg(EAX);*/
}
-add_to_linker(int addr,int target,int ext)
+static void add_to_linker(int addr,int target,int ext)
{
link_addr[linkcount][0]=addr;
link_addr[linkcount][1]=target;
- link_addr[linkcount][2]=ext;
+ link_addr[linkcount][2]=ext;
linkcount++;
}
#endif
{
#ifdef REG_PREFETCH
- if(temp>=0)
+ if(temp>=0)
{
if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
}
address_generation(i+1,i_regs,regs[i].regmap_entry);
#ifdef REG_PREFETCH
int temp=get_reg(branch_regs[i].regmap,PTEMP);
- if(rt1[i]==31&&temp>=0)
+ if(rt1[i]==31&&temp>=0)
{
int return_address=start+i*4+8;
- if(get_reg(branch_regs[i].regmap,31)>0)
+ if(get_reg(branch_regs[i].regmap,31)>0)
if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
}
#endif
assert(rt>=0);
return_address=start+i*4+8;
#ifdef REG_PREFETCH
- if(temp>=0)
+ if(temp>=0)
{
if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
}
}
address_generation(i+1,i_regs,regs[i].regmap_entry);
#ifdef REG_PREFETCH
- if(rt1[i]==31)
+ if(rt1[i]==31)
{
if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) {
int return_address=start+i*4+8;
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
if(i>(ba[i]-start)>>2) invert=1;
#endif
-
+
if(ooo[i]) {
s1l=get_reg(branch_regs[i].regmap,rs1[i]);
s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
cc=get_reg(branch_regs[i].regmap,CCREG);
assert(cc==HOST_CCREG);
- if(unconditional)
+ if(unconditional)
store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
//do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional);
//assem_debug("cycle count (adj)\n");
emit_jne(0);
}
} // if(!only32)
-
+
//printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
assert(s1l>=0);
if(opcode[i]==4) // BEQ
emit_jne(1);
}
} // if(!only32)
-
+
//printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
assert(s1l>=0);
if((opcode[i]&0x2f)==4) // BEQ
}
cc=get_reg(branch_regs[i].regmap,CCREG);
assert(cc==HOST_CCREG);
- if(unconditional)
+ if(unconditional)
store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
//do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional);
assem_debug("cycle count (adj)\n");
}
}
} // if(!only32)
-
+
if(invert) {
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) {
{
}
} // if(!only32)
-
+
if(invert) {
if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
{
// If subroutine call, flag return address as a possible branch target
if(rt1[i]==31 && i<slen-2) bt[i+2]=1;
-
+
if(ba[i]<start || ba[i]>=(start+slen*4))
{
// Branch out of this block, flush all regs
u=1;
uu=1;
gte_u=gte_u_unknown;
- /* Hexagon hack
+ /* Hexagon hack
if(itype[i]==UJUMP&&rt1[i]==31)
{
uu=u=0x300C00F; // Discard at, v0-v1, t6-t9
if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
if(i>istart) {
- if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP)
+ if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP)
{
// Don't store a register immediately after writing it,
// may prevent dual-issue.
assem_debug("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out);
//printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out);
//printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr);
- //if(debug)
+ //if(debug)
//printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
//printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29);
/*if(Count>=312978186) {
unsigned int type,op,op2;
//printf("addr = %x source = %x %x\n", addr,source,source[0]);
-
+
/* Pass 1 disassembly */
for(i=0;!done;i++) {
/* Pass 2 - Register dependencies and branch targets */
unneeded_registers(0,slen-1,0);
-
+
/* Pass 3 - Register allocation */
struct regstat current; // Current register allocations/status
unneeded_reg_upper[0]=1;
current.regmap[HOST_BTREG]=BTREG;
}
-
+
for(i=0;i<slen;i++)
{
if(bt[i])
}
} else {
// First instruction expects CCREG to be allocated
- if(i==0&&hr==HOST_CCREG)
+ if(i==0&&hr==HOST_CCREG)
regs[i].regmap_entry[hr]=CCREG;
else
regs[i].regmap_entry[hr]=-1;
pagespan_alloc(¤t,i);
break;
}
-
+
// Drop the upper half of registers that have become 32-bit
current.uu|=current.is32&((1LL<<rt1[i])|(1LL<<rt2[i]));
if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) {
}
} else {
// Branches expect CCREG to be allocated at the target
- if(regmap_pre[i][hr]==CCREG)
+ if(regmap_pre[i][hr]==CCREG)
regs[i].regmap_entry[hr]=CCREG;
else
regs[i].regmap_entry[hr]=-1;
if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1;
regs[i].waswritten=current.waswritten;
}
-
+
/* Pass 4 - Cull unused host registers */
-
+
uint64_t nr=0;
-
+
for (i=slen-1;i>=0;i--)
{
int hr;
}
// Save it
needed_reg[i]=nr;
-
+
// Deallocate unneeded registers
for(hr=0;hr<HOST_REGS;hr++)
{
d1=dep1[i+1];
d2=dep2[i+1];
}
- if(using_tlb) {
- if(itype[i+1]==LOAD || itype[i+1]==LOADLR ||
- itype[i+1]==STORE || itype[i+1]==STORELR ||
- itype[i+1]==C1LS || itype[i+1]==C2LS)
- map=TLREG;
- } else
if(itype[i+1]==STORE || itype[i+1]==STORELR ||
(opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
map=INVCP;
d1=dep1[i];
d2=dep2[i];
}
- if(using_tlb) {
- if(itype[i]==LOAD || itype[i]==LOADLR ||
- itype[i]==STORE || itype[i]==STORELR ||
- itype[i]==C1LS || itype[i]==C2LS)
- map=TLREG;
- } else if(itype[i]==STORE || itype[i]==STORELR ||
+ if(itype[i]==STORE || itype[i]==STORELR ||
(opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
map=INVCP;
}
}
}
}
-
+
/* Pass 5 - Pre-allocate registers */
-
+
// If a register is allocated during a loop, try to allocate it for the
// entire loop, if possible. This avoids loading/storing registers
// inside of the loop.
-
+
signed char f_regmap[HOST_REGS];
clear_all_regs(f_regmap);
for(i=0;i<slen-1;i++)
{
if(itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
{
- if(ba[i]>=start && ba[i]<(start+i*4))
+ if(ba[i]>=start && ba[i]<(start+i*4))
if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU
||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD
||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS
}
}
if(ooo[i]) {
- if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1])
+ if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1])
f_regmap[hr]=branch_regs[i].regmap[hr];
}else{
- if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1])
+ if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1])
f_regmap[hr]=branch_regs[i].regmap[hr];
}
// Avoid dirty->clean transition
if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP)
{
if(ooo[j]) {
- if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1])
+ if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1])
break;
}else{
- if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1])
+ if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1])
break;
}
if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) {
regs[k].isconst&=~(1<<HOST_CCREG);
k++;
}
- regs[j].regmap_entry[HOST_CCREG]=CCREG;
+ regs[j].regmap_entry[HOST_CCREG]=CCREG;
}
// Work backwards from the branch target
if(j>i&&f_regmap[HOST_CCREG]==CCREG)
}
}
}
-
+
// Cache memory offset or tlb map pointer if a register is available
#ifndef HOST_IMM_ADDR32
#ifndef RAM_OFFSET
- if(using_tlb)
+ if(0)
#endif
{
int earliest_available[HOST_REGS];
int loop_start[HOST_REGS];
int score[HOST_REGS];
int end[HOST_REGS];
- int reg=using_tlb?MMREG:ROREG;
+ int reg=ROREG;
// Init
for(hr=0;hr<HOST_REGS;hr++) {
}
}
#endif
-
+
// This allocates registers (if possible) one instruction prior
// to use, which can avoid a load-use penalty on certain CPUs.
for(i=0;i<slen-1;i++)
}
}
}
- // Load source into target register
+ // Load source into target register
if(lt1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
{
}
}
}
- // Preload map address
- #ifndef HOST_IMM_ADDR32
- if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) {
- hr=get_reg(regs[i+1].regmap,TLREG);
- if(hr>=0) {
- int sr=get_reg(regs[i+1].regmap,rs1[i+1]);
- if(sr>=0&&((regs[i+1].wasconst>>sr)&1)) {
- int nr;
- if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0)
- {
- regs[i].regmap[hr]=MGEN1+((i+1)&1);
- regmap_pre[i+1][hr]=MGEN1+((i+1)&1);
- regs[i+1].regmap_entry[hr]=MGEN1+((i+1)&1);
- regs[i].isconst&=~(1<<hr);
- regs[i].isconst|=regs[i+1].isconst&(1<<hr);
- constmap[i][hr]=constmap[i+1][hr];
- regs[i+1].wasdirty&=~(1<<hr);
- regs[i].dirty&=~(1<<hr);
- }
- else if((nr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1))>=0)
- {
- // move it to another register
- regs[i+1].regmap[hr]=-1;
- regmap_pre[i+2][hr]=-1;
- regs[i+1].regmap[nr]=TLREG;
- regmap_pre[i+2][nr]=TLREG;
- regs[i].regmap[nr]=MGEN1+((i+1)&1);
- regmap_pre[i+1][nr]=MGEN1+((i+1)&1);
- regs[i+1].regmap_entry[nr]=MGEN1+((i+1)&1);
- regs[i].isconst&=~(1<<nr);
- regs[i+1].isconst&=~(1<<nr);
- regs[i].dirty&=~(1<<nr);
- regs[i+1].wasdirty&=~(1<<nr);
- regs[i+1].dirty&=~(1<<nr);
- regs[i+2].wasdirty&=~(1<<nr);
- }
- }
- }
- }
- #endif
// Address for store instruction (non-constant)
if(itype[i+1]==STORE||itype[i+1]==STORELR
||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2
}
}
if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) {
- if(itype[i+1]==LOAD)
+ if(itype[i+1]==LOAD)
hr=get_reg(regs[i+1].regmap,rt1[i+1]);
if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2
hr=get_reg(regs[i+1].regmap,FTEMP);
}
}
}
-
+
/* Pass 6 - Optimize clean/dirty state */
clean_registers(0,slen-1,1);
-
+
/* Pass 7 - Identify 32-bit registers */
for (i=slen-1;i>=0;i--)
{
if((needed_reg[i]>>5)&1) printf("ebp ");
if((needed_reg[i]>>6)&1) printf("esi ");
if((needed_reg[i]>>7)&1) printf("edi ");
- printf("r:");
- for(r=0;r<=CCREG;r++) {
- //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) {
- if((requires_32bit[i]>>r)&1) {
- if(r==CCREG) printf(" CC");
- else if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
printf("\n");
- /*printf("pr:");
- for(r=0;r<=CCREG;r++) {
- //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) {
- if((pr32[i]>>r)&1) {
- if(r==CCREG) printf(" CC");
- else if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- if(pr32[i]!=requires_32bit[i]) printf(" OOPS");
- printf("\n");*/
#if defined(__i386__) || defined(__x86_64__)
printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]);
printf("dirty: ");
#endif
printf("\n");
}
- /*printf(" p32:");
- for(r=0;r<=CCREG;r++) {
- if((p32[i]>>r)&1) {
- if(r==CCREG) printf(" CC");
- else if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- if(p32[i]!=regs[i].is32) printf(" NO MATCH\n");
- else printf("\n");*/
if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
#if defined(__i386__) || defined(__x86_64__)
printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
//printf("shadow buffer: %x-%x\n",(int)copy,(int)copy+slen*4);
memcpy(copy,source,slen*4);
copy+=slen*4;
-
+
#ifdef __arm__
__clear_cache((void *)beginning,out);
#endif
-
+
// If we're within 256K of the end of the buffer,
// start over from the beginning. (Is 256K enough?)
if((u_int)out>(u_int)BASE_ADDR+(1<<TARGET_SIZE_2)-MAX_OUTPUT_BLOCK_SIZE) out=(u_char *)BASE_ADDR;
-
+
// Trap writes to any of the pages we compiled
for(i=start>>12;i<=(start+slen*4)>>12;i++) {
invalid_code[i]=0;
invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
-
+
/* Pass 10 - Free memory by expiring oldest blocks */
-
+
int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535;
while(expirep!=end)
{
case 3:
// Clear jump_out
#ifdef __arm__
- if((expirep&2047)==0)
+ if((expirep&2047)==0)
do_clear_cache();
#endif
ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift);