extern int branch_target;
extern uint64_t readmem_dword;
extern void *dynarec_local;
-extern u_int memory_map[1048576];
extern u_int mini_ht[32][2];
-extern u_int rounding_modes[4];
void indirect_jump_indexed();
void indirect_jump();
"lr",
"pc"};
-void output_byte(u_char byte)
-{
- *(out++)=byte;
-}
-void output_modrm(u_char mod,u_char rm,u_char ext)
-{
- assert(mod<4);
- assert(rm<8);
- assert(ext<8);
- u_char byte=(mod<<6)|(ext<<3)|rm;
- *(out++)=byte;
-}
-void output_sib(u_char scale,u_char index,u_char base)
-{
- assert(scale<4);
- assert(index<8);
- assert(base<8);
- u_char byte=(scale<<6)|(index<<3)|base;
- *(out++)=byte;
-}
void output_w32(u_int word)
{
*((u_int *)out)=word;
assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
}
-/*void emit_sbcimm(int imm,u_int rt)
-{
- u_int armval;
- genimm_checked(imm,&armval);
- assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
- output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
-}*/
-void emit_sbbimm(int imm,u_int rt)
-{
- assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
- assert(rt<8);
- if(imm<128&&imm>=-128) {
- output_byte(0x83);
- output_modrm(3,rt,3);
- output_byte(imm);
- }
- else
- {
- output_byte(0x81);
- output_modrm(3,rt,3);
- output_w32(imm);
- }
-}
+
void emit_rscimm(int rs,int imm,u_int rt)
{
assert(0);
emit_adcimm(rsh,0,rth);
}
-void emit_sbb(int rs1,int rs2)
-{
- assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
- output_byte(0x19);
- output_modrm(3,rs1,rs2);
-}
-
void emit_andimm(int rs,int imm,int rt)
{
u_int armval;
assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
}
-void emit_shlcl(int r)
-{
- assem_debug("shl %%%s,%%cl\n",regname[r]);
- assert(0);
-}
-void emit_shrcl(int r)
-{
- assem_debug("shr %%%s,%%cl\n",regname[r]);
- assert(0);
-}
-void emit_sarcl(int r)
-{
- assem_debug("sar %%%s,%%cl\n",regname[r]);
- assert(0);
-}
-void emit_shldcl(int r1,int r2)
-{
- assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
- assert(0);
-}
-void emit_shrdcl(int r1,int r2)
-{
- assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
- assert(0);
-}
void emit_orrshl(u_int rs,u_int shift,u_int rt)
{
assert(rs<16);
}
}
-void emit_cmovne(u_int *addr,int rt)
-{
- assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
- assert(0);
-}
-void emit_cmovl(u_int *addr,int rt)
-{
- assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
- assert(0);
-}
-void emit_cmovs(u_int *addr,int rt)
-{
- assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
- assert(0);
-}
void emit_cmovne_imm(int imm,int rt)
{
assem_debug("movne %s,#%d\n",regname[rt],imm);
output_w32(0x3a000000|offset);
}
-void emit_pushimm(int imm)
-{
- assem_debug("push $%x\n",imm);
- assert(0);
-}
-void emit_pusha()
-{
- assem_debug("pusha\n");
- assert(0);
-}
-void emit_popa()
-{
- assem_debug("popa\n");
- assert(0);
-}
-void emit_pushreg(u_int r)
-{
- assem_debug("push %%%s\n",regname[r]);
- assert(0);
-}
-void emit_popreg(u_int r)
-{
- assem_debug("pop %%%s\n",regname[r]);
- assert(0);
-}
void emit_callreg(u_int r)
{
assert(r<15);
assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
}
-void emit_movzwl_reg(int rs, int rt)
-{
- assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
- assert(0);
-}
-void emit_xchg(int rs, int rt)
-{
- assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
- assert(0);
-}
void emit_writeword_indexed(int rt, int offset, int rs)
{
assert(offset>-4096&&offset<4096);
assem_debug("strb %s,fp+%d\n",regname[rt],offset);
output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
}
-void emit_writeword_imm(int imm, int addr)
-{
- assem_debug("movl $%x,%x\n",imm,addr);
- assert(0);
-}
-void emit_writebyte_imm(int imm, int addr)
-{
- assem_debug("movb $%x,%x\n",imm,addr);
- assert(0);
-}
-void emit_mul(int rs)
-{
- assem_debug("mul %%%s\n",regname[rs]);
- assert(0);
-}
-void emit_imul(int rs)
-{
- assem_debug("imul %%%s\n",regname[rs]);
- assert(0);
-}
void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
{
assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
}
-void emit_div(int rs)
-{
- assem_debug("div %%%s\n",regname[rs]);
- assert(0);
-}
-void emit_idiv(int rs)
-{
- assem_debug("idiv %%%s\n",regname[rs]);
- assert(0);
-}
-void emit_cdq()
-{
- assem_debug("cdq\n");
- assert(0);
-}
-
void emit_clz(int rs,int rt)
{
assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
}
}
-// special case for checking invalid_code
-void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
-{
- assert(0);
-}
-
// special case for checking invalid_code
void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
{
emit_cmpimm(HOST_TEMPREG,imm);
}
-// special case for tlb mapping
-void emit_addsr12(int rs1,int rs2,int rt)
-{
- assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
- output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
-}
-
void emit_callne(int a)
{
assem_debug("blne %x\n",a);
}
// Used to preload hash table entries
-void emit_prefetch(void *addr)
-{
- assem_debug("prefetch %x\n",(int)addr);
- output_byte(0x0F);
- output_byte(0x18);
- output_modrm(0,5,1);
- output_w32((int)addr);
-}
void emit_prefetchreg(int r)
{
assem_debug("pld %s\n",regname[r]);
output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
}
-void emit_flds(int r,int sr)
-{
- assem_debug("flds s%d,[%s]\n",sr,regname[r]);
- output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
-}
-
-void emit_vldr(int r,int vr)
-{
- assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
- output_w32(0xed900b00|(vr<<12)|(r<<16));
-}
-
-void emit_fsts(int sr,int r)
-{
- assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
- output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
-}
-
-void emit_vstr(int vr,int r)
-{
- assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
- output_w32(0xed800b00|(vr<<12)|(r<<16));
-}
-
-void emit_ftosizs(int s,int d)
-{
- assem_debug("ftosizs s%d,s%d\n",d,s);
- output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
-}
-
-void emit_ftosizd(int s,int d)
-{
- assem_debug("ftosizd s%d,d%d\n",d,s);
- output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
-}
-
-void emit_fsitos(int s,int d)
-{
- assem_debug("fsitos s%d,s%d\n",d,s);
- output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
-}
-
-void emit_fsitod(int s,int d)
-{
- assem_debug("fsitod d%d,s%d\n",d,s);
- output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
-}
-
-void emit_fcvtds(int s,int d)
-{
- assem_debug("fcvtds d%d,s%d\n",d,s);
- output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
-}
-
-void emit_fcvtsd(int s,int d)
-{
- assem_debug("fcvtsd s%d,d%d\n",d,s);
- output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
-}
-
-void emit_fsqrts(int s,int d)
-{
- assem_debug("fsqrts d%d,s%d\n",d,s);
- output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
-}
-
-void emit_fsqrtd(int s,int d)
-{
- assem_debug("fsqrtd s%d,d%d\n",d,s);
- output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
-}
-
-void emit_fabss(int s,int d)
-{
- assem_debug("fabss d%d,s%d\n",d,s);
- output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
-}
-
-void emit_fabsd(int s,int d)
-{
- assem_debug("fabsd s%d,d%d\n",d,s);
- output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
-}
-
-void emit_fnegs(int s,int d)
-{
- assem_debug("fnegs d%d,s%d\n",d,s);
- output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
-}
-
-void emit_fnegd(int s,int d)
-{
- assem_debug("fnegd s%d,d%d\n",d,s);
- output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
-}
-
-void emit_fadds(int s1,int s2,int d)
-{
- assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
- output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
-}
-
-void emit_faddd(int s1,int s2,int d)
-{
- assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
- output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
-}
-
-void emit_fsubs(int s1,int s2,int d)
-{
- assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
- output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
-}
-
-void emit_fsubd(int s1,int s2,int d)
-{
- assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
- output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
-}
-
-void emit_fmuls(int s1,int s2,int d)
-{
- assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
- output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
-}
-
-void emit_fmuld(int s1,int s2,int d)
-{
- assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
- output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
-}
-
-void emit_fdivs(int s1,int s2,int d)
-{
- assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
- output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
-}
-
-void emit_fdivd(int s1,int s2,int d)
-{
- assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
- output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
-}
-
-void emit_fcmps(int x,int y)
-{
- assem_debug("fcmps s14, s15\n");
- output_w32(0xeeb47a67);
-}
-
-void emit_fcmpd(int x,int y)
-{
- assem_debug("fcmpd d6, d7\n");
- output_w32(0xeeb46b47);
-}
-
-void emit_fmstat()
-{
- assem_debug("fmstat\n");
- output_w32(0xeef1fa10);
-}
-
void emit_bicne_imm(int rs,int imm,int rt)
{
u_int armval;
restore_regs_all(reglist);
}
-// Write back consts using r14 so we don't disturb the other registers
-void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
-{
- int hr;
- for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
- if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
- if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
- int value=constmap[i][hr];
- if(value==0) {
- emit_zeroreg(HOST_TEMPREG);
- }
- else {
- emit_movimm(value,HOST_TEMPREG);
- }
- emit_storereg(i_regmap[hr],HOST_TEMPREG);
- }
- }
- }
- }
-}
-
/* Stubs/epilogue */
void literal_pool(int n)
emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
}
-static int do_tlb_r(int a, ...) { return 0; }
-static int do_tlb_r_branch(int a, ...) { return 0; }
-static int gen_tlb_addr_r(int a, ...) { return 0; }
-static int do_tlb_w(int a, ...) { return 0; }
-static int do_tlb_w_branch(int a, ...) { return 0; }
-static int gen_tlb_addr_w(int a, ...) { return 0; }
-
/* Special assem */
void shift_assemble_arm(int i,struct regstat *i_regs)
c=(i_regs->wasconst>>s)&1;
if(c) {
memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
- if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
}
}
- if(!using_tlb) {
- if(!c) {
- #ifdef RAM_OFFSET
- map=get_reg(i_regs->regmap,ROREG);
- if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
- #endif
- emit_shlimm(addr,3,temp);
- if (opcode[i]==0x22||opcode[i]==0x26) {
- emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
- }else{
- emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
- }
- jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
+ if(!c) {
+ #ifdef RAM_OFFSET
+ map=get_reg(i_regs->regmap,ROREG);
+ if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
+ #endif
+ emit_shlimm(addr,3,temp);
+ if (opcode[i]==0x22||opcode[i]==0x26) {
+ emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
+ }else{
+ emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
}
- else {
- if(ram_offset&&memtarget) {
- emit_addimm(temp2,ram_offset,HOST_TEMPREG);
- fastload_reg_override=HOST_TEMPREG;
- }
- if (opcode[i]==0x22||opcode[i]==0x26) {
- emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
- }else{
- emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
- }
+ jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
+ }
+ else {
+ if(ram_offset&&memtarget) {
+ emit_addimm(temp2,ram_offset,HOST_TEMPREG);
+ fastload_reg_override=HOST_TEMPREG;
}
- }else{ // using tlb
- int a;
- if(c) {
- a=-1;
- }else if (opcode[i]==0x22||opcode[i]==0x26) {
- a=0xFFFFFFFC; // LWL/LWR
+ if (opcode[i]==0x22||opcode[i]==0x26) {
+ emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
}else{
- a=0xFFFFFFF8; // LDL/LDR
- }
- map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- reglist&=~(1<<map);
- if(c) {
- if (opcode[i]==0x22||opcode[i]==0x26) {
- emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
- }else{
- emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
- }
+ emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
}
}
if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
uint64_t unneeded_reg_upper[MAXBLOCK];
uint64_t branch_unneeded_reg[MAXBLOCK];
uint64_t branch_unneeded_reg_upper[MAXBLOCK];
- uint64_t p32[MAXBLOCK];
uint64_t pr32[MAXBLOCK];
signed char regmap_pre[MAXBLOCK][HOST_REGS];
static uint64_t current_constmap[HOST_REGS];
static struct regstat branch_regs[MAXBLOCK];
signed char minimum_free_regs[MAXBLOCK];
u_int needed_reg[MAXBLOCK];
- uint64_t requires_32bit[MAXBLOCK];
u_int wont_dirty[MAXBLOCK];
u_int will_dirty[MAXBLOCK];
int ccadj[MAXBLOCK];
char shadow[1048576] __attribute__((aligned(16)));
void *copy;
int expirep;
- static const u_int using_tlb=0;
int new_dynarec_did_compile;
int new_dynarec_hacks;
u_int stop_after_jal;
#define CSREG 35 // Coprocessor status
#define CCREG 36 // Cycle count
#define INVCP 37 // Pointer to invalid_code
-#define MMREG 38 // Pointer to memory_map
+//#define MMREG 38 // Pointer to memory_map
#define ROREG 39 // ram offset (if rdram!=0x80000000)
#define TEMPREG 40
#define FTEMP 40 // FPU temporary register
#define PTEMP 41 // Prefetch temporary register
-#define TLREG 42 // TLB mapping offset
+//#define TLREG 42 // TLB mapping offset
#define RHASH 43 // Return address hash
#define RHTBL 44 // Return address hash table address
#define RTEMP 45 // JR/JALR address register
#define MAXREG 45
#define AGEN1 46 // Address generation temporary register
-#define AGEN2 47 // Address generation temporary register
-#define MGEN1 48 // Maptable address generation temporary register
-#define MGEN2 49 // Maptable address generation temporary register
+//#define AGEN2 47 // Address generation temporary register
+//#define MGEN1 48 // Maptable address generation temporary register
+//#define MGEN2 49 // Maptable address generation temporary register
#define BTREG 50 // Branch target temporary register
/* instruction types */
void invalidate_block(u_int block);
void invalidate_addr(u_int addr);
void remove_hash(int vaddr);
-void jump_vaddr();
void dyna_linker();
void dyna_linker_ds();
void verify_code();
void cc_interrupt();
void fp_exception();
void fp_exception_ds();
-void jump_syscall();
void jump_syscall_hle();
-void jump_eret();
void jump_hlecall();
void jump_intcall();
void new_dyna_leave();
-// TLB
-void TLBWI_new();
-void TLBWR_new();
-void read_nomem_new();
-void read_nomemb_new();
-void read_nomemh_new();
-void read_nomemd_new();
-void write_nomem_new();
-void write_nomemb_new();
-void write_nomemh_new();
-void write_nomemd_new();
-void write_rdram_new();
-void write_rdramb_new();
-void write_rdramh_new();
-void write_rdramd_new();
-extern u_int memory_map[1048576];
-
// Needed by assembler
void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
hsn[FTEMP]=0;
}
- // Don't remove the TLB registers either
- if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS || itype[i]==C2LS) {
- hsn[TLREG]=0;
- }
// Don't remove the miniht registers
if(itype[i]==UJUMP||itype[i]==RJUMP)
{
}
else current->is32|=1LL<<rt1[i];
dirty_reg(current,rt1[i]);
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
// LWL/LWR need a temporary register for the old value
if(opcode[i]==0x22||opcode[i]==0x26)
{
{
alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
}
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
alloc_reg_temp(current,i,-1);
minimum_free_regs[i]=1;
if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
alloc_reg64(current,i,rs2[i]);
if(rs2[i]) alloc_reg(current,i,FTEMP);
}
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
#if defined(HOST_IMM8)
// On CPUs without 32-bit immediates we need a pointer to invalid_code
else alloc_reg(current,i,INVCP);
if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
alloc_reg64(current,i,FTEMP);
}
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
#if defined(HOST_IMM8)
// On CPUs without 32-bit immediates we need a pointer to invalid_code
else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
clear_const(current,rt1[i]);
if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
alloc_reg(current,i,FTEMP);
- // If using TLB, need a register for pointer to the mapping table
- if(using_tlb) alloc_reg(current,i,TLREG);
#if defined(HOST_IMM8)
// On CPUs without 32-bit immediates we need a pointer to invalid_code
- else if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
+ if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
alloc_reg(current,i,INVCP);
#endif
// We need a temporary register for address generation
//printf("TRACE: %x\n",(&i)[-1]);
}
-void tlb_debug(u_int cause, u_int addr, u_int iaddr)
-{
- printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
-}
-
void alu_assemble(int i,struct regstat *i_regs)
{
if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
c=(i_regs->wasconst>>s)&1;
if (c) {
memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
- if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
}
}
//printf("load_assemble: c=%d\n",c);
assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
reglist&=~(1<<tl);
if(th>=0) reglist&=~(1<<th);
- if(!using_tlb) {
- if(!c) {
- #ifdef RAM_OFFSET
- map=get_reg(i_regs->regmap,ROREG);
- if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
- #endif
-//#define R29_HACK 1
- #ifdef R29_HACK
- // Strmnnrmn's speed hack
- if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
- #endif
- {
- jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
- }
+ if(!c) {
+ #ifdef RAM_OFFSET
+ map=get_reg(i_regs->regmap,ROREG);
+ if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
+ #endif
+ #ifdef R29_HACK
+ // Strmnnrmn's speed hack
+ if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
+ #endif
+ {
+ jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
}
- else if(ram_offset&&memtarget) {
- emit_addimm(addr,ram_offset,HOST_TEMPREG);
- fastload_reg_override=HOST_TEMPREG;
- }
- }else{ // using tlb
- int x=0;
- if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
- if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
- map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- reglist&=~(1<<map);
- map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
- do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
+ }
+ else if(ram_offset&&memtarget) {
+ emit_addimm(addr,ram_offset,HOST_TEMPREG);
+ fastload_reg_override=HOST_TEMPREG;
}
int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
if (opcode[i]==0x20) { // LB
#endif
{
//emit_xorimm(addr,3,tl);
- //gen_tlb_addr_r(tl,map);
//emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
//emit_movswl_indexed_tlb(x,tl,map,tl);
//else
if(map>=0) {
- gen_tlb_addr_r(a,map);
emit_movswl_indexed(x,a,tl);
}else{
#if 1 //def RAM_OFFSET
#endif
{
//emit_xorimm(addr,3,tl);
- //gen_tlb_addr_r(tl,map);
//emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
//emit_movzwl_indexed_tlb(x,tl,map,tl);
//#else
if(map>=0) {
- gen_tlb_addr_r(a,map);
emit_movzwl_indexed(x,a,tl);
}else{
#if 1 //def RAM_OFFSET
if(!dummy) {
int a=addr;
if(fastload_reg_override) a=fastload_reg_override;
- //gen_tlb_addr_r(tl,map);
//if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
//emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
#ifdef HOST_IMM_ADDR32
c=(i_regs->wasconst>>s)&1;
if(c) {
memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
- if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
}
}
assert(tl>=0);
if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
if(offset||s<0||c) addr=temp;
else addr=s;
- if(!using_tlb) {
- if(!c) {
- jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
- }
- else if(ram_offset&&memtarget) {
- emit_addimm(addr,ram_offset,HOST_TEMPREG);
- faststore_reg_override=HOST_TEMPREG;
- }
- }else{ // using tlb
- int x=0;
- if (opcode[i]==0x28) x=3; // SB
- if (opcode[i]==0x29) x=2; // SH
- map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- reglist&=~(1<<map);
- map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
- do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
+ if(!c) {
+ jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
+ }
+ else if(ram_offset&&memtarget) {
+ emit_addimm(addr,ram_offset,HOST_TEMPREG);
+ faststore_reg_override=HOST_TEMPREG;
}
if (opcode[i]==0x28) { // SB
if(!c) a=addr;
#endif
if(faststore_reg_override) a=faststore_reg_override;
- //gen_tlb_addr_w(temp,map);
//emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
emit_writebyte_indexed_tlb(tl,x,a,map,a);
}
//emit_writehword_indexed_tlb(tl,x,temp,map,temp);
//#else
if(map>=0) {
- gen_tlb_addr_w(a,map);
emit_writehword_indexed(tl,x,a);
}else
//emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
jaddr=0;
}
- if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
+ if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
if(!c||memtarget) {
#ifdef DESTRUCTIVE_SHIFT
// The x86 shift operation is 'destructive'; it overwrites the
c=(i_regs->isconst>>s)&1;
if(c) {
memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
- if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
}
}
assert(tl>=0);
if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
}
assert(temp>=0);
- if(!using_tlb) {
- if(!c) {
- emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
- if(!offset&&s!=temp) emit_mov(s,temp);
- jaddr=(int)out;
- emit_jno(0);
- }
- else
- {
- if(!memtarget||!rs1[i]) {
- jaddr=(int)out;
- emit_jmp(0);
- }
- }
- #ifdef RAM_OFFSET
- int map=get_reg(i_regs->regmap,ROREG);
- if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
- gen_tlb_addr_w(temp,map);
- #else
- if((u_int)rdram!=0x80000000)
- emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
- #endif
- }else{ // using tlb
- int map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- reglist&=~(1<<map);
- map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
- if(!c&&!offset&&s>=0) emit_mov(s,temp);
- do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
- if(!jaddr&&!memtarget) {
+ if(!c) {
+ emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
+ if(!offset&&s!=temp) emit_mov(s,temp);
+ jaddr=(int)out;
+ emit_jno(0);
+ }
+ else
+ {
+ if(!memtarget||!rs1[i]) {
jaddr=(int)out;
emit_jmp(0);
}
- gen_tlb_addr_w(temp,map);
}
+ #ifdef RAM_OFFSET
+ int map=get_reg(i_regs->regmap,ROREG);
+ if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
+ #else
+ if((u_int)rdram!=0x80000000)
+ emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
+ #endif
if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
temp2=get_reg(i_regs->regmap,FTEMP);
}
if(!c||!memtarget)
add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
- if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
+ if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
#ifdef RAM_OFFSET
int map=get_reg(i_regs->regmap,ROREG);
if(map<0) map=HOST_TEMPREG;
offset=imm[i];
assert(rs1[i]>0);
assert(tl>=0);
- assert(!using_tlb);
for(hr=0;hr<HOST_REGS;hr++) {
if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
if(itype[i]==LOAD||itype[i]==LOADLR||itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS||itype[i]==C2LS) {
int ra=-1;
int agr=AGEN1+(i&1);
- int mgr=MGEN1+(i&1);
if(itype[i]==LOAD) {
ra=get_reg(i_regs->regmap,rt1[i]);
if(ra<0) ra=get_reg(i_regs->regmap,-1);
}
}
int rs=get_reg(i_regs->regmap,rs1[i]);
- int rm=get_reg(i_regs->regmap,TLREG);
if(ra>=0) {
int offset=imm[i];
int c=(i_regs->wasconst>>rs)&1;
if(rs1[i]==0) {
// Using r0 as a base address
- /*if(rm>=0) {
- if(!entry||entry[rm]!=mgr) {
- generate_map_const(offset,rm);
- } // else did it in the previous cycle
- }*/
if(!entry||entry[ra]!=agr) {
if (opcode[i]==0x22||opcode[i]==0x26) {
emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
}else{
#ifdef HOST_IMM_ADDR32
- if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32) || // LWC1/LDC1/LWC2/LDC2
- (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000))
+ if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2
#endif
emit_movimm(constmap[i][rs]+offset,ra);
regs[i].loadedconst|=1<<ra;
emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
}else{
#ifdef HOST_IMM_ADDR32
- if((itype[i+1]!=LOAD&&(opcode[i+1]&0x3b)!=0x31&&(opcode[i+1]&0x3b)!=0x32) || // LWC1/LDC1/LWC2/LDC2
- (using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000))
+ if((itype[i+1]!=LOAD&&(opcode[i+1]&0x3b)!=0x31&&(opcode[i+1]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2
#endif
emit_movimm(constmap[i+1][rs]+offset,ra);
regs[i+1].loadedconst|=1<<ra;
// Load in delay slot, out-of-order execution
if(itype[i+2]==LOAD&&rs1[i+2]==reg&&rt1[i+2]==reg&&((regs[i+1].wasconst>>hr)&1))
{
- #ifdef HOST_IMM_ADDR32
- if(!using_tlb||((signed int)constmap[i][hr]+imm[i+2])<(signed int)0xC0000000) return 0;
- #endif
// Precompute load address
*value=constmap[i][hr]+imm[i+2];
return 1;
}
if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg)
{
- #ifdef HOST_IMM_ADDR32
- if(!using_tlb||((signed int)constmap[i][hr]+imm[i+1])<(signed int)0xC0000000) return 0;
- #endif
// Precompute load address
*value=constmap[i][hr]+imm[i+1];
//printf("c=%x imm=%x\n",(int)constmap[i][hr],imm[i+1]);
d1=dep1[i+1];
d2=dep2[i+1];
}
- if(using_tlb) {
- if(itype[i+1]==LOAD || itype[i+1]==LOADLR ||
- itype[i+1]==STORE || itype[i+1]==STORELR ||
- itype[i+1]==C1LS || itype[i+1]==C2LS)
- map=TLREG;
- } else
if(itype[i+1]==STORE || itype[i+1]==STORELR ||
(opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
map=INVCP;
d1=dep1[i];
d2=dep2[i];
}
- if(using_tlb) {
- if(itype[i]==LOAD || itype[i]==LOADLR ||
- itype[i]==STORE || itype[i]==STORELR ||
- itype[i]==C1LS || itype[i]==C2LS)
- map=TLREG;
- } else if(itype[i]==STORE || itype[i]==STORELR ||
+ if(itype[i]==STORE || itype[i]==STORELR ||
(opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
map=INVCP;
}
// Cache memory offset or tlb map pointer if a register is available
#ifndef HOST_IMM_ADDR32
#ifndef RAM_OFFSET
- if(using_tlb)
+ if(0)
#endif
{
int earliest_available[HOST_REGS];
int loop_start[HOST_REGS];
int score[HOST_REGS];
int end[HOST_REGS];
- int reg=using_tlb?MMREG:ROREG;
+ int reg=ROREG;
// Init
for(hr=0;hr<HOST_REGS;hr++) {
}
}
}
- // Preload map address
- #ifndef HOST_IMM_ADDR32
- if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) {
- hr=get_reg(regs[i+1].regmap,TLREG);
- if(hr>=0) {
- int sr=get_reg(regs[i+1].regmap,rs1[i+1]);
- if(sr>=0&&((regs[i+1].wasconst>>sr)&1)) {
- int nr;
- if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
- {
- regs[i].regmap[hr]=MGEN1+((i+1)&1);
- regmap_pre[i+1][hr]=MGEN1+((i+1)&1);
- regs[i+1].regmap_entry[hr]=MGEN1+((i+1)&1);
- regs[i].isconst&=~(1<<hr);
- regs[i].isconst|=regs[i+1].isconst&(1<<hr);
- constmap[i][hr]=constmap[i+1][hr];
- regs[i+1].wasdirty&=~(1<<hr);
- regs[i].dirty&=~(1<<hr);
- }
- else if((nr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1))>=0)
- {
- // move it to another register
- regs[i+1].regmap[hr]=-1;
- regmap_pre[i+2][hr]=-1;
- regs[i+1].regmap[nr]=TLREG;
- regmap_pre[i+2][nr]=TLREG;
- regs[i].regmap[nr]=MGEN1+((i+1)&1);
- regmap_pre[i+1][nr]=MGEN1+((i+1)&1);
- regs[i+1].regmap_entry[nr]=MGEN1+((i+1)&1);
- regs[i].isconst&=~(1<<nr);
- regs[i+1].isconst&=~(1<<nr);
- regs[i].dirty&=~(1<<nr);
- regs[i+1].wasdirty&=~(1<<nr);
- regs[i+1].dirty&=~(1<<nr);
- regs[i+2].wasdirty&=~(1<<nr);
- }
- }
- }
- }
- #endif
// Address for store instruction (non-constant)
if(itype[i+1]==STORE||itype[i+1]==STORELR
||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2
if((needed_reg[i]>>5)&1) printf("ebp ");
if((needed_reg[i]>>6)&1) printf("esi ");
if((needed_reg[i]>>7)&1) printf("edi ");
- printf("r:");
- for(r=0;r<=CCREG;r++) {
- //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) {
- if((requires_32bit[i]>>r)&1) {
- if(r==CCREG) printf(" CC");
- else if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
printf("\n");
- /*printf("pr:");
- for(r=0;r<=CCREG;r++) {
- //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) {
- if((pr32[i]>>r)&1) {
- if(r==CCREG) printf(" CC");
- else if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- if(pr32[i]!=requires_32bit[i]) printf(" OOPS");
- printf("\n");*/
#if defined(__i386__) || defined(__x86_64__)
printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]);
printf("dirty: ");
#endif
printf("\n");
}
- /*printf(" p32:");
- for(r=0;r<=CCREG;r++) {
- if((p32[i]>>r)&1) {
- if(r==CCREG) printf(" CC");
- else if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- if(p32[i]!=regs[i].is32) printf(" NO MATCH\n");
- else printf("\n");*/
if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
#if defined(__i386__) || defined(__x86_64__)
printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);