static u_int get_page(u_int vaddr)
{
+#ifndef PCSX
u_int page=(vaddr^0x80000000)>>12;
+#else
+ u_int page=vaddr&~0xe0000000;
+ if (page < 0x1000000)
+ page &= ~0x0e00000; // RAM mirrors
+ page>>=12;
+#endif
#ifndef DISABLE_TLB
if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
#endif
// Dereference the pointers and remove if it matches
void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
{
- u_int old_host_addr=0;
while(head) {
int ptr=get_pointer(head->addr);
inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
{
inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
u_int host_addr=(u_int)kill_pointer(head->addr);
-
- if((host_addr>>12)!=(old_host_addr>>12)) {
- #ifdef __arm__
- __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff));
- #endif
- old_host_addr=host_addr;
- }
+ #ifdef __arm__
+ needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
+ #endif
}
head=head->next;
}
- #ifdef __arm__
- if (old_host_addr)
- __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff));
- #endif
}
// This is called when we write to a compiled block (see do_invstub)
{
struct ll_entry *head;
struct ll_entry *next;
- u_int old_host_addr=0;
head=jump_in[page];
jump_in[page]=0;
while(head!=NULL) {
while(head!=NULL) {
inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
u_int host_addr=(u_int)kill_pointer(head->addr);
-
- if((host_addr>>12)!=(old_host_addr>>12)) {
- #ifdef __arm__
- __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff));
- #endif
- old_host_addr=host_addr;
- }
+ #ifdef __arm__
+ needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
+ #endif
next=head->next;
free(head);
head=next;
}
- #ifdef __arm__
- if (old_host_addr)
- __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff));
- #endif
}
void invalidate_block(u_int block)
{
for(first=page+1;first<last;first++) {
invalidate_page(first);
}
+ #ifdef __arm__
+ do_clear_cache();
+ #endif
// Don't trap writes
invalid_code[block]=1;
{
invalidate_block(addr>>12);
}
+// This is called when loading a save state.
+// Anything could have changed, so invalidate everything.
void invalidate_all_pages()
{
u_int page,n;
if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
if(rt1[i]) {
alloc_reg(current,i,rt1[i]);
+ if(get_reg(current->regmap,rt1[i])<0) {
+ // dummy load, but we still need a register to calculate the address
+ alloc_reg_temp(current,i,-1);
+ }
if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
{
current->is32&=~(1LL<<rt1[i]);
{
// Load to r0 (dummy load)
// but we still need a register to calculate the address
+ if(opcode[i]==0x22||opcode[i]==0x26)
+ {
+ alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
+ }
alloc_reg_temp(current,i,-1);
+ if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
+ {
+ alloc_all(current,i);
+ alloc_reg64(current,i,FTEMP);
+ }
}
}
alloc_reg(current,i,CSREG); // Load status
if(opcode2[i]<3) // MFC1/DMFC1/CFC1
{
- assert(rt1[i]);
- clear_const(current,rt1[i]);
- if(opcode2[i]==1) {
- alloc_reg64(current,i,rt1[i]); // DMFC1
- current->is32&=~(1LL<<rt1[i]);
- }else{
- alloc_reg(current,i,rt1[i]); // MFC1/CFC1
- current->is32|=1LL<<rt1[i];
+ if(rt1[i]){
+ clear_const(current,rt1[i]);
+ if(opcode2[i]==1) {
+ alloc_reg64(current,i,rt1[i]); // DMFC1
+ current->is32&=~(1LL<<rt1[i]);
+ }else{
+ alloc_reg(current,i,rt1[i]); // MFC1/CFC1
+ current->is32|=1LL<<rt1[i];
+ }
+ dirty_reg(current,rt1[i]);
}
- dirty_reg(current,rt1[i]);
alloc_reg_temp(current,i,-1);
}
else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
#endif
if(offset||s<0||c) addr=tl;
else addr=s;
- if(tl>=0) {
- //assert(tl>=0);
- //assert(rt1[i]);
- reglist&=~(1<<tl);
- if(th>=0) reglist&=~(1<<th);
- if(!using_tlb) {
- if(!c) {
+ //if(tl<0) tl=get_reg(i_regs->regmap,-1);
+ if(tl>=0) {
+ //printf("load_assemble: c=%d\n",c);
+ //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
+ assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
+ reglist&=~(1<<tl);
+ if(th>=0) reglist&=~(1<<th);
+ if(!using_tlb) {
+ if(!c) {
+ #ifdef RAM_OFFSET
+ map=get_reg(i_regs->regmap,ROREG);
+ if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
+ #endif
//#define R29_HACK 1
- #ifdef R29_HACK
- // Strmnnrmn's speed hack
- if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
+ #ifdef R29_HACK
+ // Strmnnrmn's speed hack
+ if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
+ #endif
+ {
+ emit_cmpimm(addr,RAM_SIZE);
+ jaddr=(int)out;
+ #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
+ // Hint to branch predictor that the branch is unlikely to be taken
+ if(rs1[i]>=28)
+ emit_jno_unlikely(0);
+ else
#endif
- {
- emit_cmpimm(addr,RAM_SIZE);
- jaddr=(int)out;
- #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- // Hint to branch predictor that the branch is unlikely to be taken
- if(rs1[i]>=28)
- emit_jno_unlikely(0);
- else
- #endif
- emit_jno(0);
- }
+ emit_jno(0);
}
- }else{ // using tlb
- int x=0;
- if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
- if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
- map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
- do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
- }
- if (opcode[i]==0x20) { // LB
- if(!c||memtarget) {
+ }
+ }else{ // using tlb
+ int x=0;
+ if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
+ if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
+ map=get_reg(i_regs->regmap,TLREG);
+ assert(map>=0);
+ map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
+ do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
+ }
+ int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
+ if (opcode[i]==0x20) { // LB
+ if(!c||memtarget) {
+ if(!dummy) {
#ifdef HOST_IMM_ADDR32
if(c)
emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
//emit_xorimm(addr,3,tl);
//gen_tlb_addr_r(tl,map);
//emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
- int x=0;
+ int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
if(!c) emit_xorimm(addr,3,tl);
else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
- if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
- else if (tl!=addr) emit_mov(addr,tl);
+ if(!c) a=addr;
#endif
- emit_movsbl_indexed_tlb(x,tl,map,tl);
+ emit_movsbl_indexed_tlb(x,a,map,tl);
}
- if(jaddr)
- add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- else
- inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ if(jaddr)
+ add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- if (opcode[i]==0x21) { // LH
- if(!c||memtarget) {
+ else
+ inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ }
+ if (opcode[i]==0x21) { // LH
+ if(!c||memtarget) {
+ if(!dummy) {
#ifdef HOST_IMM_ADDR32
if(c)
emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
else
#endif
{
- int x=0;
+ int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
if(!c) emit_xorimm(addr,2,tl);
else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
- if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
- else if (tl!=addr) emit_mov(addr,tl);
+ if(!c) a=addr;
#endif
//#ifdef
//emit_movswl_indexed_tlb(x,tl,map,tl);
//else
if(map>=0) {
- gen_tlb_addr_r(tl,map);
- emit_movswl_indexed(x,tl,tl);
- }else
- emit_movswl_indexed((int)rdram-0x80000000+x,tl,tl);
+ gen_tlb_addr_r(a,map);
+ emit_movswl_indexed(x,a,tl);
+ }else{
+ #ifdef RAM_OFFSET
+ emit_movswl_indexed(x,a,tl);
+ #else
+ emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
+ #endif
+ }
}
- if(jaddr)
- add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- else
- inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ if(jaddr)
+ add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- if (opcode[i]==0x23) { // LW
- if(!c||memtarget) {
+ else
+ inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ }
+ if (opcode[i]==0x23) { // LW
+ if(!c||memtarget) {
+ if(!dummy) {
//emit_readword_indexed((int)rdram-0x80000000,addr,tl);
#ifdef HOST_IMM_ADDR32
if(c)
else
#endif
emit_readword_indexed_tlb(0,addr,map,tl);
- if(jaddr)
- add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- else
- inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ if(jaddr)
+ add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- if (opcode[i]==0x24) { // LBU
- if(!c||memtarget) {
+ else
+ inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ }
+ if (opcode[i]==0x24) { // LBU
+ if(!c||memtarget) {
+ if(!dummy) {
#ifdef HOST_IMM_ADDR32
if(c)
emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
//emit_xorimm(addr,3,tl);
//gen_tlb_addr_r(tl,map);
//emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
- int x=0;
+ int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
if(!c) emit_xorimm(addr,3,tl);
else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
- if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
- else if (tl!=addr) emit_mov(addr,tl);
+ if(!c) a=addr;
#endif
- emit_movzbl_indexed_tlb(x,tl,map,tl);
+ emit_movzbl_indexed_tlb(x,a,map,tl);
}
- if(jaddr)
- add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- else
- inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ if(jaddr)
+ add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- if (opcode[i]==0x25) { // LHU
- if(!c||memtarget) {
+ else
+ inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ }
+ if (opcode[i]==0x25) { // LHU
+ if(!c||memtarget) {
+ if(!dummy) {
#ifdef HOST_IMM_ADDR32
if(c)
emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
else
#endif
{
- int x=0;
+ int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
if(!c) emit_xorimm(addr,2,tl);
else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
- if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
- else if (tl!=addr) emit_mov(addr,tl);
+ if(!c) a=addr;
#endif
//#ifdef
//emit_movzwl_indexed_tlb(x,tl,map,tl);
//#else
if(map>=0) {
- gen_tlb_addr_r(tl,map);
- emit_movzwl_indexed(x,tl,tl);
- }else
- emit_movzwl_indexed((int)rdram-0x80000000+x,tl,tl);
- if(jaddr)
- add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
+ gen_tlb_addr_r(a,map);
+ emit_movzwl_indexed(x,a,tl);
+ }else{
+ #ifdef RAM_OFFSET
+ emit_movzwl_indexed(x,a,tl);
+ #else
+ emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
+ #endif
+ }
}
}
- else
- inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ if(jaddr)
+ add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- if (opcode[i]==0x27) { // LWU
- assert(th>=0);
- if(!c||memtarget) {
+ else
+ inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ }
+ if (opcode[i]==0x27) { // LWU
+ assert(th>=0);
+ if(!c||memtarget) {
+ if(!dummy) {
//emit_readword_indexed((int)rdram-0x80000000,addr,tl);
#ifdef HOST_IMM_ADDR32
if(c)
else
#endif
emit_readword_indexed_tlb(0,addr,map,tl);
- if(jaddr)
- add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
- }
- else {
- inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
}
- emit_zeroreg(th);
+ if(jaddr)
+ add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- if (opcode[i]==0x37) { // LD
- if(!c||memtarget) {
+ else {
+ inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ }
+ emit_zeroreg(th);
+ }
+ if (opcode[i]==0x37) { // LD
+ if(!c||memtarget) {
+ if(!dummy) {
//gen_tlb_addr_r(tl,map);
//if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
//emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
else
#endif
emit_readdword_indexed_tlb(0,addr,map,th,tl);
- if(jaddr)
- add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- else
- inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
+ if(jaddr)
+ add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
}
- //emit_storereg(rt1[i],tl); // DEBUG
+ else
+ inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
}
+ }
+ //emit_storereg(rt1[i],tl); // DEBUG
//if(opcode[i]==0x23)
//if(opcode[i]==0x24)
//if(opcode[i]==0x23||opcode[i]==0x24)
for(hr=0;hr<HOST_REGS;hr++) {
if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
}
- if(tl>=0) {
- assert(temp>=0);
- if(!using_tlb) {
- if(!c) {
- emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
- if(!offset&&s!=temp) emit_mov(s,temp);
- jaddr=(int)out;
- emit_jno(0);
- }
- else
- {
- if(!memtarget||!rs1[i]) {
- jaddr=(int)out;
- emit_jmp(0);
- }
- }
- if((u_int)rdram!=0x80000000)
- emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
- }else{ // using tlb
- int map=get_reg(i_regs->regmap,TLREG);
- assert(map>=0);
- map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
- if(!c&&!offset&&s>=0) emit_mov(s,temp);
- do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
- if(!jaddr&&!memtarget) {
+ assert(temp>=0);
+ if(!using_tlb) {
+ if(!c) {
+ emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
+ if(!offset&&s!=temp) emit_mov(s,temp);
+ jaddr=(int)out;
+ emit_jno(0);
+ }
+ else
+ {
+ if(!memtarget||!rs1[i]) {
jaddr=(int)out;
emit_jmp(0);
}
- gen_tlb_addr_w(temp,map);
}
-
- if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
- temp2=get_reg(i_regs->regmap,FTEMP);
- if(!rs2[i]) temp2=th=tl;
+ #ifdef RAM_OFFSET
+ int map=get_reg(i_regs->regmap,ROREG);
+ if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
+ gen_tlb_addr_w(temp,map);
+ #else
+ if((u_int)rdram!=0x80000000)
+ emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
+ #endif
+ }else{ // using tlb
+ int map=get_reg(i_regs->regmap,TLREG);
+ assert(map>=0);
+ map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
+ if(!c&&!offset&&s>=0) emit_mov(s,temp);
+ do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
+ if(!jaddr&&!memtarget) {
+ jaddr=(int)out;
+ emit_jmp(0);
}
+ gen_tlb_addr_w(temp,map);
+ }
+
+ if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
+ temp2=get_reg(i_regs->regmap,FTEMP);
+ if(!rs2[i]) temp2=th=tl;
+ }
#ifndef BIG_ENDIAN_MIPS
emit_xorimm(temp,3,temp);
#endif
- emit_testimm(temp,2);
- case2=(int)out;
- emit_jne(0);
- emit_testimm(temp,1);
- case1=(int)out;
- emit_jne(0);
- // 0
- if (opcode[i]==0x2A) { // SWL
- emit_writeword_indexed(tl,0,temp);
- }
- if (opcode[i]==0x2E) { // SWR
- emit_writebyte_indexed(tl,3,temp);
- }
- if (opcode[i]==0x2C) { // SDL
- emit_writeword_indexed(th,0,temp);
- if(rs2[i]) emit_mov(tl,temp2);
- }
- if (opcode[i]==0x2D) { // SDR
- emit_writebyte_indexed(tl,3,temp);
- if(rs2[i]) emit_shldimm(th,tl,24,temp2);
- }
+ emit_testimm(temp,2);
+ case2=(int)out;
+ emit_jne(0);
+ emit_testimm(temp,1);
+ case1=(int)out;
+ emit_jne(0);
+ // 0
+ if (opcode[i]==0x2A) { // SWL
+ emit_writeword_indexed(tl,0,temp);
+ }
+ if (opcode[i]==0x2E) { // SWR
+ emit_writebyte_indexed(tl,3,temp);
+ }
+ if (opcode[i]==0x2C) { // SDL
+ emit_writeword_indexed(th,0,temp);
+ if(rs2[i]) emit_mov(tl,temp2);
+ }
+ if (opcode[i]==0x2D) { // SDR
+ emit_writebyte_indexed(tl,3,temp);
+ if(rs2[i]) emit_shldimm(th,tl,24,temp2);
+ }
+ done0=(int)out;
+ emit_jmp(0);
+ // 1
+ set_jump_target(case1,(int)out);
+ if (opcode[i]==0x2A) { // SWL
+ // Write 3 msb into three least significant bytes
+ if(rs2[i]) emit_rorimm(tl,8,tl);
+ emit_writehword_indexed(tl,-1,temp);
+ if(rs2[i]) emit_rorimm(tl,16,tl);
+ emit_writebyte_indexed(tl,1,temp);
+ if(rs2[i]) emit_rorimm(tl,8,tl);
+ }
+ if (opcode[i]==0x2E) { // SWR
+ // Write two lsb into two most significant bytes
+ emit_writehword_indexed(tl,1,temp);
+ }
+ if (opcode[i]==0x2C) { // SDL
+ if(rs2[i]) emit_shrdimm(tl,th,8,temp2);
+ // Write 3 msb into three least significant bytes
+ if(rs2[i]) emit_rorimm(th,8,th);
+ emit_writehword_indexed(th,-1,temp);
+ if(rs2[i]) emit_rorimm(th,16,th);
+ emit_writebyte_indexed(th,1,temp);
+ if(rs2[i]) emit_rorimm(th,8,th);
+ }
+ if (opcode[i]==0x2D) { // SDR
+ if(rs2[i]) emit_shldimm(th,tl,16,temp2);
+ // Write two lsb into two most significant bytes
+ emit_writehword_indexed(tl,1,temp);
+ }
+ done1=(int)out;
+ emit_jmp(0);
+ // 2
+ set_jump_target(case2,(int)out);
+ emit_testimm(temp,1);
+ case3=(int)out;
+ emit_jne(0);
+ if (opcode[i]==0x2A) { // SWL
+ // Write two msb into two least significant bytes
+ if(rs2[i]) emit_rorimm(tl,16,tl);
+ emit_writehword_indexed(tl,-2,temp);
+ if(rs2[i]) emit_rorimm(tl,16,tl);
+ }
+ if (opcode[i]==0x2E) { // SWR
+ // Write 3 lsb into three most significant bytes
+ emit_writebyte_indexed(tl,-1,temp);
+ if(rs2[i]) emit_rorimm(tl,8,tl);
+ emit_writehword_indexed(tl,0,temp);
+ if(rs2[i]) emit_rorimm(tl,24,tl);
+ }
+ if (opcode[i]==0x2C) { // SDL
+ if(rs2[i]) emit_shrdimm(tl,th,16,temp2);
+ // Write two msb into two least significant bytes
+ if(rs2[i]) emit_rorimm(th,16,th);
+ emit_writehword_indexed(th,-2,temp);
+ if(rs2[i]) emit_rorimm(th,16,th);
+ }
+ if (opcode[i]==0x2D) { // SDR
+ if(rs2[i]) emit_shldimm(th,tl,8,temp2);
+ // Write 3 lsb into three most significant bytes
+ emit_writebyte_indexed(tl,-1,temp);
+ if(rs2[i]) emit_rorimm(tl,8,tl);
+ emit_writehword_indexed(tl,0,temp);
+ if(rs2[i]) emit_rorimm(tl,24,tl);
+ }
+ done2=(int)out;
+ emit_jmp(0);
+ // 3
+ set_jump_target(case3,(int)out);
+ if (opcode[i]==0x2A) { // SWL
+ // Write msb into least significant byte
+ if(rs2[i]) emit_rorimm(tl,24,tl);
+ emit_writebyte_indexed(tl,-3,temp);
+ if(rs2[i]) emit_rorimm(tl,8,tl);
+ }
+ if (opcode[i]==0x2E) { // SWR
+ // Write entire word
+ emit_writeword_indexed(tl,-3,temp);
+ }
+ if (opcode[i]==0x2C) { // SDL
+ if(rs2[i]) emit_shrdimm(tl,th,24,temp2);
+ // Write msb into least significant byte
+ if(rs2[i]) emit_rorimm(th,24,th);
+ emit_writebyte_indexed(th,-3,temp);
+ if(rs2[i]) emit_rorimm(th,8,th);
+ }
+ if (opcode[i]==0x2D) { // SDR
+ if(rs2[i]) emit_mov(th,temp2);
+ // Write entire word
+ emit_writeword_indexed(tl,-3,temp);
+ }
+ set_jump_target(done0,(int)out);
+ set_jump_target(done1,(int)out);
+ set_jump_target(done2,(int)out);
+ if (opcode[i]==0x2C) { // SDL
+ emit_testimm(temp,4);
done0=(int)out;
- emit_jmp(0);
- // 1
- set_jump_target(case1,(int)out);
- if (opcode[i]==0x2A) { // SWL
- // Write 3 msb into three least significant bytes
- if(rs2[i]) emit_rorimm(tl,8,tl);
- emit_writehword_indexed(tl,-1,temp);
- if(rs2[i]) emit_rorimm(tl,16,tl);
- emit_writebyte_indexed(tl,1,temp);
- if(rs2[i]) emit_rorimm(tl,8,tl);
- }
- if (opcode[i]==0x2E) { // SWR
- // Write two lsb into two most significant bytes
- emit_writehword_indexed(tl,1,temp);
- }
- if (opcode[i]==0x2C) { // SDL
- if(rs2[i]) emit_shrdimm(tl,th,8,temp2);
- // Write 3 msb into three least significant bytes
- if(rs2[i]) emit_rorimm(th,8,th);
- emit_writehword_indexed(th,-1,temp);
- if(rs2[i]) emit_rorimm(th,16,th);
- emit_writebyte_indexed(th,1,temp);
- if(rs2[i]) emit_rorimm(th,8,th);
- }
- if (opcode[i]==0x2D) { // SDR
- if(rs2[i]) emit_shldimm(th,tl,16,temp2);
- // Write two lsb into two most significant bytes
- emit_writehword_indexed(tl,1,temp);
- }
- done1=(int)out;
- emit_jmp(0);
- // 2
- set_jump_target(case2,(int)out);
- emit_testimm(temp,1);
- case3=(int)out;
emit_jne(0);
- if (opcode[i]==0x2A) { // SWL
- // Write two msb into two least significant bytes
- if(rs2[i]) emit_rorimm(tl,16,tl);
- emit_writehword_indexed(tl,-2,temp);
- if(rs2[i]) emit_rorimm(tl,16,tl);
- }
- if (opcode[i]==0x2E) { // SWR
- // Write 3 lsb into three most significant bytes
- emit_writebyte_indexed(tl,-1,temp);
- if(rs2[i]) emit_rorimm(tl,8,tl);
- emit_writehword_indexed(tl,0,temp);
- if(rs2[i]) emit_rorimm(tl,24,tl);
- }
- if (opcode[i]==0x2C) { // SDL
- if(rs2[i]) emit_shrdimm(tl,th,16,temp2);
- // Write two msb into two least significant bytes
- if(rs2[i]) emit_rorimm(th,16,th);
- emit_writehword_indexed(th,-2,temp);
- if(rs2[i]) emit_rorimm(th,16,th);
- }
- if (opcode[i]==0x2D) { // SDR
- if(rs2[i]) emit_shldimm(th,tl,8,temp2);
- // Write 3 lsb into three most significant bytes
- emit_writebyte_indexed(tl,-1,temp);
- if(rs2[i]) emit_rorimm(tl,8,tl);
- emit_writehword_indexed(tl,0,temp);
- if(rs2[i]) emit_rorimm(tl,24,tl);
- }
- done2=(int)out;
- emit_jmp(0);
- // 3
- set_jump_target(case3,(int)out);
- if (opcode[i]==0x2A) { // SWL
- // Write msb into least significant byte
- if(rs2[i]) emit_rorimm(tl,24,tl);
- emit_writebyte_indexed(tl,-3,temp);
- if(rs2[i]) emit_rorimm(tl,8,tl);
- }
- if (opcode[i]==0x2E) { // SWR
- // Write entire word
- emit_writeword_indexed(tl,-3,temp);
- }
- if (opcode[i]==0x2C) { // SDL
- if(rs2[i]) emit_shrdimm(tl,th,24,temp2);
- // Write msb into least significant byte
- if(rs2[i]) emit_rorimm(th,24,th);
- emit_writebyte_indexed(th,-3,temp);
- if(rs2[i]) emit_rorimm(th,8,th);
- }
- if (opcode[i]==0x2D) { // SDR
- if(rs2[i]) emit_mov(th,temp2);
- // Write entire word
- emit_writeword_indexed(tl,-3,temp);
- }
+ emit_andimm(temp,~3,temp);
+ emit_writeword_indexed(temp2,4,temp);
+ set_jump_target(done0,(int)out);
+ }
+ if (opcode[i]==0x2D) { // SDR
+ emit_testimm(temp,4);
+ done0=(int)out;
+ emit_jeq(0);
+ emit_andimm(temp,~3,temp);
+ emit_writeword_indexed(temp2,-4,temp);
set_jump_target(done0,(int)out);
- set_jump_target(done1,(int)out);
- set_jump_target(done2,(int)out);
- if (opcode[i]==0x2C) { // SDL
- emit_testimm(temp,4);
- done0=(int)out;
- emit_jne(0);
- emit_andimm(temp,~3,temp);
- emit_writeword_indexed(temp2,4,temp);
- set_jump_target(done0,(int)out);
- }
- if (opcode[i]==0x2D) { // SDR
- emit_testimm(temp,4);
- done0=(int)out;
- emit_jeq(0);
- emit_andimm(temp,~3,temp);
- emit_writeword_indexed(temp2,-4,temp);
- set_jump_target(done0,(int)out);
- }
- if(!c||!memtarget)
- add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
}
+ if(!c||!memtarget)
+ add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
if(!using_tlb) {
+ #ifdef RAM_OFFSET
+ int map=get_reg(i_regs->regmap,ROREG);
+ if(map<0) map=HOST_TEMPREG;
+ gen_orig_addr_w(temp,map);
+ #else
emit_addimm_no_flags((u_int)0x80000000-(u_int)rdram,temp);
+ #endif
#if defined(HOST_IMM8)
int ir=get_reg(i_regs->regmap,INVCP);
assert(ir>=0);
#else
emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
#endif
+ #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
+ emit_callne(invalidate_addr_reg[temp]);
+ #else
jaddr2=(int)out;
emit_jne(0);
add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
+ #endif
}
/*
emit_pusha();
{
//if(opcode2[i]==0x10||opcode2[i]==0x12) { // MFHI/MFLO
//if(opcode2[i]==0x11||opcode2[i]==0x13) { // MTHI/MTLO
- //assert(rt1[i]>0);
if(rt1[i]) {
signed char sh,sl,th,tl;
th=get_reg(i_regs->regmap,rt1[i]|64);
else printf("optimizable: yes\n");
}*/
//if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0;
+#ifndef FORCE32
if(requires_32bit[t]&~i_is32) return 0;
- else return 1;
+ else
+#endif
+ return 1;
}
return 0;
}
int mgr=MGEN1+(i&1);
if(itype[i]==LOAD) {
ra=get_reg(i_regs->regmap,rt1[i]);
- //if(rt1[i]) assert(ra>=0);
+ if(ra<0) ra=get_reg(i_regs->regmap,-1);
+ assert(ra>=0);
}
if(itype[i]==LOADLR) {
ra=get_reg(i_regs->regmap,FTEMP);
}
}
//if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0;
+#ifndef FORCE32
if(requires_32bit[t]&~i_is32) return 0;
+#endif
// Delay slots are not valid branch targets
//if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0;
// Delay slots require additional processing, so do not match
//if(opcode2[i]>=0x10) return; // FIXME (BxxZAL)
//assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL)
- if(ooo)
+ if(ooo) {
if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))
- {
- // Write-after-read dependency prevents out of order execution
- // First test branch condition, then execute delay slot, then branch
- ooo=0;
+ {
+ // Write-after-read dependency prevents out of order execution
+ // First test branch condition, then execute delay slot, then branch
+ ooo=0;
+ }
+ if(rt1[i]==31&&(rs1[i+1]==31||rs2[i+1]==31||rt1[i+1]==31||rt2[i+1]==31))
+ // BxxZAL $ra is available to delay insn, so do it in order
+ ooo=0;
}
- assert(opcode2[i]<0x10||ooo); // FIXME (BxxZALL)
if(ooo) {
s1l=get_reg(branch_regs[i].regmap,rs1[i]);
load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
if(rt1[i]==31) {
int rt,return_address;
- assert(rt1[i+1]!=31);
- assert(rt2[i+1]!=31);
rt=get_reg(branch_regs[i].regmap,31);
assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
if(rt>=0) {
// In-order execution (branch first)
//printf("IOE\n");
int nottaken=0;
+ if(rt1[i]==31) {
+ int rt,return_address;
+ rt=get_reg(branch_regs[i].regmap,31);
+ if(rt>=0) {
+ // Save the PC even if the branch is not taken
+ return_address=start+i*4+8;
+ emit_movimm(return_address,rt); // PC into link register
+ #ifdef IMM_PREFETCH
+ emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
+ #endif
+ }
+ }
if(!unconditional) {
//printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
if(!only32)
{
assert(s1h>=0);
- if((opcode2[i]&0x1d)==0) // BLTZ/BLTZL
+ if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
{
emit_test(s1h,s1h);
nottaken=(int)out;
emit_jns(1);
}
- if((opcode2[i]&0x1d)==1) // BGEZ/BGEZL
+ if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
{
emit_test(s1h,s1h);
nottaken=(int)out;
else
{
assert(s1l>=0);
- if((opcode2[i]&0x1d)==0) // BLTZ/BLTZL
+ if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
{
emit_test(s1l,s1l);
nottaken=(int)out;
emit_jns(1);
}
- if((opcode2[i]&0x1d)==1) // BGEZ/BGEZL
+ if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
{
emit_test(s1l,s1l);
nottaken=(int)out;
else ba[i]=-1;
/* Is this the end of the block? */
if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) {
+#ifdef PCSX
+ // check for link register access in delay slot
+ int rt1_=rt1[i-1];
+ if(rt1_!=0&&(rs1[i]==rt1_||rs2[i]==rt1_||rt1[i]==rt1_||rt2[i]==rt1_)) {
+ printf("link access in delay slot @%08x (%08x)\n", addr + i*4, addr);
+ ba[i-1]=-1;
+ itype[i-1]=INTCALL;
+ done=2;
+ }
+ else
+#endif
if(rt1[i-1]==0) { // Continue past subroutine call (JAL)
done=2;
}
int ds=0;
int cc=0;
int hr;
-
+
+#ifndef FORCE32
provisional_32bit();
-
+#endif
if((u_int)addr&1) {
// First instruction is delay slot
cc=-1;
}
}
}
+#ifndef FORCE32
// If something jumps here with 64-bit values
// then promote those registers to 64 bits
if(bt[i])
current.is32=temp_is32;
}
}
-#ifdef FORCE32
- memset(p32, 0xff, sizeof(p32));
+#else
current.is32=-1LL;
#endif
regs[i].wasconst=current.isconst;
regs[i].was32=current.is32;
regs[i].wasdirty=current.dirty;
- #ifdef DESTRUCTIVE_WRITEBACK
+ #if defined(DESTRUCTIVE_WRITEBACK) && !defined(FORCE32)
// To change a dirty register from 32 to 64 bits, we must write
// it out during the previous cycle (for branches, 2 cycles)
if(i<slen-1&&bt[i+1]&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP)
if (rt1[i]==31) {
alloc_reg(¤t,i,31);
dirty_reg(¤t,31);
- //assert(rs1[i+1]!=31&&rs2[i+1]!=31);
+ assert(rs1[i+1]!=31&&rs2[i+1]!=31);
assert(rt1[i+1]!=rt1[i]);
#ifdef REG_PREFETCH
alloc_reg(¤t,i,PTEMP);
if (rt1[i]!=0) {
alloc_reg(¤t,i,rt1[i]);
dirty_reg(¤t,rt1[i]);
- //assert(rs1[i+1]!=31&&rs2[i+1]!=31);
+ assert(rs1[i+1]!=rt1[i]&&rs2[i+1]!=rt1[i]);
assert(rt1[i+1]!=rt1[i]);
#ifdef REG_PREFETCH
alloc_reg(¤t,i,PTEMP);
if (rt1[i]==31) { // BLTZAL/BGEZAL
alloc_reg(¤t,i,31);
dirty_reg(¤t,31);
- assert(rs1[i+1]!=31&&rs2[i+1]!=31);
//#ifdef REG_PREFETCH
//alloc_reg(¤t,i,PTEMP);
//#endif
clean_registers(0,slen-1,1);
/* Pass 7 - Identify 32-bit registers */
-
+#ifndef FORCE32
provisional_r32();
u_int r32=0;
}
//requires_32bit[i]=is32[i]&~unneeded_reg_upper[i]; // DEBUG
}
+#endif
if(itype[slen-1]==SPAN) {
bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception
u_int vpage=get_vpage(vaddr);
literal_pool(256);
//if(!(is32[i]&(~unneeded_reg_upper[i])&~(1LL<<CCREG)))
+#ifndef FORCE32
if(!requires_32bit[i])
+#else
+ if(1)
+#endif
{
assem_debug("%8x (%d) <- %8x\n",instr_addr[i],i,start+i*4);
assem_debug("jump_in: %x\n",start+i*4);
break;
case 3:
// Clear jump_out
+ #ifdef __arm__
+ if((expirep&2047)==0)
+ do_clear_cache();
+ #endif
ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift);
ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift);
break;