#include "assem_arm.h"
#endif
+#ifdef __BLACKBERRY_QNX__
+#undef __clear_cache
+#define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
+#endif
+
#define MAXBLOCK 4096
#define MAX_OUTPUT_BLOCK_SIZE 262144
int new_dynarec_did_compile;
int new_dynarec_hacks;
u_int stop_after_jal;
+#ifndef RAM_FIXED
+ static u_int ram_offset;
+#else
+ static const u_int ram_offset=0;
+#endif
extern u_char restore_candidate[512];
extern int cycle_count;
//#define DEBUG_CYCLE_COUNT 1
+#define NO_CYCLE_PENALTY_THR 12
+
int cycle_multiplier; // 100 for 1.0
static int CLOCK_ADJUST(int x)
if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
get_bounds((int)head->addr,&start,&end);
//printf("start: %x end: %x\n",start,end);
- if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) {
+ if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
if(page<2048) { // RAM
struct ll_entry *head;
u_int addr_min=~0, addr_max=0;
- int mask=RAM_SIZE-1;
+ u_int mask=RAM_SIZE-1;
+ u_int addr_main=0x80000000|(addr&mask);
int pg1;
- inv_code_start=addr&~0xfff;
- inv_code_end=addr|0xfff;
+ inv_code_start=addr_main&~0xfff;
+ inv_code_end=addr_main|0xfff;
pg1=page;
if (pg1>0) {
// must check previous page too because of spans..
for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
u_int start,end;
get_bounds((int)head->addr,&start,&end);
- if((start&mask)<=(addr&mask)&&(addr&mask)<(end&mask)) {
+ if(ram_offset) {
+ start-=ram_offset;
+ end-=ram_offset;
+ }
+ if(start<=addr_main&&addr_main<end) {
if(start<addr_min) addr_min=start;
if(end>addr_max) addr_max=end;
}
- else if(addr<start) {
+ else if(addr_main<start) {
if(start<inv_code_end)
inv_code_end=start-1;
}
return;
}
else {
+ inv_code_start=(addr&~mask)|(inv_code_start&mask);
+ inv_code_end=(addr&~mask)|(inv_code_end&mask);
inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
return;
}
jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
}
}
+ else if(ram_offset&&memtarget) {
+ emit_addimm(addr,ram_offset,HOST_TEMPREG);
+ fastload_reg_override=HOST_TEMPREG;
+ }
}else{ // using tlb
int x=0;
if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
gen_tlb_addr_r(a,map);
emit_movswl_indexed(x,a,tl);
}else{
- #ifdef RAM_OFFSET
+ #if 1 //def RAM_OFFSET
emit_movswl_indexed(x,a,tl);
#else
emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
gen_tlb_addr_r(a,map);
emit_movzwl_indexed(x,a,tl);
}else{
- #ifdef RAM_OFFSET
+ #if 1 //def RAM_OFFSET
emit_movzwl_indexed(x,a,tl);
#else
emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
#endif
}
+ else if(ram_offset&&memtarget) {
+ emit_addimm(addr,ram_offset,HOST_TEMPREG);
+ faststore_reg_override=HOST_TEMPREG;
+ }
}else{ // using tlb
int x=0;
if (opcode[i]==0x28) x=3; // SB
gen_tlb_addr_w(a,map);
emit_writehword_indexed(tl,x,a);
}else
- emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
+ //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
+ emit_writehword_indexed(tl,x,a);
}
type=STOREH_STUB;
}
#endif
}
}
+ u_int addr_val=constmap[i][s]+offset;
if(jaddr) {
add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
} else if(c&&!memtarget) {
- inline_writestub(type,i,constmap[i][s]+offset,i_regs->regmap,rs2[i],ccadj[i],reglist);
+ inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
+ }
+ // basic current block modification detection..
+ // not looking back as that should be in mips cache already
+ if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
+ printf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
+ assert(i_regs->regmap==regs[i].regmap); // not delay slot
+ if(i_regs->regmap==regs[i].regmap) {
+ load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
+ wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
+ emit_movimm(start+i*4+4,0);
+ emit_writeword(0,(int)&pcaddr);
+ emit_jmp((int)do_interrupt);
+ }
}
//if(opcode[i]==0x2B || opcode[i]==0x3F)
//if(opcode[i]==0x2B || opcode[i]==0x28)
if(!c) {
jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override);
}
+ else if(ram_offset&&memtarget) {
+ emit_addimm(ar,ram_offset,HOST_TEMPREG);
+ fastio_reg_override=HOST_TEMPREG;
+ }
if (opcode[i]==0x32) { // LWC2
#ifdef HOST_IMM_ADDR32
if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl);
int count;
int jaddr;
int idle=0;
+ int t=0;
if(itype[i]==RJUMP)
{
*adj=0;
//if(ba[i]>=start && ba[i]<(start+slen*4))
if(internal_branch(branch_regs[i].is32,ba[i]))
{
- int t=(ba[i]-start)>>2;
+ t=(ba[i]-start)>>2;
if(is_ds[t]) *adj=-1; // Branch into delay slot adds an extra cycle
else *adj=ccadj[t];
}
emit_jmp(0);
}
else if(*adj==0||invert) {
- emit_addimm_and_set_flags(CLOCK_ADJUST(count+2),HOST_CCREG);
+ int cycles=CLOCK_ADJUST(count+2);
+ // faster loop HACK
+ if (t&&*adj) {
+ int rel=t-i;
+ if(-NO_CYCLE_PENALTY_THR<rel&&rel<0)
+ cycles=CLOCK_ADJUST(*adj)+count+2-*adj;
+ }
+ emit_addimm_and_set_flags(cycles,HOST_CCREG);
jaddr=(int)out;
emit_jns(0);
}
{
printf("Init new dynarec\n");
out=(u_char *)BASE_ADDR;
+#if BASE_ADDR_FIXED
if (mmap (out, 1<<TARGET_SIZE_2,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0) <= 0) {printf("mmap() failed\n");}
+#else
+ // not all systems allow execute in data segment by default
+ if (mprotect(out, 1<<TARGET_SIZE_2, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
+ printf("mprotect() failed\n");
+#endif
#ifdef MUPEN64
rdword=&readmem_dword;
fake_pc.f.r.rs=&readmem_dword;
#endif
tlb_hacks();
arch_init();
+#ifndef RAM_FIXED
+ ram_offset=(u_int)rdram-0x80000000;
+#endif
+ if (ram_offset!=0)
+ printf("warning: RAM is not directly mapped, performance will suffer\n");
}
void new_dynarec_cleanup()
{
int n;
+ #if BASE_ADDR_FIXED
if (munmap ((void *)BASE_ADDR, 1<<TARGET_SIZE_2) < 0) {printf("munmap() failed\n");}
+ #endif
for(n=0;n<4096;n++) ll_clear(jump_in+n);
for(n=0;n<4096;n++) ll_clear(jump_out+n);
for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
// GTE runs in parallel until accessed, divide by 2 for a rough guess
cc+=gte_cycletab[source[i]&0x3f]/2;
}
- else if(/*itype[i]==LOAD||*/itype[i]==STORE||itype[i]==C1LS) // load causes weird timing issues
+ else if(/*itype[i]==LOAD||itype[i]==STORE||*/itype[i]==C1LS) // load,store causes weird timing issues
{
cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER)
}
// If we're within 256K of the end of the buffer,
// start over from the beginning. (Is 256K enough?)
- if((int)out>BASE_ADDR+(1<<TARGET_SIZE_2)-MAX_OUTPUT_BLOCK_SIZE) out=(u_char *)BASE_ADDR;
+ if((u_int)out>(u_int)BASE_ADDR+(1<<TARGET_SIZE_2)-MAX_OUTPUT_BLOCK_SIZE) out=(u_char *)BASE_ADDR;
// Trap writes to any of the pages we compiled
for(i=start>>12;i<=(start+slen*4)>>12;i++) {
/* Pass 10 - Free memory by expiring oldest blocks */
- int end=((((int)out-BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535;
+ int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535;
while(expirep!=end)
{
int shift=TARGET_SIZE_2-3; // Divide into 8 blocks
- int base=BASE_ADDR+((expirep>>13)<<shift); // Base address of this block
+ int base=(int)BASE_ADDR+((expirep>>13)<<shift); // Base address of this block
inv_debug("EXP: Phase %d\n",expirep);
switch((expirep>>11)&3)
{