#include "assem_arm.h"
#endif
+#ifdef __BLACKBERRY_QNX__
+#undef __clear_cache
+#define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
+#endif
+
#define MAXBLOCK 4096
#define MAX_OUTPUT_BLOCK_SIZE 262144
//#define DEBUG_CYCLE_COUNT 1
+#define NO_CYCLE_PENALTY_THR 12
+
int cycle_multiplier; // 100 for 1.0
static int CLOCK_ADJUST(int x)
if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
get_bounds((int)head->addr,&start,&end);
//printf("start: %x end: %x\n",start,end);
- if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) {
+ if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
if(page<2048) { // RAM
struct ll_entry *head;
u_int addr_min=~0, addr_max=0;
- int mask=RAM_SIZE-1;
+ u_int mask=RAM_SIZE-1;
+ u_int addr_main=0x80000000|(addr&mask);
int pg1;
- inv_code_start=addr&~0xfff;
- inv_code_end=addr|0xfff;
+ inv_code_start=addr_main&~0xfff;
+ inv_code_end=addr_main|0xfff;
pg1=page;
if (pg1>0) {
// must check previous page too because of spans..
for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
u_int start,end;
get_bounds((int)head->addr,&start,&end);
- if((start&mask)<=(addr&mask)&&(addr&mask)<(end&mask)) {
+ if(ram_offset) {
+ start-=ram_offset;
+ end-=ram_offset;
+ }
+ if(start<=addr_main&&addr_main<end) {
if(start<addr_min) addr_min=start;
if(end>addr_max) addr_max=end;
}
- else if(addr<start) {
+ else if(addr_main<start) {
if(start<inv_code_end)
inv_code_end=start-1;
}
return;
}
else {
+ inv_code_start=(addr&~mask)|(inv_code_start&mask);
+ inv_code_end=(addr&~mask)|(inv_code_end&mask);
inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
return;
}
int count;
int jaddr;
int idle=0;
+ int t=0;
if(itype[i]==RJUMP)
{
*adj=0;
//if(ba[i]>=start && ba[i]<(start+slen*4))
if(internal_branch(branch_regs[i].is32,ba[i]))
{
- int t=(ba[i]-start)>>2;
+ t=(ba[i]-start)>>2;
if(is_ds[t]) *adj=-1; // Branch into delay slot adds an extra cycle
else *adj=ccadj[t];
}
emit_jmp(0);
}
else if(*adj==0||invert) {
- emit_addimm_and_set_flags(CLOCK_ADJUST(count+2),HOST_CCREG);
+ int cycles=CLOCK_ADJUST(count+2);
+ // faster loop HACK
+ if (t&&*adj) {
+ int rel=t-i;
+ if(-NO_CYCLE_PENALTY_THR<rel&&rel<0)
+ cycles=CLOCK_ADJUST(*adj)+count+2-*adj;
+ }
+ emit_addimm_and_set_flags(cycles,HOST_CCREG);
jaddr=(int)out;
emit_jns(0);
}
#ifndef RAM_FIXED
ram_offset=(u_int)rdram-0x80000000;
#endif
+ if (ram_offset!=0)
+ printf("warning: RAM is not directly mapped, performance will suffer\n");
}
void new_dynarec_cleanup()
// GTE runs in parallel until accessed, divide by 2 for a rough guess
cc+=gte_cycletab[source[i]&0x3f]/2;
}
- else if(/*itype[i]==LOAD||*/itype[i]==STORE||itype[i]==C1LS) // load causes weird timing issues
+ else if(/*itype[i]==LOAD||itype[i]==STORE||*/itype[i]==C1LS) // load,store causes weird timing issues
{
cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER)
}