#include <stdlib.h>
#include <stdint.h> //include for uint64_t
#include <assert.h>
+#include <sys/mman.h>
#include "emu_if.h" //emulator interface
-#include <sys/mman.h>
+//#define DISASM
+//#define assem_debug printf
+//#define inv_debug printf
+#define assem_debug(...)
+#define inv_debug(...)
#ifdef __i386__
#include "assem_x86.h"
u_char dep1[MAXBLOCK];
u_char dep2[MAXBLOCK];
u_char lt1[MAXBLOCK];
+ static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
+ static uint64_t gte_rt[MAXBLOCK];
+ static uint64_t gte_unneeded[MAXBLOCK];
+ static int gte_reads_flags; // gte flag read encountered
int imm[MAXBLOCK];
u_int ba[MAXBLOCK];
char likely[MAXBLOCK];
static const u_int using_tlb=0;
#endif
static u_int sp_in_mirror;
+ int new_dynarec_did_compile;
u_int stop_after_jal;
extern u_char restore_candidate[512];
extern int cycle_count;
//#define DEBUG_CYCLE_COUNT 1
-void nullf() {}
-//#define assem_debug printf
-//#define inv_debug printf
-#define assem_debug nullf
-#define inv_debug nullf
-
static void tlb_hacks()
{
#ifndef DISABLE_TLB
if(verify_dirty(head->addr)) {
//printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
invalid_code[vaddr>>12]=0;
+ inv_code_start=inv_code_end=~0;
memory_map[vaddr>>12]|=0x40000000;
if(vpage<2048) {
#ifndef DISABLE_TLB
if(verify_dirty(head->addr)) {
//printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
invalid_code[vaddr>>12]=0;
+ inv_code_start=inv_code_end=~0;
memory_map[vaddr>>12]|=0x40000000;
if(vpage<2048) {
#ifndef DISABLE_TLB
}
}
-
+#ifndef FORCE32
void div64(int64_t dividend,int64_t divisor)
{
lo=dividend/divisor;
else original=loaded;
return original;
}
+#endif
#ifdef __i386__
#include "assem_x86.c"
head=next;
}
}
-void invalidate_block(u_int block)
+
+static void invalidate_block_range(u_int block, u_int first, u_int last)
{
u_int page=get_page(block<<12);
- u_int vpage=get_vpage(block<<12);
- inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
- //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
- u_int first,last;
- first=last=page;
- struct ll_entry *head;
- head=jump_dirty[vpage];
- //printf("page=%d vpage=%d\n",page,vpage);
- while(head!=NULL) {
- u_int start,end;
- if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
- get_bounds((int)head->addr,&start,&end);
- //printf("start: %x end: %x\n",start,end);
- if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) {
- if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
- if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
- if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
- }
- }
-#ifndef DISABLE_TLB
- if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
- if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
- if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
- if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
- }
- }
-#endif
- }
- head=head->next;
- }
//printf("first=%d last=%d\n",first,last);
invalidate_page(page);
assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
// Don't trap writes
invalid_code[block]=1;
-#ifdef PCSX
- invalid_code[((u_int)0x80000000>>12)|page]=1;
-#endif
#ifndef DISABLE_TLB
// If there is a valid TLB entry for this page, remove write protect
if(tlb_LUT_w[block]) {
memset(mini_ht,-1,sizeof(mini_ht));
#endif
}
+
+void invalidate_block(u_int block)
+{
+ u_int page=get_page(block<<12);
+ u_int vpage=get_vpage(block<<12);
+ inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
+ //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
+ u_int first,last;
+ first=last=page;
+ struct ll_entry *head;
+ head=jump_dirty[vpage];
+ //printf("page=%d vpage=%d\n",page,vpage);
+ while(head!=NULL) {
+ u_int start,end;
+ if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
+ get_bounds((int)head->addr,&start,&end);
+ //printf("start: %x end: %x\n",start,end);
+ if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) {
+ if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
+ if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
+ if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
+ }
+ }
+#ifndef DISABLE_TLB
+ if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
+ if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
+ if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
+ if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
+ }
+ }
+#endif
+ }
+ head=head->next;
+ }
+ invalidate_block_range(block,first,last);
+}
+
void invalidate_addr(u_int addr)
{
+#ifdef PCSX
+ //static int rhits;
+ // this check is done by the caller
+ //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
+ u_int page=get_page(addr);
+ if(page<2048) { // RAM
+ struct ll_entry *head;
+ u_int addr_min=~0, addr_max=0;
+ int mask=RAM_SIZE-1;
+ int pg1;
+ inv_code_start=addr&~0xfff;
+ inv_code_end=addr|0xfff;
+ pg1=page;
+ if (pg1>0) {
+ // must check previous page too because of spans..
+ pg1--;
+ inv_code_start-=0x1000;
+ }
+ for(;pg1<=page;pg1++) {
+ for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
+ u_int start,end;
+ get_bounds((int)head->addr,&start,&end);
+ if((start&mask)<=(addr&mask)&&(addr&mask)<(end&mask)) {
+ if(start<addr_min) addr_min=start;
+ if(end>addr_max) addr_max=end;
+ }
+ else if(addr<start) {
+ if(start<inv_code_end)
+ inv_code_end=start-1;
+ }
+ else {
+ if(end>inv_code_start)
+ inv_code_start=end;
+ }
+ }
+ }
+ if (addr_min!=~0) {
+ inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
+ inv_code_start=inv_code_end=~0;
+ invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
+ return;
+ }
+ else {
+ inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);//rhits);
+ }
+ //rhits=0;
+ if(page!=0) // FIXME: don't know what's up with page 0 (Klonoa)
+ return;
+ }
+#endif
invalidate_block(addr>>12);
}
+
// This is called when loading a save state.
// Anything could have changed, so invalidate everything.
void invalidate_all_pages()
void shiftimm_alloc(struct regstat *current,int i)
{
- clear_const(current,rs1[i]);
- clear_const(current,rt1[i]);
if(opcode2[i]<=0x3) // SLL/SRL/SRA
{
if(rt1[i]) {
alloc_reg(current,i,rt1[i]);
current->is32|=1LL<<rt1[i];
dirty_reg(current,rt1[i]);
+ if(is_const(current,rs1[i])) {
+ int v=get_const(current,rs1[i]);
+ if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
+ if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
+ if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
+ }
+ else clear_const(current,rt1[i]);
}
}
+ else
+ {
+ clear_const(current,rs1[i]);
+ clear_const(current,rt1[i]);
+ }
+
if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
{
if(rt1[i]) {
t=get_reg(i_regs->regmap,rt1[i]);
s=get_reg(i_regs->regmap,rs1[i]);
//assert(t>=0);
- if(t>=0){
+ if(t>=0&&!((i_regs->isconst>>t)&1)){
if(rs1[i]==0)
{
emit_zeroreg(t);
}
type=STORED_STUB;
}
+#ifdef PCSX
+ if(jaddr) {
+ // PCSX store handlers don't check invcode again
+ reglist|=1<<addr;
+ add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
+ jaddr=0;
+ }
+#endif
if(!using_tlb) {
if(!c||memtarget) {
#ifdef DESTRUCTIVE_SHIFT
void unneeded_registers(int istart,int iend,int r)
{
int i;
- uint64_t u,uu,b,bu;
- uint64_t temp_u,temp_uu;
+ uint64_t u,uu,gte_u,b,bu,gte_bu;
+ uint64_t temp_u,temp_uu,temp_gte_u;
uint64_t tdep;
if(iend==slen-1) {
u=1;uu=1;
uu=unneeded_reg_upper[iend+1];
u=1;uu=1;
}
+ gte_u=temp_gte_u=0;
+
for (i=iend;i>=istart;i--)
{
//printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
// Branch out of this block, flush all regs
u=1;
uu=1;
+ gte_u=0;
/* Hexagon hack
if(itype[i]==UJUMP&&rt1[i]==31)
{
uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
u|=1;uu|=1;
+ gte_u|=gte_rt[i+1];
+ gte_u&=~gte_rs[i+1];
// If branch is "likely" (and conditional)
// then we skip the delay slot on the fall-thru path
if(likely[i]) {
if(i<slen-1) {
u&=unneeded_reg[i+2];
uu&=unneeded_reg_upper[i+2];
+ gte_u&=gte_unneeded[i+2];
}
else
{
u=1;
uu=1;
+ gte_u=0;
}
}
}
{
// Unconditional branch
temp_u=1;temp_uu=1;
+ temp_gte_u=0;
} else {
// Conditional branch (not taken case)
temp_u=unneeded_reg[i+2];
temp_uu=unneeded_reg_upper[i+2];
+ temp_gte_u&=gte_unneeded[i+2];
}
// Merge in delay slot
tdep=(~temp_uu>>rt1[i+1])&1;
temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
temp_u|=1;temp_uu|=1;
+ temp_gte_u|=gte_rt[i+1];
+ temp_gte_u&=~gte_rs[i+1];
// If branch is "likely" (and conditional)
// then we skip the delay slot on the fall-thru path
if(likely[i]) {
if(i<slen-1) {
temp_u&=unneeded_reg[i+2];
temp_uu&=unneeded_reg_upper[i+2];
+ temp_gte_u&=gte_unneeded[i+2];
}
else
{
temp_u=1;
temp_uu=1;
+ temp_gte_u=0;
}
}
tdep=(~temp_uu>>rt1[i])&1;
temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
temp_u|=1;temp_uu|=1;
+ temp_gte_u|=gte_rt[i];
+ temp_gte_u&=~gte_rs[i];
unneeded_reg[i]=temp_u;
unneeded_reg_upper[i]=temp_uu;
+ gte_unneeded[i]=temp_gte_u;
// Only go three levels deep. This recursion can take an
// excessive amount of time if there are a lot of nested loops.
if(r<2) {
}else{
unneeded_reg[(ba[i]-start)>>2]=1;
unneeded_reg_upper[(ba[i]-start)>>2]=1;
+ gte_unneeded[(ba[i]-start)>>2]=0;
}
} /*else*/ if(1) {
if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
// Unconditional branch
u=unneeded_reg[(ba[i]-start)>>2];
uu=unneeded_reg_upper[(ba[i]-start)>>2];
+ gte_u=gte_unneeded[(ba[i]-start)>>2];
branch_unneeded_reg[i]=u;
branch_unneeded_reg_upper[i]=uu;
//u=1;
uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
u|=1;uu|=1;
+ gte_u|=gte_rt[i+1];
+ gte_u&=~gte_rs[i+1];
} else {
// Conditional branch
b=unneeded_reg[(ba[i]-start)>>2];
bu=unneeded_reg_upper[(ba[i]-start)>>2];
+ gte_bu=gte_unneeded[(ba[i]-start)>>2];
branch_unneeded_reg[i]=b;
branch_unneeded_reg_upper[i]=bu;
//b=1;
bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
b|=1;bu|=1;
+ gte_bu|=gte_rt[i+1];
+ gte_bu&=~gte_rs[i+1];
// If branch is "likely" then we skip the
// delay slot on the fall-thru path
if(likely[i]) {
u=b;
uu=bu;
+ gte_u=gte_bu;
if(i<slen-1) {
u&=unneeded_reg[i+2];
uu&=unneeded_reg_upper[i+2];
+ gte_u&=gte_unneeded[i+2];
//u=1;
//uu=1;
}
} else {
u&=b;
uu&=bu;
+ gte_u&=gte_bu;
//u=1;
//uu=1;
}
u|=1LL<<rt2[i];
uu|=1LL<<rt1[i];
uu|=1LL<<rt2[i];
+ gte_u|=gte_rt[i];
// Accessed registers are needed
u&=~(1LL<<rs1[i]);
u&=~(1LL<<rs2[i]);
uu&=~(1LL<<us1[i]);
uu&=~(1LL<<us2[i]);
+ gte_u&=~gte_rs[i];
// Source-target dependencies
uu&=~(tdep<<dep1[i]);
uu&=~(tdep<<dep2[i]);
// Save it
unneeded_reg[i]=u;
unneeded_reg_upper[i]=uu;
+ gte_unneeded[i]=gte_u;
/*
printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
printf("U:");
}
}
+#ifdef DISASM
/* disassembly */
void disassemble_inst(int i)
{
printf (" %x: %s\n",start+i*4,insn[i]);
}
}
+#else
+static void disassemble_inst(int i) {}
+#endif // DISASM
// clear the state completely, instead of just marking
// things invalid like invalidate_all_pages() does
pending_exception=0;
literalcount=0;
stop_after_jal=0;
+ inv_code_start=inv_code_end=~0;
+ gte_reads_flags=0;
// TLB
#ifndef DISABLE_TLB
using_tlb=0;
//rlist();
start = (u_int)addr&~3;
//assert(((u_int)addr&1)==0);
+ new_dynarec_did_compile=1;
#ifdef PCSX
if(!sp_in_mirror&&(signed int)(psxRegs.GPR.n.sp&0xffe00000)>0x80200000&&
0x10000<=psxRegs.GPR.n.sp&&(psxRegs.GPR.n.sp&~0xe0e00000)<RAM_SIZE) {
emit_movimm(start,0);
emit_writeword(0,(int)&pcaddr);
emit_jmp((int)new_dyna_leave);
+ literal_pool(0);
#ifdef __arm__
__clear_cache((void *)beginning,out);
#endif
#endif
#ifdef PCSX
case 0x12: strcpy(insn[i],"COP2"); type=NI;
- // note: COP MIPS-1 encoding differs from MIPS32
op2=(source[i]>>21)&0x1f;
- if (source[i]&0x3f) {
+ //if (op2 & 0x10) {
+ if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns
if (gte_handlers[source[i]&0x3f]!=NULL) {
- snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
+ if (gte_regnames[source[i]&0x3f]!=NULL)
+ strcpy(insn[i],gte_regnames[source[i]&0x3f]);
+ else
+ snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
type=C2OP;
}
}
us2[i]=0;
dep1[i]=0;
dep2[i]=0;
+ gte_rs[i]=gte_rt[i]=0;
switch(type) {
case LOAD:
rs1[i]=(source[i]>>21)&0x1f;
if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET
break;
case COP1:
- case COP2:
rs1[i]=0;
rs2[i]=0;
rt1[i]=0;
if(op2==5) us1[i]=rs1[i]; // DMTC1
rs2[i]=CSREG;
break;
+ case COP2:
+ rs1[i]=0;
+ rs2[i]=0;
+ rt1[i]=0;
+ rt2[i]=0;
+ if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2
+ if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2
+ rs2[i]=CSREG;
+ int gr=(source[i]>>11)&0x1F;
+ switch(op2)
+ {
+ case 0x00: gte_rs[i]=1ll<<gr; break; // MFC2
+ case 0x04: gte_rt[i]=1ll<<gr; break; // MTC2
+ case 0x02: gte_rs[i]=1ll<<(gr+32); // CFC2
+ if(gr==31&&!gte_reads_flags) {
+ assem_debug("gte flag read encountered @%08x\n",addr + i*4);
+ gte_reads_flags=1;
+ }
+ break;
+ case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
+ }
+ break;
case C1LS:
rs1[i]=(source[i]>>21)&0x1F;
rs2[i]=CSREG;
rt1[i]=0;
rt2[i]=0;
imm[i]=(short)source[i];
+ if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2
+ else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2
+ break;
+ case C2OP:
+ rs1[i]=0;
+ rs2[i]=0;
+ rt1[i]=0;
+ rt2[i]=0;
+ gte_rt[i]=1ll<<63; // every op changes flags
+ // TODO: other regs?
break;
case FLOAT:
case FCONV:
if(itype[slen-1]==SPAN) {
bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception
}
-
+
+#ifdef DISASM
/* Debug/disassembly */
- if((void*)assem_debug==(void*)printf)
for(i=0;i<slen;i++)
{
printf("U:");
#endif
}
}
+#endif // DISASM
/* Pass 8 - Assembly */
linkcount=0;stubcount=0;
for(i=0;i<slen;i++)
{
//if(ds) printf("ds: ");
- if((void*)assem_debug==(void*)printf) disassemble_inst(i);
+ disassemble_inst(i);
if(ds) {
ds=0; // Skip delay slot
if(bt[i]) assem_debug("OOPS - branch into delay slot\n");
}
#endif
}
+ inv_code_start=inv_code_end=~0;
#ifdef PCSX
- // PCSX maps all RAM mirror invalid_code tests to 0x80000000..0x80000000+RAM_SIZE
+ // for PCSX we need to mark all mirrors too
if(get_page(start)<(RAM_SIZE>>12))
for(i=start>>12;i<=(start+slen*4)>>12;i++)
- invalid_code[((u_int)0x80000000>>12)|i]=0;
+ invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
+ invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
+ invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
#endif
/* Pass 10 - Free memory by expiring oldest blocks */