From 4aa12b44ca21021f3a364052ffb44ba34df5c563 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 18 Feb 2026 04:35:36 +0200 Subject: [PATCH] drc: reduce use of globals that mem is basically lost while not compiling --- libpcsxcore/new_dynarec/assem_arm.c | 67 +- libpcsxcore/new_dynarec/assem_arm64.c | 29 +- libpcsxcore/new_dynarec/new_dynarec.c | 1292 +++++++++++++------------ 3 files changed, 721 insertions(+), 667 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index e8092c3a..a725b7d0 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -939,7 +939,7 @@ static int can_jump_or_call(const void *a) static void emit_call(const void *a_) { int a = (int)a_; - assem_debug("bl %p%s\n", log_addr(a), func_name(a_)); + assem_debug("bl %p%s\n", log_addr(a_), func_name(a_)); u_int offset=genjmp(a); output_w32(0xeb000000|offset); } @@ -1540,7 +1540,7 @@ static void literal_pool_jumpover(int n) // parsed by find_extjump_insn, check_extjump2 static void emit_extjump_stub(u_char *addr, u_int target) { - u_char *ptr=(u_char *)addr; + u_char *ptr = addr; assert((ptr[3]&0x0e)==0xa); (void)ptr; @@ -1628,9 +1628,9 @@ static void mov_loadtype_adj(enum stub_type type,int rs,int rt) #include "pcsxmem.h" #include "pcsxmem_inline.c" -static void do_readstub(int n) +static void do_readstub(struct compile_state *st, int n) { - assem_debug("do_readstub %p\n", log_addr(start + stubs[n].a*4)); + assem_debug("do_readstub %x\n", st->start + stubs[n].a*4); literal_pool(256); set_jump_target(stubs[n].addr, out); enum stub_type type=stubs[n].type; @@ -1799,9 +1799,9 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, restore_regs(reglist); } -static void do_writestub(int n) +static void do_writestub(struct compile_state *st, int n) { - assem_debug("do_writestub %p\n", log_addr(start + stubs[n].a*4)); + assem_debug("do_writestub %x\n", st->start + stubs[n].a*4); literal_pool(256); set_jump_target(stubs[n].addr, out); enum stub_type type=stubs[n].type; @@ -1919,10 +1919,11 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, /* Special assem */ -static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) +static void c2op_prologue(struct compile_state *st, u_int op, int i, + const struct regstat *i_regs, u_int reglist) { save_regs_all(reglist); - cop2_do_stall_check(op, i, i_regs, 0); + cop2_do_stall_check(st, op, i, i_regs, 0); #ifdef PCNT emit_movimm(op, 0); emit_far_call(pcnt_gte_start); @@ -1957,9 +1958,9 @@ static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf); } -static void c2op_assemble(int i, const struct regstat *i_regs) +static void c2op_assemble(struct compile_state *st, int i, const struct regstat *i_regs) { - u_int c2op = source[i] & 0x3f; + u_int c2op = st->source[i] & 0x3f; u_int reglist_full = get_host_reglist(i_regs->regmap); u_int reglist = reglist_full & CALLER_SAVE_REGS; int need_flags, need_ir; @@ -1968,20 +1969,20 @@ static void c2op_assemble(int i, const struct regstat *i_regs) need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", - source[i],gte_unneeded[i+1],need_flags,need_ir); + st->source[i], gte_unneeded[i+1], need_flags, need_ir); if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS)) need_flags=0; - int shift = (source[i] >> 19) & 1; - int lm = (source[i] >> 10) & 1; + int shift = (st->source[i] >> 19) & 1; + int lm = (st->source[i] >> 10) & 1; switch(c2op) { #ifndef DRC_DBG case GTE_MVMVA: { #ifdef HAVE_ARMV5 - int v = (source[i] >> 15) & 3; - int cv = (source[i] >> 13) & 3; - int mx = (source[i] >> 17) & 3; + int v = (st->source[i] >> 15) & 3; + int cv = (st->source[i] >> 13) & 3; + int mx = (st->source[i] >> 17) & 3; reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7} - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st, c2op, i, i_regs, reglist); /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ if(v<3) emit_ldrd(v*8,0,4); @@ -2000,7 +2001,7 @@ static void c2op_assemble(int i, const struct regstat *i_regs) else emit_readword(&zeromem_ptr,7); #ifdef __ARM_NEON__ - emit_movimm(source[i],1); // opcode + emit_movimm(st->source[i],1); // opcode emit_far_call(gteMVMVA_part_neon); if(need_flags) { emit_movimm(lm,1); @@ -2017,15 +2018,15 @@ static void c2op_assemble(int i, const struct regstat *i_regs) c2op_call_MACtoIR(lm,need_flags); #endif #else /* if not HAVE_ARMV5 */ - c2op_prologue(c2op,i,i_regs,reglist); - emit_movimm(source[i],1); // opcode + c2op_prologue(st,c2op,i,i_regs,reglist); + emit_movimm(st->source[i],1); // opcode emit_writeword(1,&psxRegs.code); emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); #endif break; } case GTE_OP: - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st,c2op,i,i_regs,reglist); emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift); if(need_flags||need_ir) { emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); @@ -2033,15 +2034,15 @@ static void c2op_assemble(int i, const struct regstat *i_regs) } break; case GTE_DPCS: - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st,c2op,i,i_regs,reglist); c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); break; case GTE_INTPL: - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st,c2op,i,i_regs,reglist); c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); break; case GTE_SQR: - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st,c2op,i,i_regs,reglist); emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift); if(need_flags||need_ir) { emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); @@ -2049,22 +2050,22 @@ static void c2op_assemble(int i, const struct regstat *i_regs) } break; case GTE_DCPL: - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st,c2op,i,i_regs,reglist); c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); break; case GTE_GPF: - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st,c2op,i,i_regs,reglist); c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); break; case GTE_GPL: - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st,c2op,i,i_regs,reglist); c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); break; #endif default: - c2op_prologue(c2op,i,i_regs,reglist); + c2op_prologue(st,c2op,i,i_regs,reglist); #ifdef DRC_DBG - emit_movimm(source[i],1); // opcode + emit_movimm(st->source[i],1); // opcode emit_writeword(1,&psxRegs.code); #endif emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); @@ -2310,16 +2311,18 @@ static void do_miniht_jump(int rs,int rh,int ht) { do_jump_vaddr(rs); } -static void do_miniht_insert(u_int return_address,int rt,int temp) { +static void do_miniht_insert(struct compile_state *st, u_int return_address, + int rt, int temp) +{ #ifndef HAVE_ARMV7 emit_movimm(return_address,rt); // PC into link register - add_to_linker(out,return_address,1); + add_to_linker(st, out, return_address, 1); emit_pcreladdr(temp); emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]); #else emit_movw(return_address&0x0000FFFF,rt); - add_to_linker(out,return_address,1); + add_to_linker(st, out, return_address, 1); emit_pcreladdr(temp); emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]); emit_movt(return_address&0xFFFF0000,rt); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 0b11a73a..dbea1e00 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1449,9 +1449,9 @@ static void loadstore_extend(enum stub_type type, u_int rs, u_int rt) #include "pcsxmem.h" //#include "pcsxmem_inline.c" -static void do_readstub(int n) +static void do_readstub(struct compile_state *st, int n) { - assem_debug("do_readstub %x\n",start+stubs[n].a*4); + assem_debug("do_readstub %x\n", st->start + stubs[n].a*4); set_jump_target(stubs[n].addr, out); enum stub_type type = stubs[n].type; int i = stubs[n].a; @@ -1625,9 +1625,9 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, restore_regs(reglist); } -static void do_writestub(int n) +static void do_writestub(struct compile_state *st, int n) { - assem_debug("do_writestub %x\n",start+stubs[n].a*4); + assem_debug("do_writestub %x\n", st->start + stubs[n].a*4); set_jump_target(stubs[n].addr, out); enum stub_type type=stubs[n].type; int i=stubs[n].a; @@ -1761,10 +1761,11 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, /* Special assem */ -static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) +static void c2op_prologue(struct compile_state *st, u_int op, int i, + const struct regstat *i_regs, u_int reglist) { save_load_regs_all(1, reglist); - cop2_do_stall_check(op, i, i_regs, 0); + cop2_do_stall_check(st, op, i, i_regs, 0); #ifdef PCNT emit_movimm(op, 0); emit_far_call(pcnt_gte_start); @@ -1782,9 +1783,9 @@ static void c2op_epilogue(u_int op,u_int reglist) save_load_regs_all(0, reglist); } -static void c2op_assemble(int i, const struct regstat *i_regs) +static void c2op_assemble(struct compile_state *st, int i, const struct regstat *i_regs) { - u_int c2op=source[i]&0x3f; + u_int c2op = st->source[i]&0x3f; u_int hr,reglist_full=0,reglist; int need_flags,need_ir; for(hr=0;hr>63); // +1 because of how liveness detection works need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n", - source[i],gte_unneeded[i+1],need_flags,need_ir); + st->source[i], gte_unneeded[i+1], need_flags, need_ir); if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS)) need_flags=0; //int shift = (source[i] >> 19) & 1; @@ -1804,8 +1805,8 @@ static void c2op_assemble(int i, const struct regstat *i_regs) switch(c2op) { default: (void)need_ir; - c2op_prologue(c2op, i, i_regs, reglist); - emit_movimm(source[i],1); // opcode + c2op_prologue(st, c2op, i, i_regs, reglist); + emit_movimm(st->source[i], 1); // opcode emit_writeword(1,&psxRegs.code); emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]); break; @@ -2066,10 +2067,12 @@ static void do_miniht_jump(u_int rs, u_int rh, u_int ht) { } // parsed by set_jump_target? -static void do_miniht_insert(u_int return_address,u_int rt,int temp) { +static void do_miniht_insert(struct compile_state *st, u_int return_address, + u_int rt, int temp) +{ emit_movz_lsl16((return_address>>16)&0xffff,rt); emit_movk(return_address&0xffff,rt); - add_to_linker(out,return_address,1); + add_to_linker(st, out, return_address, 1); emit_adr(out,temp); emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]); emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index a760f757..15c4b803 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -298,8 +298,6 @@ static struct compile_info static struct block_info *blocks[PAGE_COUNT]; static struct block_info *block_oldest, *block_last_compiled; static struct jump_info *jumps[PAGE_COUNT]; // [] - static u_int start; - static u_int *source; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; @@ -318,15 +316,10 @@ static struct compile_info static uint32_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; - static int slen; - static void *instr_addr[MAXBLOCK]; - static struct link_entry link_addr[MAXBLOCK]; - static int linkcount; static struct code_stub stubs[MAXBLOCK*3]; static int stubcount; static u_int literals[1024][2]; static int literalcount; - static int is_delayslot; static u_int stop_after_jal; static u_int ni_count; static u_int err_print_count; @@ -353,6 +346,17 @@ static struct compile_info #define stat_clear(s) #endif +struct compile_state +{ + u_int *source; + u_int start; + int slen; + int is_delayslot; + int linkcount; + void *instr_addr[MAXBLOCK]; + struct link_entry link_addr[MAXBLOCK]; +}; + #define HACK_ENABLED(x) ((ndrc_g.hacks | ndrc_g.hacks_pergame) & (x)) /* registers that may be allocated */ @@ -429,27 +433,30 @@ static void ndrc_write_invalidate_many(u_int addr, u_int end); static int new_recompile_block(u_int addr); static void invalidate_block(struct block_info *block); -static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_); +static void exception_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_); // Needed by assembler static void wb_register(signed char r, const signed char regmap[], u_int dirty); static void wb_dirtys(const signed char i_regmap[], u_int i_dirty); -static void wb_needed_dirtys(const signed char i_regmap[], u_int i_dirty, int addr); +static void wb_needed_dirtys(struct compile_state *st, const signed char i_regmap[], + u_int i_dirty, int addr); static void load_all_regs(const signed char i_regmap[]); static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[]); static void load_regs_entry(int t); static void load_all_consts(const signed char regmap[], u_int dirty, int i); static u_int get_host_reglist(const signed char *regmap); -static int get_final_value(int hr, int i, u_int *value); +static int get_final_value(struct compile_state *st, int hr, int i, u_int *value); static void add_stub(enum stub_type type, void *addr, void *retaddr, u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e); static void add_stub_r(enum stub_type type, void *addr, void *retaddr, int i, int addr_reg, const struct regstat *i_regs, int ccadj, u_int reglist); -static void add_to_linker(void *addr, u_int target, int ext); +static void add_to_linker(struct compile_state *st, void *addr, u_int target, int ext); static void *get_direct_memhandler(void *table, u_int addr, enum stub_type type, uintptr_t *addr_host); -static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist); +static void cop2_do_stall_check(struct compile_state *st, u_int op, int i, + const struct regstat *i_regs, u_int reglist); static void pass_args(int a0, int a1); static void emit_far_jump(const void *f); static void emit_far_call(const void *f); @@ -1088,14 +1095,14 @@ static uint32_t get_const(const struct regstat *cur, signed char reg) // Least soon needed registers // Look at the next ten instructions and see which registers // will be used. Try not to reallocate these. -static void lsn(u_char hsn[], int i) +static void lsn(struct compile_state *st, u_char hsn[], int i) { int j; int b=-1; for(j=0;j<9;j++) { - if(i+j>=slen) { - j=slen-i-1; + if(i+j >= st->slen) { + j = st->slen-i-1; break; } if (dops[i+j].is_ujump) @@ -1131,11 +1138,12 @@ static void lsn(u_char hsn[], int i) } if(b>=0) { - if(cinfo[i+b].ba>=start && cinfo[i+b].ba<(start+slen*4)) + if(cinfo[i+b].ba >= st->start && cinfo[i+b].ba < (st->start + st->slen*4)) { // Follow first branch - int t=(cinfo[i+b].ba-start)>>2; - j=7-b;if(t+j>=slen) j=slen-t-1; + int t=(cinfo[i+b].ba-st->start)>>2; + j=7-b; + if (t+j >= st->slen) j = st->slen-t-1; for(;j>=0;j--) { if(dops[t+j].rs1) if(hsn[dops[t+j].rs1]>j+b+2) hsn[dops[t+j].rs1]=j+b+2; @@ -1172,7 +1180,7 @@ static void lsn(u_char hsn[], int i) } // We only want to allocate registers if we're going to use them again soon -static int needed_again(int r, int i) +static int needed_again(struct compile_state *st, int r, int i) { int j; int b=-1; @@ -1180,13 +1188,13 @@ static int needed_again(int r, int i) if (i > 0 && dops[i-1].is_ujump) { - if(cinfo[i-1].bastart+slen*4-4) + if(cinfo[i-1].ba < st->start || cinfo[i-1].ba > st->start + st->slen*4-4) return 0; // Don't need any registers if exiting the block } for(j=0;j<9;j++) { - if(i+j>=slen) { - j=slen-i-1; + if(i+j >= st->slen) { + j = st->slen-i-1; break; } if (dops[i+j].is_ujump) @@ -1217,13 +1225,13 @@ static int needed_again(int r, int i) // Try to match register allocations at the end of a loop with those // at the beginning -static int loop_reg(int i, int r, int hr) +static int loop_reg(struct compile_state *st, int i, int r, int hr) { int j,k; for(j=0;j<9;j++) { - if(i+j>=slen) { - j=slen-i-1; + if(i+j >= st->slen) { + j = st->slen-i-1; break; } if (dops[i+j].is_ujump) @@ -1244,9 +1252,9 @@ static int loop_reg(int i, int r, int hr) if((unneeded_reg[i+k]>>r)&1) return hr; if(i+k>=0&&(dops[i+k].itype==UJUMP||dops[i+k].itype==CJUMP||dops[i+k].itype==SJUMP)) { - if(cinfo[i+k].ba>=start && cinfo[i+k].ba<(start+i*4)) + if(cinfo[i+k].ba >= st->start && cinfo[i+k].ba < (st->start+i*4)) { - int t=(cinfo[i+k].ba-start)>>2; + int t=(cinfo[i+k].ba - st->start)>>2; int reg=get_reg(regs[t].regmap_entry,r); if(reg>=0) return reg; //reg=get_reg(regs[t+1].regmap_entry,r); @@ -1766,12 +1774,13 @@ static void alloc_set(struct regstat *cur, int reg, int hr) cur->noevict |= 1u << hr; } -static void evict_alloc_reg(struct regstat *cur, int i, int reg, int preferred_hr) +static void evict_alloc_reg(struct compile_state *st, struct regstat *cur, + int i, int reg, int preferred_hr) { u_char hsn[MAXREG+1]; int j, r, hr; memset(hsn, 10, sizeof(hsn)); - lsn(hsn, i); + lsn(st, hsn, i); //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); if(i>0) { // Don't evict the cycle count at entry points, otherwise the entry @@ -1826,7 +1835,8 @@ static void evict_alloc_reg(struct regstat *cur, int i, int reg, int preferred_h // Note: registers are allocated clean (unmodified state) // if you intend to modify the register, you must call dirty_reg(). -static void alloc_reg(struct regstat *cur,int i,signed char reg) +static void alloc_reg(struct compile_state *st, struct regstat *cur, + int i, signed char reg) { int r,hr; int preferred_reg = PREFERRED_REG_FIRST @@ -1846,7 +1856,7 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) } // Keep the same mapping if the register was already allocated in a loop - preferred_reg = loop_reg(i,reg,preferred_reg); + preferred_reg = loop_reg(st,i,reg,preferred_reg); // Try to allocate the preferred register if (cur->regmap[preferred_reg] == -1) { @@ -1915,13 +1925,14 @@ static void alloc_reg(struct regstat *cur,int i,signed char reg) // Ok, now we have to evict someone // Pick a register we hopefully won't need soon - evict_alloc_reg(cur, i, reg, preferred_reg); + evict_alloc_reg(st, cur, i, reg, preferred_reg); } // Allocate a temporary register. This is done without regard to // dirty status or whether the register we request is on the unneeded list // Note: This will only allocate one register, even if called multiple times -static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) +static void alloc_reg_temp(struct compile_state *st, struct regstat *cur, + int i, signed char reg) { int r,hr; @@ -1959,10 +1970,10 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) // Ok, now we have to evict someone // Pick a register we hopefully won't need soon - evict_alloc_reg(cur, i, reg, 0); + evict_alloc_reg(st, cur, i, reg, 0); } -static void mov_alloc(struct regstat *current,int i) +static void mov_alloc(struct compile_state *st, struct regstat *current, int i) { if (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG) { alloc_cc(current,i); // for stalls @@ -1970,23 +1981,23 @@ static void mov_alloc(struct regstat *current,int i) } // Note: Don't need to actually alloc the source registers - //alloc_reg(current,i,dops[i].rs1); - alloc_reg(current,i,dops[i].rt1); + //alloc_reg(st, current, i, dops[i].rs1); + alloc_reg(st, current, i, dops[i].rt1); clear_const(current,dops[i].rs1); clear_const(current,dops[i].rt1); dirty_reg(current,dops[i].rt1); } -static void shiftimm_alloc(struct regstat *current,int i) +static void shiftimm_alloc(struct compile_state *st, struct regstat *current,int i) { if(dops[i].opcode2<=0x3) // SLL/SRL/SRA { if(dops[i].rt1) { - if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + if(dops[i].rs1&&needed_again(st,dops[i].rs1,i)) alloc_reg(st, current,i,dops[i].rs1); else dops[i].use_lt1=!!dops[i].rs1; - alloc_reg(current,i,dops[i].rt1); - dirty_reg(current,dops[i].rt1); + alloc_reg(st, current, i, dops[i].rt1); + dirty_reg( current, dops[i].rt1); if(is_const(current,dops[i].rs1)) { int v=get_const(current,dops[i].rs1); if(dops[i].opcode2==0x00) set_const(current,dops[i].rt1,v<=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU if(dops[i].rt1) { if(dops[i].rs1&&dops[i].rs2) { - alloc_reg(current,i,dops[i].rs1); - alloc_reg(current,i,dops[i].rs2); + alloc_reg(st, current, i, dops[i].rs1); + alloc_reg(st, current, i, dops[i].rs2); } else { - if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); - if(dops[i].rs2&&needed_again(dops[i].rs2,i)) alloc_reg(current,i,dops[i].rs2); + if(dops[i].rs1&&needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1); + if(dops[i].rs2&&needed_again(st,dops[i].rs2,i)) alloc_reg(st,current,i,dops[i].rs2); } - alloc_reg(current,i,dops[i].rt1); + alloc_reg(st, current, i, dops[i].rt1); } if (dops[i].may_except) { alloc_cc_optional(current, i); // for exceptions - alloc_reg_temp(current, i, -1); + alloc_reg_temp(st, current, i, -1); cinfo[i].min_free_regs = 1; } } else if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU if(dops[i].rt1) { - alloc_reg(current,i,dops[i].rs1); - alloc_reg(current,i,dops[i].rs2); - alloc_reg(current,i,dops[i].rt1); + alloc_reg(st, current, i, dops[i].rs1); + alloc_reg(st, current, i, dops[i].rs2); + alloc_reg(st, current, i, dops[i].rt1); } } else if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR if(dops[i].rt1) { if(dops[i].rs1&&dops[i].rs2) { - alloc_reg(current,i,dops[i].rs1); - alloc_reg(current,i,dops[i].rs2); + alloc_reg(st, current, i, dops[i].rs1); + alloc_reg(st, current, i, dops[i].rs2); } else { - if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); - if(dops[i].rs2&&needed_again(dops[i].rs2,i)) alloc_reg(current,i,dops[i].rs2); + if(dops[i].rs1&&needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1); + if(dops[i].rs2&&needed_again(st,dops[i].rs2,i)) alloc_reg(st,current,i,dops[i].rs2); } - alloc_reg(current,i,dops[i].rt1); + alloc_reg(st, current, i, dops[i].rt1); } } clear_const(current,dops[i].rs1); @@ -2084,11 +2095,11 @@ static void alu_alloc(struct regstat *current,int i) dirty_reg(current,dops[i].rt1); } -static void imm16_alloc(struct regstat *current,int i) +static void imm16_alloc(struct compile_state *st, struct regstat *current, int i) { - if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); + if(dops[i].rs1&&needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1); else dops[i].use_lt1=!!dops[i].rs1; - if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1); + if(dops[i].rt1) alloc_reg(st,current,i,dops[i].rt1); if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU clear_const(current,dops[i].rs1); clear_const(current,dops[i].rt1); @@ -2110,7 +2121,7 @@ static void imm16_alloc(struct regstat *current,int i) else clear_const(current,dops[i].rt1); if (dops[i].may_except) { alloc_cc_optional(current, i); // for exceptions - alloc_reg_temp(current, i, -1); + alloc_reg_temp(st, current, i, -1); cinfo[i].min_free_regs = 1; } } @@ -2120,28 +2131,28 @@ static void imm16_alloc(struct regstat *current,int i) dirty_reg(current,dops[i].rt1); } -static void load_alloc(struct regstat *current,int i) +static void load_alloc(struct compile_state *st, struct regstat *current, int i) { int need_temp = 0; clear_const(current,dops[i].rt1); - //if(dops[i].rs1!=dops[i].rt1&&needed_again(dops[i].rs1,i)) clear_const(current,dops[i].rs1); // Does this help or hurt? + //if(dops[i].rs1!=dops[i].rt1&&needed_again(st,dops[i].rs1,i)) clear_const(current,dops[i].rs1); // Does this help or hurt? if(!dops[i].rs1) current->u&=~1LL; // Allow allocating r0 if it's the source register - if (needed_again(dops[i].rs1, i)) - alloc_reg(current, i, dops[i].rs1); + if (needed_again(st,dops[i].rs1, i)) + alloc_reg(st,current, i, dops[i].rs1); if (ram_offset) - alloc_reg(current, i, ROREG); + alloc_reg(st,current, i, ROREG); if (dops[i].may_except) { alloc_cc_optional(current, i); // for exceptions need_temp = 1; } if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) { - alloc_reg(current,i,dops[i].rt1); + alloc_reg(st,current,i,dops[i].rt1); assert(get_reg_w(current->regmap, dops[i].rt1)>=0); dirty_reg(current,dops[i].rt1); // LWL/LWR need a temporary register for the old value if(dops[i].opcode==0x22||dops[i].opcode==0x26) { - alloc_reg(current,i,FTEMP); + alloc_reg(st,current,i,FTEMP); need_temp = 1; } } @@ -2150,59 +2161,59 @@ static void load_alloc(struct regstat *current,int i) // Load to r0 or unneeded register (dummy load) // but we still need a register to calculate the address if(dops[i].opcode==0x22||dops[i].opcode==0x26) - alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary + alloc_reg(st,current,i,FTEMP); // LWL/LWR need another temporary need_temp = 1; } if (need_temp) { - alloc_reg_temp(current, i, -1); + alloc_reg_temp(st, current, i, -1); cinfo[i].min_free_regs = 1; } } // this may eat up to 7 registers -static void store_alloc(struct regstat *current, int i) +static void store_alloc(struct compile_state *st, struct regstat *current, int i) { clear_const(current,dops[i].rs2); if(!(dops[i].rs2)) current->u&=~1LL; // Allow allocating r0 if necessary - if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); - alloc_reg(current,i,dops[i].rs2); + if(needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1); + alloc_reg(st,current,i,dops[i].rs2); if (ram_offset) - alloc_reg(current, i, ROREG); + alloc_reg(st,current, i, ROREG); #if defined(HOST_IMM8) // On CPUs without 32-bit immediates we need a pointer to invalid_code - alloc_reg(current, i, INVCP); + alloc_reg(st,current, i, INVCP); #endif if (dops[i].opcode == 0x2a || dops[i].opcode == 0x2e) { // SWL/SWL - alloc_reg(current,i,FTEMP); + alloc_reg(st,current,i,FTEMP); } if (dops[i].may_except) alloc_cc_optional(current, i); // for exceptions // We need a temporary register for address generation - alloc_reg_temp(current,i,-1); + alloc_reg_temp(st,current,i,-1); cinfo[i].min_free_regs=1; } -static void c2ls_alloc(struct regstat *current, int i) +static void c2ls_alloc(struct compile_state *st, struct regstat *current, int i) { clear_const(current,dops[i].rt1); - if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); - alloc_reg(current,i,FTEMP); + if(needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1); + alloc_reg(st,current,i,FTEMP); if (ram_offset) - alloc_reg(current, i, ROREG); + alloc_reg(st,current, i, ROREG); #if defined(HOST_IMM8) // On CPUs without 32-bit immediates we need a pointer to invalid_code if (dops[i].opcode == 0x3a) // SWC2 - alloc_reg(current,i,INVCP); + alloc_reg(st,current,i,INVCP); #endif if (dops[i].may_except) alloc_cc_optional(current, i); // for exceptions // We need a temporary register for address generation - alloc_reg_temp(current,i,-1); + alloc_reg_temp(st,current,i,-1); cinfo[i].min_free_regs=1; } #ifndef multdiv_alloc -static void multdiv_alloc(struct regstat *current,int i) +static void multdiv_alloc(struct compile_state *st, struct regstat *current, int i) { // case 0x18: MULT // case 0x19: MULTU @@ -2214,56 +2225,56 @@ static void multdiv_alloc(struct regstat *current,int i) dirty_reg(current,CCREG); current->u &= ~(1ull << HIREG); current->u &= ~(1ull << LOREG); - alloc_reg(current, i, HIREG); - alloc_reg(current, i, LOREG); + alloc_reg(st,current, i, HIREG); + alloc_reg(st,current, i, LOREG); dirty_reg(current, HIREG); dirty_reg(current, LOREG); if ((dops[i].opcode2 & 0x3e) == 0x1a || (dops[i].rs1 && dops[i].rs2)) // div(u) { - alloc_reg(current, i, dops[i].rs1); - alloc_reg(current, i, dops[i].rs2); + alloc_reg(st,current, i, dops[i].rs1); + alloc_reg(st,current, i, dops[i].rs2); } // else multiply by zero is zero } #endif -static void cop0_alloc(struct regstat *current,int i) +static void cop0_alloc(struct compile_state *st, struct regstat *current, int i) { if(dops[i].opcode2==0) // MFC0 { if(dops[i].rt1) { clear_const(current,dops[i].rt1); - alloc_reg(current,i,dops[i].rt1); + alloc_reg(st,current,i,dops[i].rt1); dirty_reg(current,dops[i].rt1); } } else if(dops[i].opcode2==4) // MTC0 { - if (((source[i]>>11)&0x1e) == 12) { + if (((st->source[i]>>11)&0x1e) == 12) { alloc_cc(current, i); dirty_reg(current, CCREG); } if(dops[i].rs1){ clear_const(current,dops[i].rs1); - alloc_reg(current,i,dops[i].rs1); + alloc_reg(st,current,i,dops[i].rs1); alloc_all(current,i); } else { alloc_all(current,i); // FIXME: Keep r0 current->u&=~1LL; - alloc_reg(current,i,0); + alloc_reg(st,current,i,0); } cinfo[i].min_free_regs = HOST_REGS; } } -static void rfe_alloc(struct regstat *current, int i) +static void rfe_alloc(struct compile_state *st, struct regstat *current, int i) { alloc_all(current, i); cinfo[i].min_free_regs = HOST_REGS; } -static void cop2_alloc(struct regstat *current,int i) +static void cop2_alloc(struct compile_state *st, struct regstat *current, int i) { if (dops[i].opcode2 < 3) // MFC2/CFC2 { @@ -2271,7 +2282,7 @@ static void cop2_alloc(struct regstat *current,int i) dirty_reg(current,CCREG); if(dops[i].rt1){ clear_const(current,dops[i].rt1); - alloc_reg(current,i,dops[i].rt1); + alloc_reg(st,current,i,dops[i].rt1); dirty_reg(current,dops[i].rt1); } } @@ -2279,25 +2290,25 @@ static void cop2_alloc(struct regstat *current,int i) { if(dops[i].rs1){ clear_const(current,dops[i].rs1); - alloc_reg(current,i,dops[i].rs1); + alloc_reg(st,current,i,dops[i].rs1); } else { current->u&=~1LL; - alloc_reg(current,i,0); + alloc_reg(st,current,i,0); } } - alloc_reg_temp(current,i,-1); + alloc_reg_temp(st,current,i,-1); cinfo[i].min_free_regs=1; } -static void c2op_alloc(struct regstat *current,int i) +static void c2op_alloc(struct compile_state *st, struct regstat *current, int i) { alloc_cc(current,i); // for stalls dirty_reg(current,CCREG); - alloc_reg_temp(current,i,-1); + alloc_reg_temp(st,current,i,-1); } -static void syscall_alloc(struct regstat *current,int i) +static void syscall_alloc(struct compile_state *st, struct regstat *current, int i) { alloc_cc(current,i); dirty_reg(current,CCREG); @@ -2306,7 +2317,7 @@ static void syscall_alloc(struct regstat *current,int i) current->isconst=0; } -static void delayslot_alloc(struct regstat *current,int i) +static void delayslot_alloc(struct compile_state *st, struct regstat *current, int i) { switch(dops[i].itype) { case UJUMP: @@ -2316,45 +2327,45 @@ static void delayslot_alloc(struct regstat *current,int i) case SYSCALL: case HLECALL: case IMM16: - imm16_alloc(current,i); + imm16_alloc(st, current, i); break; case LOAD: case LOADLR: - load_alloc(current,i); + load_alloc(st, current, i); break; case STORE: case STORELR: - store_alloc(current,i); + store_alloc(st, current, i); break; case ALU: - alu_alloc(current,i); + alu_alloc(st, current, i); break; case SHIFT: - shift_alloc(current,i); + shift_alloc(st, current, i); break; case MULTDIV: - multdiv_alloc(current,i); + multdiv_alloc(st, current, i); break; case SHIFTIMM: - shiftimm_alloc(current,i); + shiftimm_alloc(st, current, i); break; case MOV: - mov_alloc(current,i); + mov_alloc(st, current, i); break; case COP0: - cop0_alloc(current,i); + cop0_alloc(st, current, i); break; case RFE: - rfe_alloc(current,i); + rfe_alloc(st, current, i); break; case COP2: - cop2_alloc(current,i); + cop2_alloc(st, current, i); break; case C2LS: - c2ls_alloc(current,i); + c2ls_alloc(st, current, i); break; case C2OP: - c2op_alloc(current,i); + c2op_alloc(st, current, i); break; } } @@ -2877,10 +2888,10 @@ enum { MTYPE_1F80, }; -static int get_ptr_mem_type(u_int a) +static int get_ptr_mem_type(struct compile_state *st, u_int a) { if(a < 0x00200000) { - if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0)) + if(a<0x1000&&((st->start>>20)==0xbfc||(st->start>>24)==0xa0)) // return wrong, must use memhandler for BIOS self-test to pass // 007 does similar stuff from a00 mirror, weird stuff return MTYPE_8000; @@ -2907,8 +2918,9 @@ static int get_ro_reg(const struct regstat *i_regs, int host_tempreg_free) return r; } -static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, - int addr, int *offset_reg, int *addr_reg_override, int ccadj_) +static void *emit_fastpath_cmp_jump(struct compile_state *st, int i, + const struct regstat *i_regs, int addr, int *offset_reg, + int *addr_reg_override, int ccadj_) { void *jaddr = NULL; int type = 0; @@ -2916,13 +2928,13 @@ static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, assert(addr >= 0); *offset_reg = -1; if(((smrv_strong|smrv_weak)>>mr)&1) { - type=get_ptr_mem_type(ndrc_smrv_regs[mr]); - //printf("set %08x @%08x r%d %d\n", ndrc_smrv_regs[mr], start+i*4, mr, type); + type=get_ptr_mem_type(st, ndrc_smrv_regs[mr]); + //printf("set %08x @%08x r%d %d\n", ndrc_smrv_regs[mr], st->start+i*4, mr, type); } else { // use the mirror we are running on - type=get_ptr_mem_type(start); - //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type); + type=get_ptr_mem_type(st, st->start); + //printf("set nospec @%08x r%d %d\n", st->start+i*4, mr, type); } if (dops[i].may_except) { @@ -3086,7 +3098,8 @@ static void do_store_byte(int a, int rt, int offset_reg) emit_writebyte_indexed(rt, 0, a); } -static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void load_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { int addr = cinfo[i].addr; int s,tl; @@ -3123,10 +3136,10 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) if(!c) { #ifdef R29_HACK // Strmnnrmn's speed hack - if(dops[i].rs1!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) + if(dops[i].rs1!=29||st->start<0x80001000||st->start>=0x80000000+RAM_SIZE) #endif { - jaddr = emit_fastpath_cmp_jump(i, i_regs, addr, + jaddr = emit_fastpath_cmp_jump(st, i, i_regs, addr, &offset_reg, &fastio_reg_override, ccadj_); } } @@ -3228,7 +3241,8 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) } #ifndef loadlr_assemble -static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void loadlr_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { int addr = cinfo[i].addr; int s,tl,temp,temp2; @@ -3258,7 +3272,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) }else{ emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR } - jaddr = emit_fastpath_cmp_jump(i, i_regs, temp2, + jaddr = emit_fastpath_cmp_jump(st, i, i_regs, temp2, &offset_reg, &fastio_reg_override, ccadj_); } else { @@ -3308,10 +3322,10 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) } #endif -static void do_invstub(int n) +static void do_invstub(struct compile_state *st, int n) { literal_pool(20); - assem_debug("do_invstub %x\n", start + stubs[n].e*4); + assem_debug("do_invstub %x\n", st->start + stubs[n].e*4); u_int reglist = stubs[n].a; u_int addrr = stubs[n].b; int ofs_start = stubs[n].c; @@ -3340,7 +3354,8 @@ static void do_invstub(int n) emit_jmp(stubs[n].retaddr); } -static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglist, int addr) +static void do_store_smc_check(struct compile_state *st, int i, + const struct regstat *i_regs, u_int reglist, int addr) { if (HACK_ENABLED(NDHACK_NO_SMC_CHECK)) return; @@ -3353,7 +3368,7 @@ static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglis return; int j, imm_maxdiff = 32, imm_min = cinfo[i].imm, imm_max = cinfo[i].imm, count = 1; - if (i < slen - 1 && dops[i+1].is_store && dops[i+1].rs1 == dops[i].rs1 + if (i < st->slen - 1 && dops[i+1].is_store && dops[i+1].rs1 == dops[i].rs1 && abs(cinfo[i+1].imm - cinfo[i].imm) <= imm_maxdiff) return; for (j = i - 1; j >= 0; j--) { @@ -3396,7 +3411,8 @@ static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglis // (also true non-existent 0x20000000 mirror that shouldn't matter) #define is_ram_addr(a) !((a) & 0x5f800000) -static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void store_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { int s,tl; int addr = cinfo[i].addr; @@ -3423,7 +3439,7 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<regmap, dops[i].rs2, ccadj_, reglist); // basic current block modification detection.. // not looking back as that should be in mips cache already // (see Spyro2 title->attract mode) - if (start + i*4 < addr_const && addr_const < start + slen*4) { + if (st->start + i*4 < addr_const && addr_const < st->start + st->slen*4) { SysPrintf_lim("write to %08x hits block %08x, pc=%08x\n", - addr_const, start, start+i*4); + addr_const, st->start, st->start+i*4); assert(i_regs->regmap==regs[i].regmap); // not delay slot if(i_regs->regmap==regs[i].regmap) { load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); wb_dirtys(regs[i].regmap_entry,regs[i].wasdirty); emit_readptr(&hash_table_ptr, 1); - emit_movimm(start+i*4+4, 0); + emit_movimm(st->start+i*4+4, 0); emit_writeword(0, &psxRegs.pc); emit_addimm(HOST_CCREG, 2, HOST_CCREG); emit_far_call(ndrc_get_addr_ht); @@ -3491,7 +3507,8 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) } } -static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void storelr_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { int addr = cinfo[i].addr; int s,tl; @@ -3603,15 +3620,16 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) if (!c || !memtarget) add_stub_r(STORELR_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist); if (!c || is_ram_addr(addr_const)) - do_store_smc_check(i, i_regs, reglist, addr); + do_store_smc_check(st, i, i_regs, reglist, addr); } -static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void cop0_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { if(dops[i].opcode2==0) // MFC0 { signed char t=get_reg_w(i_regs->regmap, dops[i].rt1); - u_int copr=(source[i]>>11)&0x1f; + u_int copr=(st->source[i]>>11)&0x1f; if(t>=0&&dops[i].rt1!=0) { emit_readword(&psxRegs.CP0.r[copr],t); } @@ -3620,7 +3638,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) { int s = get_reg(i_regs->regmap, dops[i].rs1); int cc = get_reg(i_regs->regmap, CCREG); - char copr=(source[i]>>11)&0x1f; + char copr=(st->source[i]>>11)&0x1f; assert(s>=0); wb_register(dops[i].rs1,i_regs->regmap,i_regs->dirty); if (copr == 12 || copr == 13) { @@ -3630,7 +3648,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_add(HOST_CCREG, HOST_TEMPREG, HOST_CCREG); emit_addimm(HOST_CCREG, ccadj_ + 2, HOST_CCREG); emit_writeword(HOST_CCREG, &psxRegs.cycle); - if (is_delayslot) { + if (st->is_delayslot) { // burn cycles to cause cc_interrupt, which will // reschedule next_interupt. Relies on CCREG from above. assem_debug("MTC0 DS %d\n", copr); @@ -3644,7 +3662,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_loadreg(dops[i].rs1,s); return; } - emit_movimm(start+i*4+4,HOST_TEMPREG); + emit_movimm(st->start+i*4+4,HOST_TEMPREG); emit_writeword(HOST_TEMPREG,&psxRegs.pc); } if (s != 2) @@ -3657,9 +3675,9 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_readword(&last_count,HOST_TEMPREG); emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); //emit_writeword(HOST_TEMPREG,&last_count); - assert(!is_delayslot); + assert(!st->is_delayslot); emit_readword(&psxRegs.pc, 0); - emit_movimm(start+i*4+4, HOST_TEMPREG); + emit_movimm(st->start+i*4+4, HOST_TEMPREG); emit_cmp(HOST_TEMPREG, 0); void *jaddr = out; emit_jeq(0); @@ -3684,14 +3702,14 @@ static void rfe_assemble(int i, const struct regstat *i_regs) emit_writeword(0, &psxRegs.CP0.n.SR); } -static int cop2_is_stalling_op(int i, int *cycles) +static int cop2_is_stalling_op(struct compile_state *st, int i, int *cycles) { if (dops[i].itype == COP2 || dops[i].itype == C2LS) { *cycles = 0; return 1; } if (dops[i].itype == C2OP) { - *cycles = gte_cycletab[source[i] & 0x3f]; + *cycles = gte_cycletab[st->source[i] & 0x3f]; return 1; } // ... what about MTC2/CTC2/LWC2? @@ -3718,7 +3736,8 @@ static void emit_log_gte_stall(int i, int stall, u_int reglist) } #endif -static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist) +static void cop2_do_stall_check(struct compile_state *st, u_int op, int i, + const struct regstat *i_regs, u_int reglist) { int j = i, cycles, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed; int rtmp = reglist_find_free(reglist); @@ -3733,7 +3752,7 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u if (!dops[i].bt) { for (j = i - 1; j >= 0; j--) { //if (dops[j].is_ds) break; - if (cop2_is_stalling_op(j, &other_gte_op_cycles) || dops[j].bt) + if (cop2_is_stalling_op(st, j, &other_gte_op_cycles) || dops[j].bt) break; if (j > 0 && cinfo[j - 1].ccadj > cinfo[j].ccadj) break; @@ -3768,12 +3787,12 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u if (cycles == 0) return; other_gte_op_cycles = -1; - for (j = i + 1; j < slen; j++) { - if (cop2_is_stalling_op(j, &other_gte_op_cycles)) + for (j = i + 1; j < st->slen; j++) { + if (cop2_is_stalling_op(st, j, &other_gte_op_cycles)) break; if (dops[j].is_jump) { // check ds - if (j + 1 < slen && cop2_is_stalling_op(j + 1, &other_gte_op_cycles)) + if (j + 1 < st->slen && cop2_is_stalling_op(st, j + 1, &other_gte_op_cycles)) j++; break; } @@ -3781,7 +3800,7 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u if (other_gte_op_cycles >= 0) // will handle stall when assembling that op return; - cycles_passed = cinfo[min(j, slen -1)].ccadj - cinfo[i].ccadj; + cycles_passed = cinfo[min(j, st->slen -1)].ccadj - cinfo[i].ccadj; if (cycles_passed >= cycles) return; assem_debug("; save gteBusyCycle\n"); @@ -3807,7 +3826,8 @@ static int check_multdiv(int i, int *cycles) return 1; } -static void multdiv_prepare_stall(int i, const struct regstat *i_regs, int ccadj_) +static void multdiv_prepare_stall(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { int j, found = 0, c = 0; if (HACK_ENABLED(NDHACK_NO_STALLS)) @@ -3816,14 +3836,14 @@ static void multdiv_prepare_stall(int i, const struct regstat *i_regs, int ccadj // happens occasionally... cc evicted? Don't bother then return; } - for (j = i + 1; j < slen; j++) { + for (j = i + 1; j < st->slen; j++) { if (dops[j].bt) break; if ((found = is_mflohi(j))) break; if (dops[j].is_jump) { // check ds - if (j + 1 < slen && (found = is_mflohi(j + 1))) + if (j + 1 < st->slen && (found = is_mflohi(j + 1))) j++; break; } @@ -3968,7 +3988,8 @@ static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) } } -static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void c2ls_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { int s,tl; int ar; @@ -3980,7 +4001,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) int fastio_reg_override = -1; u_int addr_const = ~0; u_int reglist=get_host_reglist(i_regs->regmap); - u_int copr=(source[i]>>16)&0x1f; + u_int copr=(st->source[i]>>16)&0x1f; s=get_reg(i_regs->regmap,dops[i].rs1); tl=get_reg(i_regs->regmap,FTEMP); offset=cinfo[i].imm; @@ -4003,7 +4024,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) } } - cop2_do_stall_check(0, i, i_regs, reglist); + cop2_do_stall_check(st, 0, i, i_regs, reglist); if (dops[i].opcode==0x3a) { // SWC2 cop2_get_dreg(copr,tl,-1); @@ -4018,7 +4039,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) } else { if(!c) { - jaddr2 = emit_fastpath_cmp_jump(i, i_regs, ar, + jaddr2 = emit_fastpath_cmp_jump(st, i, i_regs, ar, &offset_reg, &fastio_reg_override, ccadj_); } else if (ram_offset && memtarget) { @@ -4051,7 +4072,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) if(jaddr2) add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj_,reglist); if (dops[i].opcode == 0x3a && (!c || is_ram_addr(addr_const))) // SWC2 - do_store_smc_check(i, i_regs, reglist, ar); + do_store_smc_check(st, i, i_regs, reglist, ar); if (dops[i].opcode == 0x32) { // LWC2 host_tempreg_acquire(); cop2_put_dreg(copr,tl,HOST_TEMPREG); @@ -4059,9 +4080,9 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) } } -static void cop2_assemble(int i, const struct regstat *i_regs) +static void cop2_assemble(struct compile_state *st, int i, const struct regstat *i_regs) { - u_int copr = (source[i]>>11) & 0x1f; + u_int copr = (st->source[i]>>11) & 0x1f; signed char temp = get_reg_temp(i_regs->regmap); if (!HACK_ENABLED(NDHACK_NO_STALLS)) { @@ -4070,7 +4091,7 @@ static void cop2_assemble(int i, const struct regstat *i_regs) signed char tl = get_reg(i_regs->regmap, dops[i].rt1); reglist = reglist_exclude(reglist, tl, -1); } - cop2_do_stall_check(0, i, i_regs, reglist); + cop2_do_stall_check(st, 0, i, i_regs, reglist); } if (dops[i].opcode2==0) { // MFC2 signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1); @@ -4112,9 +4133,9 @@ static void cop2_assemble(int i, const struct regstat *i_regs) } } -static void do_unalignedwritestub(int n) +static void do_unalignedwritestub(struct compile_state *st, int n) { - assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4); + assem_debug("do_unalignedwritestub %x\n",st->start+stubs[n].a*4); literal_pool(256); set_jump_target(stubs[n].addr, out); @@ -4139,7 +4160,7 @@ static void do_unalignedwritestub(int n) if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,(int)stubs[n].d+1,2); - emit_movimm(start + i*4,3); + emit_movimm(st->start + i*4,3); emit_writeword(3,&psxRegs.pc); emit_far_call((dops[i].opcode==0x2a?jump_handle_swl:jump_handle_swr)); emit_addimm(0,-((int)stubs[n].d+1),cc<0?2:cc); @@ -4149,21 +4170,21 @@ static void do_unalignedwritestub(int n) emit_jmp(stubs[n].retaddr); // return address } -static void do_overflowstub(int n) +static void do_overflowstub(struct compile_state *st, int n) { - assem_debug("do_overflowstub %x\n", start + (u_int)stubs[n].a * 4); + assem_debug("do_overflowstub %x\n", st->start + (u_int)stubs[n].a * 4); literal_pool(24); int i = stubs[n].a; struct regstat *i_regs = (struct regstat *)stubs[n].c; int ccadj = stubs[n].d; set_jump_target(stubs[n].addr, out); wb_dirtys(regs[i].regmap, regs[i].dirty); - exception_assemble(i, i_regs, ccadj); + exception_assemble(st, i, i_regs, ccadj); } -static void do_alignmentstub(int n) +static void do_alignmentstub(struct compile_state *st, int n) { - assem_debug("do_alignmentstub %x\n", start + (u_int)stubs[n].a * 4); + assem_debug("do_alignmentstub %x\n", st->start + (u_int)stubs[n].a * 4); literal_pool(24); int i = stubs[n].a; struct regstat *i_regs = (struct regstat *)stubs[n].c; @@ -4176,7 +4197,7 @@ static void do_alignmentstub(int n) if (stubs[n].b != 1) emit_mov(stubs[n].b, 1); // faulting address emit_movimm(cause, 0); - exception_assemble(i, i_regs, ccadj); + exception_assemble(st, i, i_regs, ccadj); } #ifndef multdiv_assemble @@ -4206,11 +4227,12 @@ static void mov_assemble(int i, const struct regstat *i_regs) } // call interpreter, exception handler, things that change pc/regs/cycles ... -static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, u_int pc, void *func) +static void call_c_cpu_handler(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_, u_int pc, void *func) { signed char ccreg=get_reg(i_regs->regmap,CCREG); assert(ccreg==HOST_CCREG); - assert(!is_delayslot); + assert(!st->is_delayslot); (void)ccreg; emit_movimm(pc,3); // Get PC @@ -4224,22 +4246,23 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, emit_far_jump(jump_to_new_pc); } -static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void exception_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { // 'break' tends to be littered around to catch things like // division by 0 and is almost never executed, so don't emit much code here void *func; if (dops[i].itype == ALU || dops[i].itype == IMM16) - func = is_delayslot ? jump_overflow_ds : jump_overflow; + func = st->is_delayslot ? jump_overflow_ds : jump_overflow; else if (dops[i].itype == LOAD || dops[i].itype == STORE) - func = is_delayslot ? jump_addrerror_ds : jump_addrerror; + func = st->is_delayslot ? jump_addrerror_ds : jump_addrerror; else if (dops[i].opcode2 == 0x0C) - func = is_delayslot ? jump_syscall_ds : jump_syscall; + func = st->is_delayslot ? jump_syscall_ds : jump_syscall; else - func = is_delayslot ? jump_break_ds : jump_break; + func = st->is_delayslot ? jump_break_ds : jump_break; if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) // evicted emit_loadreg(CCREG, HOST_CCREG); - emit_movimm(start + i*4, 2); // pc + emit_movimm(st->start + i*4, 2); // pc emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); emit_far_jump(func); } @@ -4249,19 +4272,21 @@ static void hlecall_bad() assert(0); } -static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void hlecall_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { void *hlefunc = hlecall_bad; - uint32_t hleCode = source[i] & 0x03ffffff; + uint32_t hleCode = st->source[i] & 0x03ffffff; if (hleCode < ARRAY_SIZE(psxHLEt)) hlefunc = psxHLEt[hleCode]; - call_c_cpu_handler(i, i_regs, ccadj_, start + i*4+4, hlefunc); + call_c_cpu_handler(st, i, i_regs, ccadj_, st->start + i*4+4, hlefunc); } -static void intcall_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void intcall_assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { - call_c_cpu_handler(i, i_regs, ccadj_, start + i*4, execI); + call_c_cpu_handler(st, i, i_regs, ccadj_, st->start + i*4, execI); } static void speculate_mov(int rs,int rt) @@ -4280,7 +4305,7 @@ static void speculate_mov_weak(int rs,int rt) } } -static void speculate_register_values(int i) +static void speculate_register_values(struct compile_state *st, int i) { if(i==0) { // gp,sp are likely to stay the same throughout the block @@ -4310,7 +4335,7 @@ static void speculate_register_values(int i) int hr = get_reg_w(regs[i].regmap, dops[i].rt1); u_int value; if(hr>=0) { - if(get_final_value(hr,i,&value)) + if (get_final_value(st, hr, i, &value)) ndrc_smrv_regs[dops[i].rt1]=value; else ndrc_smrv_regs[dops[i].rt1]=constmap[i][hr]; smrv_strong_next|=1<>24)==0xa0)) { + if(st->start<0x2000&&(dops[i].rt1==26||(ndrc_smrv_regs[dops[i].rt1]>>24)==0xa0)) { // special case for BIOS ndrc_smrv_regs[dops[i].rt1]=0xa0000000; smrv_strong_next|=1<start+i*4, ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); #endif } -static void ujump_assemble(int i, const struct regstat *i_regs); -static void rjump_assemble(int i, const struct regstat *i_regs); -static void cjump_assemble(int i, const struct regstat *i_regs); -static void sjump_assemble(int i, const struct regstat *i_regs); +static void ujump_assemble(struct compile_state *st, int i, const struct regstat *i_regs); +static void rjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs); +static void cjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs); +static void sjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs); -static int assemble(int i, const struct regstat *i_regs, int ccadj_) +static int assemble(struct compile_state *st, int i, + const struct regstat *i_regs, int ccadj_) { int ds = 0; switch (dops[i].itype) { @@ -4378,62 +4404,62 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) shiftimm_assemble(i, i_regs); break; case LOAD: - load_assemble(i, i_regs, ccadj_); + load_assemble(st, i, i_regs, ccadj_); break; case LOADLR: - loadlr_assemble(i, i_regs, ccadj_); + loadlr_assemble(st, i, i_regs, ccadj_); break; case STORE: - store_assemble(i, i_regs, ccadj_); + store_assemble(st, i, i_regs, ccadj_); break; case STORELR: - storelr_assemble(i, i_regs, ccadj_); + storelr_assemble(st, i, i_regs, ccadj_); break; case COP0: - cop0_assemble(i, i_regs, ccadj_); + cop0_assemble(st, i, i_regs, ccadj_); break; case RFE: rfe_assemble(i, i_regs); break; case COP2: - cop2_assemble(i, i_regs); + cop2_assemble(st, i, i_regs); break; case C2LS: - c2ls_assemble(i, i_regs, ccadj_); + c2ls_assemble(st, i, i_regs, ccadj_); break; case C2OP: - c2op_assemble(i, i_regs); + c2op_assemble(st, i, i_regs); break; case MULTDIV: multdiv_assemble(i, i_regs); - multdiv_prepare_stall(i, i_regs, ccadj_); + multdiv_prepare_stall(st, i, i_regs, ccadj_); break; case MOV: mov_assemble(i, i_regs); break; case SYSCALL: - exception_assemble(i, i_regs, ccadj_); + exception_assemble(st, i, i_regs, ccadj_); break; case HLECALL: - hlecall_assemble(i, i_regs, ccadj_); + hlecall_assemble(st, i, i_regs, ccadj_); break; case INTCALL: - intcall_assemble(i, i_regs, ccadj_); + intcall_assemble(st, i, i_regs, ccadj_); break; case UJUMP: - ujump_assemble(i, i_regs); + ujump_assemble(st, i, i_regs); ds = 1; break; case RJUMP: - rjump_assemble(i, i_regs); + rjump_assemble(st, i, i_regs); ds = 1; break; case CJUMP: - cjump_assemble(i, i_regs); + cjump_assemble(st, i, i_regs); ds = 1; break; case SJUMP: - sjump_assemble(i, i_regs); + sjump_assemble(st, i, i_regs); ds = 1; break; case NOP: @@ -4446,10 +4472,10 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) return ds; } -static void ds_assemble(int i, const struct regstat *i_regs) +static void ds_assemble(struct compile_state *st, int i, const struct regstat *i_regs) { - speculate_register_values(i); - is_delayslot = 1; + speculate_register_values(st, i); + st->is_delayslot = 1; switch (dops[i].itype) { case SYSCALL: case HLECALL: @@ -4461,16 +4487,16 @@ static void ds_assemble(int i, const struct regstat *i_regs) SysPrintf("Jump in the delay slot. This is probably a bug.\n"); break; default: - assemble(i, i_regs, cinfo[i].ccadj); + assemble(st, i, i_regs, cinfo[i].ccadj); } - is_delayslot = 0; + st->is_delayslot = 0; } // Is the branch target a valid internal jump? -static int internal_branch(int addr) +static int internal_branch(struct compile_state *st, int addr) { - if(addr&1) return 0; // Indirect (register) jump - if(addr>=start && addr= st->start && addr < st->start + st->slen*4-4) { return 1; } @@ -4663,16 +4689,16 @@ static void address_generation(int i, const struct regstat *i_regs, signed char } } -static int get_final_value(int hr, int i, u_int *value) +static int get_final_value(struct compile_state *st, int hr, int i, u_int *value) { int reg=regs[i].regmap[hr]; - while(islen-1) { if(regs[i+1].regmap[hr]!=reg) break; if(!((regs[i+1].isconst>>hr)&1)) break; if(dops[i+1].bt) break; i++; } - if(islen-1) { if (dops[i].is_jump) { *value=constmap[i][hr]; return 1; @@ -4698,13 +4724,14 @@ static int get_final_value(int hr, int i, u_int *value) } *value=constmap[i][hr]; //printf("c=%lx\n",(long)constmap[i][hr]); - if(i==slen-1) return 1; + if(i==st->slen-1) return 1; assert(reg < 64); return !((unneeded_reg[i+1]>>reg)&1); } // Load registers with known constants -static void load_consts(signed char pre[],signed char regmap[],int i) +static void load_consts(struct compile_state *st, signed char pre[], + signed char regmap[], int i) { int hr,hr2; // propagate loaded constant flags @@ -4729,7 +4756,7 @@ static void load_consts(signed char pre[],signed char regmap[],int i) assert(regmap[hr]<64); if(((regs[i].isconst>>hr)&1)&®map[hr]>0) { u_int value, similar=0; - if(get_final_value(hr,i,&value)) { + if(get_final_value(st, hr, i, &value)) { // see if some other register has similar value for(hr2=0;hr2>hr2)&1)) { @@ -4741,7 +4768,7 @@ static void load_consts(signed char pre[],signed char regmap[],int i) } if(similar) { u_int value2; - if(get_final_value(hr2,i,&value2)) // is this needed? + if(get_final_value(st, hr2, i, &value2)) // is this needed? emit_movimm_from(value2,hr2,value,hr); else emit_movimm(value,hr); @@ -4802,10 +4829,11 @@ static void wb_dirtys(const signed char i_regmap[], u_int i_dirty) // Write out dirty registers that we need to reload (pair with load_needed_regs) // This writes the registers not written by store_regs_bt -static void wb_needed_dirtys(const signed char i_regmap[], u_int i_dirty, int addr) +static void wb_needed_dirtys(struct compile_state *st, const signed char i_regmap[], + u_int i_dirty, int addr) { int hr; - int t=(addr-start)>>2; + int t=(addr-st->start)>>2; for(hr=0;hr0) { @@ -4868,11 +4896,12 @@ static void load_regs_entry(int t) } // Store dirty registers prior to branch -static void store_regs_bt(signed char i_regmap[],uint64_t i_dirty,int addr) +static void store_regs_bt(struct compile_state *st, signed char i_regmap[], + uint64_t i_dirty, int addr) { - if(internal_branch(addr)) + if (internal_branch(st, addr)) { - int t=(addr-start)>>2; + int t=(addr-st->start)>>2; int hr; for(hr=0;hr=start && addr<(start+slen*4)) - if(internal_branch(addr)) + //if(addr>=st->start && addr<(st->start+st->slen*4)) + if (internal_branch(st, addr)) { - int t=(addr-start)>>2; + int t=(addr - st->start)>>2; int hr; // Store the cycle count before loading something else if(i_regmap[HOST_CCREG]!=CCREG) { @@ -4927,11 +4957,12 @@ static void load_regs_bt(signed char i_regmap[],uint64_t i_dirty,int addr) } } -static int match_bt(signed char i_regmap[],uint64_t i_dirty,int addr) +static int match_bt(struct compile_state *st, signed char i_regmap[], + uint64_t i_dirty, int addr) { - if(addr>=start && addr= st->start && addr < st->start + st->slen*4-4) { - int t=(addr-start)>>2; + int t=(addr-st->start)>>2; int hr; if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) return 0; for(hr=0;hrstart+i*4); save_regs(reglist); // write out changed consts to match the interpreter if (i > 0 && !dops[i].bt) { @@ -5032,7 +5063,7 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) emit_movimm(cinfo[i].imm << 16, 0); emit_storereg(dops[i].rt1, 0); } - emit_movimm(start+i*4,0); + emit_movimm(st->start+i*4,0); emit_writeword(0,&psxRegs.pc); int cc = get_reg(regs[i].regmap_entry, CCREG); if (cc < 0) @@ -5062,12 +5093,12 @@ static void drc_dbg_emit_wb_dirtys(int i, const struct regstat *i_regs) #endif // Used when a branch jumps into the delay slot of another branch -static void ds_assemble_entry(int i) +static void ds_assemble_entry(struct compile_state *st, int i) { - int t = (cinfo[i].ba - start) >> 2; + int t = (cinfo[i].ba - st->start) >> 2; int ccadj_ = -CLOCK_ADJUST(1); - if (!instr_addr[t]) - instr_addr[t] = out; + if (!st->instr_addr[t]) + st->instr_addr[t] = out; assem_debug("Assemble delay slot at %x\n",cinfo[i].ba); assem_debug("<->\n"); drc_dbg_emit_do_cmp(t, ccadj_); @@ -5079,7 +5110,7 @@ static void ds_assemble_entry(int i) load_reg(regs[t].regmap_entry,regs[t].regmap,ROREG); if (dops[t].is_store) load_reg(regs[t].regmap_entry,regs[t].regmap,INVCP); - is_delayslot=0; + st->is_delayslot = 0; switch (dops[t].itype) { case SYSCALL: case HLECALL: @@ -5091,16 +5122,16 @@ static void ds_assemble_entry(int i) SysPrintf("Jump in the delay slot. This is probably a bug.\n"); break; default: - assemble(t, ®s[t], ccadj_); + assemble(st, t, ®s[t], ccadj_); } - store_regs_bt(regs[t].regmap,regs[t].dirty,cinfo[i].ba+4); - load_regs_bt(regs[t].regmap,regs[t].dirty,cinfo[i].ba+4); - if(internal_branch(cinfo[i].ba+4)) + store_regs_bt(st, regs[t].regmap, regs[t].dirty, cinfo[i].ba+4); + load_regs_bt(st,regs[t].regmap,regs[t].dirty,cinfo[i].ba+4); + if (internal_branch(st, cinfo[i].ba+4)) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - assert(internal_branch(cinfo[i].ba+4)); - add_to_linker(out,cinfo[i].ba+4,internal_branch(cinfo[i].ba+4)); + assert(internal_branch(st, cinfo[i].ba+4)); + add_to_linker(st,out,cinfo[i].ba+4, internal_branch(st, cinfo[i].ba+4)); emit_jmp(0); } @@ -5111,8 +5142,8 @@ static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) emit_movimm_from(imm1,rt1,imm2,rt2); } -static void do_cc(int i, const signed char i_regmap[], int *adj, - int addr, int taken, int invert) +static void do_cc(struct compile_state *st, int i, const signed char i_regmap[], + int *adj, int addr, int taken, int invert) { int count, count_plus2; void *jaddr; @@ -5122,10 +5153,10 @@ static void do_cc(int i, const signed char i_regmap[], int *adj, { *adj=0; } - //if(cinfo[i].ba>=start && cinfo[i].ba<(start+slen*4)) - if(internal_branch(cinfo[i].ba)) + //if(cinfo[i].ba>=st->start && cinfo[i].ba<(st->start+st->slen*4)) + if(internal_branch(st,cinfo[i].ba)) { - t=(cinfo[i].ba-start)>>2; + t=(cinfo[i].ba-st->start)>>2; if(dops[t].is_ds) *adj=-CLOCK_ADJUST(1); // Branch into delay slot adds an extra cycle else *adj=cinfo[t].ccadj; } @@ -5135,7 +5166,7 @@ static void do_cc(int i, const signed char i_regmap[], int *adj, } count = cinfo[i].ccadj; count_plus2 = count + CLOCK_ADJUST(2); - if(taken==TAKEN && i==(cinfo[i].ba-start)>>2 && source[i+1]==0) { + if(taken==TAKEN && i==(cinfo[i].ba-st->start)>>2 && st->source[i+1]==0) { // Idle loop if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG); idle=out; @@ -5167,10 +5198,10 @@ static void do_cc(int i, const signed char i_regmap[], int *adj, add_stub(CC_STUB,jaddr,idle?idle:out,(*adj==0||invert||idle)?0:count_plus2,i,addr,taken,0); } -static void do_ccstub(int n) +static void do_ccstub(struct compile_state *st, int n) { literal_pool(256); - assem_debug("do_ccstub %x\n",start+(u_int)stubs[n].b*4); + assem_debug("do_ccstub %x\n",st->start+(u_int)stubs[n].b*4); set_jump_target(stubs[n].addr, out); int i = stubs[n].b; int r_pc = -1; @@ -5178,8 +5209,8 @@ static void do_ccstub(int n) wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty); } else { - if(internal_branch(cinfo[i].ba)) - wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + if(internal_branch(st,cinfo[i].ba)) + wb_needed_dirtys(st, branch_regs[i].regmap, branch_regs[i].dirty, cinfo[i].ba); } if(stubs[n].c!=-1) { @@ -5257,9 +5288,9 @@ static void do_ccstub(int n) #ifdef HAVE_CMOV_IMM if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(cinfo[i].ba,start+i*4+8,addr); + emit_cmov2imm_e_ne_compact(cinfo[i].ba,st->start+i*4+8,addr); #else - emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt); + emit_mov2imm_compact(cinfo[i].ba,addr,st->start+i*4+8,alt); if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); emit_cmovne_reg(alt,addr); @@ -5270,9 +5301,9 @@ static void do_ccstub(int n) #ifdef HAVE_CMOV_IMM if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); - emit_cmov2imm_e_ne_compact(start+i*4+8,cinfo[i].ba,addr); + emit_cmov2imm_e_ne_compact(st->start+i*4+8,cinfo[i].ba,addr); #else - emit_mov2imm_compact(start+i*4+8,addr,cinfo[i].ba,alt); + emit_mov2imm_compact(st->start+i*4+8,addr,cinfo[i].ba,alt); if(s2l>=0) emit_cmp(s1l,s2l); else emit_test(s1l,s1l); emit_cmovne_reg(alt,addr); @@ -5281,32 +5312,32 @@ static void do_ccstub(int n) else if (dops[i].opcode == 6) // BLEZ { //emit_movimm(cinfo[i].ba,alt); - //emit_movimm(start+i*4+8,addr); - emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr); + //emit_movimm(st->start+i*4+8,addr); + emit_mov2imm_compact(cinfo[i].ba,alt,st->start+i*4+8,addr); emit_cmpimm(s1l,1); emit_cmovl_reg(alt,addr); } else if (dops[i].opcode == 7) // BGTZ { //emit_movimm(cinfo[i].ba,addr); - //emit_movimm(start+i*4+8,ntaddr); - emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,ntaddr); + //emit_movimm(st->start+i*4+8,ntaddr); + emit_mov2imm_compact(cinfo[i].ba,addr,st->start+i*4+8,ntaddr); emit_cmpimm(s1l,1); emit_cmovl_reg(ntaddr,addr); } else if (dops[i].itype == SJUMP) // BLTZ/BGEZ { //emit_movimm(cinfo[i].ba,alt); - //emit_movimm(start+i*4+8,addr); + //emit_movimm(st->start+i*4+8,addr); if (dops[i].rs1) { emit_mov2imm_compact(cinfo[i].ba, - (dops[i].opcode2 & 1) ? addr : alt, start + i*4 + 8, + (dops[i].opcode2 & 1) ? addr : alt, st->start + i*4 + 8, (dops[i].opcode2 & 1) ? alt : addr); emit_test(s1l,s1l); emit_cmovs_reg(alt,addr); } else - emit_movimm((dops[i].opcode2 & 1) ? cinfo[i].ba : start + i*4 + 8, addr); + emit_movimm((dops[i].opcode2 & 1) ? cinfo[i].ba : st->start + i*4 + 8, addr); } r_pc = addr; } @@ -5327,8 +5358,8 @@ static void do_ccstub(int n) emit_far_call(cc_interrupt); if(stubs[n].a) emit_addimm(HOST_CCREG,-(int)stubs[n].a,HOST_CCREG); if(stubs[n].d==TAKEN) { - if(internal_branch(cinfo[i].ba)) - load_needed_regs(branch_regs[i].regmap,regs[(cinfo[i].ba-start)>>2].regmap_entry); + if(internal_branch(st,cinfo[i].ba)) + load_needed_regs(branch_regs[i].regmap,regs[(cinfo[i].ba-st->start)>>2].regmap_entry); else if(dops[i].itype==RJUMP) { if(get_reg(branch_regs[i].regmap,RTEMP)>=0) emit_readword(&psxRegs.pc,get_reg(branch_regs[i].regmap,RTEMP)); @@ -5336,7 +5367,7 @@ static void do_ccstub(int n) emit_loadreg(dops[i].rs1,get_reg(branch_regs[i].regmap,dops[i].rs1)); } }else if(stubs[n].d==NOTTAKEN) { - if(islen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]); else load_all_regs(branch_regs[i].regmap); }else{ load_all_regs(branch_regs[i].regmap); @@ -5347,31 +5378,32 @@ static void do_ccstub(int n) do_jump_vaddr(stubs[n].e); } -static void add_to_linker(void *addr, u_int target, int is_internal) +static void add_to_linker(struct compile_state *st, void *addr, u_int target, + int is_internal) { - assert(linkcount < ARRAY_SIZE(link_addr)); - link_addr[linkcount].addr = addr; - link_addr[linkcount].target = target; - link_addr[linkcount].internal = is_internal; - linkcount++; + assert(st->linkcount < ARRAY_SIZE(st->link_addr)); + st->link_addr[st->linkcount].addr = addr; + st->link_addr[st->linkcount].target = target; + st->link_addr[st->linkcount].internal = is_internal; + st->linkcount++; } -static void ujump_assemble_write_ra(int i) +static void ujump_assemble_write_ra(struct compile_state *st, int i) { int rt; unsigned int return_address; rt=get_reg(branch_regs[i].regmap,31); //assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); //assert(rt>=0); - return_address=start+i*4+8; + return_address=st->start+i*4+8; if(rt>=0) { #ifdef USE_MINI_HT - if(internal_branch(return_address)&&dops[i+1].rt1!=31) { + if(internal_branch(st,return_address)&&dops[i+1].rt1!=31) { int temp=-1; // note: must be ds-safe #ifdef HOST_TEMPREG temp=HOST_TEMPREG; #endif - if(temp>=0) do_miniht_insert(return_address,rt,temp); + if(temp>=0) do_miniht_insert(st, return_address, rt, temp); else emit_movimm(return_address,rt); } else @@ -5392,23 +5424,23 @@ static void ujump_assemble_write_ra(int i) } } -static void ujump_assemble(int i, const struct regstat *i_regs) +static void ujump_assemble(struct compile_state *st, int i, const struct regstat *i_regs) { - if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n"); + if(i==(cinfo[i].ba-st->start)>>2) assem_debug("idle loop\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); if(dops[i].rt1==31&&temp>=0) { signed char *i_regmap=i_regs->regmap; - int return_address=start+i*4+8; + int return_address=st->start+i*4+8; if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } #endif if (dops[i].rt1 == 31) - ujump_assemble_write_ra(i); // writeback ra for DS - ds_assemble(i+1,i_regs); + ujump_assemble_write_ra(st, i); // writeback ra for DS + ds_assemble(st, i+1, i_regs); uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded|=1|(1LL<=0) emit_prefetchreg(temp); #endif - do_cc(i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0); + do_cc(st,i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0); if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); - if(internal_branch(cinfo[i].ba)) + load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + if(internal_branch(st,cinfo[i].ba)) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal_branch(cinfo[i].ba) && dops[(cinfo[i].ba-start)>>2].is_ds) { - ds_assemble_entry(i); + if (internal_branch(st, cinfo[i].ba) && dops[(cinfo[i].ba-st->start)>>2].is_ds) { + ds_assemble_entry(st, i); } else { - add_to_linker(out,cinfo[i].ba,internal_branch(cinfo[i].ba)); + add_to_linker(st, out, cinfo[i].ba, internal_branch(st,cinfo[i].ba)); emit_jmp(0); } } -static void rjump_assemble_write_ra(int i) +static void rjump_assemble_write_ra(struct compile_state *st, int i) { int rt,return_address; rt=get_reg_w(branch_regs[i].regmap, dops[i].rt1); //assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(rt>=0); - return_address=start+i*4+8; + return_address=st->start+i*4+8; #ifdef REG_PREFETCH if(temp>=0) { @@ -5456,7 +5488,7 @@ static void rjump_assemble_write_ra(int i) #endif } -static void rjump_assemble(int i, const struct regstat *i_regs) +static void rjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs) { int temp; int rs,cc; @@ -5476,7 +5508,7 @@ static void rjump_assemble(int i, const struct regstat *i_regs) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { signed char *i_regmap=i_regs->regmap; - int return_address=start+i*4+8; + int return_address=st->start+i*4+8; if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp); } } @@ -5488,8 +5520,8 @@ static void rjump_assemble(int i, const struct regstat *i_regs) } #endif if (dops[i].rt1 != 0) - rjump_assemble_write_ra(i); - ds_assemble(i+1,i_regs); + rjump_assemble_write_ra(st, i); + ds_assemble(st, i+1, i_regs); uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded|=1|(1LL<>rs)&1) { if(dops[i].rs1!=dops[i+1].rt1&&dops[i].rs1!=dops[i+1].rt2) { @@ -5523,7 +5555,7 @@ static void rjump_assemble(int i, const struct regstat *i_regs) do_miniht_load(ht,rh); } #endif - //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN); + //do_cc(st,i,branch_regs[i].regmap,&adj,-1,TAKEN); //if(adj) emit_addimm(cc,2*(cinfo[i].ccadj+2-adj),cc); // ??? - Shouldn't happen //assert(adj==0); emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG); @@ -5533,7 +5565,7 @@ static void rjump_assemble(int i, const struct regstat *i_regs) emit_jmp(0); else emit_jns(0); - //load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1); + //load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT if(dops[i].rs1==31) { do_miniht_jump(rs,rh,ht); @@ -5544,7 +5576,7 @@ static void rjump_assemble(int i, const struct regstat *i_regs) do_jump_vaddr(rs); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(dops[i].rt1!=31&&islen-2&&(((u_int)out)&7)) emit_mov(13,13); #endif } @@ -5594,22 +5626,22 @@ static void vsync_hack_assemble(int i, int ld_ofs, int cc) host_tempreg_release(); } -static void cjump_assemble(int i, const struct regstat *i_regs) +static void cjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs) { const signed char *i_regmap = i_regs->regmap; int cc; int match; - match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + match = match_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); assem_debug("match=%d\n",match); int s1l,s2l; int unconditional=0,nop=0; int invert=0; - int internal=internal_branch(cinfo[i].ba); - if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n"); + int internal=internal_branch(st,cinfo[i].ba); + if(i==(cinfo[i].ba-st->start)>>2) assem_debug("idle loop\n"); if(!match) invert=1; if (vsync_hack && (vsync_hack >> 16) == i) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(i>(cinfo[i].ba-start)>>2) invert=1; + if(i>(cinfo[i].ba-st->start)>>2) invert=1; #endif #ifdef __aarch64__ invert=1; // because of near cond. branches @@ -5646,7 +5678,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); - ds_assemble(i+1,i_regs); + ds_assemble(st, i+1, i_regs); int adj; uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded&=~((1LL<>2 || source[i+1]!=0) { + do_cc(st,i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0); + if(i!=(cinfo[i].ba-st->start)>>2 || st->source[i+1]!=0) { if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(cinfo[i].ba-start)>>2].is_ds) { - ds_assemble_entry(i); + if (internal && dops[(cinfo[i].ba - st->start)>>2].is_ds) { + ds_assemble_entry(st, i); } else { - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st, out, cinfo[i].ba, internal); emit_jmp(0); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -5685,11 +5717,11 @@ static void cjump_assemble(int i, const struct regstat *i_regs) emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0); } else { void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL; - do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); + do_cc(st,i,branch_regs[i].regmap,&adj,-1,0,invert); if(adj&&!invert) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); @@ -5702,7 +5734,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jne(DJT_1); }else{ - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jeq(0); } } @@ -5714,7 +5746,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jeq(DJT_1); }else{ - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jne(0); } } @@ -5725,7 +5757,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jge(DJT_1); }else{ - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jl(0); } } @@ -5736,7 +5768,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jl(DJT_1); }else{ - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jge(0); } } @@ -5745,30 +5777,30 @@ static void cjump_assemble(int i, const struct regstat *i_regs) if (vsync_hack && (vsync_hack >> 16) == i) vsync_hack_assemble(i, vsync_hack & 0xffff, cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if (match && (!internal || !dops[(cinfo[i].ba-start)>>2].is_ds)) { + if (match && (!internal || !dops[(cinfo[i].ba-st->start)>>2].is_ds)) { if(adj) { emit_addimm(cc,-adj,cc); - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); }else{ emit_addnop(13); - add_to_linker(out,cinfo[i].ba,internal*2); + add_to_linker(st,out,cinfo[i].ba,internal*2); } emit_jmp(0); }else #endif { if(adj) emit_addimm(cc,-adj,cc); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { - ds_assemble_entry(i); + if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) { + ds_assemble_entry(st, i); } else { - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jmp(0); } } @@ -5830,7 +5862,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) if (ram_offset) load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); - ds_assemble(i+1,&branch_regs[i]); + ds_assemble(st, i+1, &branch_regs[i]); drc_dbg_emit_wb_dirtys(i+1, &branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if(cc==-1) { @@ -5838,20 +5870,20 @@ static void cjump_assemble(int i, const struct regstat *i_regs) // CHECK: Is the following instruction (fall thru) allocated ok? } assert(cc==HOST_CCREG); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); - do_cc(i,i_regmap,&adj,cinfo[i].ba,TAKEN,0); + store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + do_cc(st,i,i_regmap,&adj,cinfo[i].ba,TAKEN,0); assem_debug("cycle count (adj)\n"); if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { - ds_assemble_entry(i); + if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) { + ds_assemble_entry(st, i); } else { - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jmp(0); } } @@ -5867,7 +5899,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) if (ram_offset) load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); - ds_assemble(i+1,&branch_regs[i]); + ds_assemble(st, i+1, &branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if (cc == -1) { // Cycle count isn't in a register, temporarily load it then write it out @@ -5875,7 +5907,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0); emit_storereg(CCREG,HOST_CCREG); } else{ @@ -5884,27 +5916,27 @@ static void cjump_assemble(int i, const struct regstat *i_regs) emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0); } } } } -static void sjump_assemble(int i, const struct regstat *i_regs) +static void sjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs) { const signed char *i_regmap = i_regs->regmap; int cc; int match; - match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + match = match_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); assem_debug("smatch=%d ooo=%d\n", match, dops[i].ooo); int s1l; int unconditional=0,nevertaken=0; int invert=0; - int internal=internal_branch(cinfo[i].ba); - if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n"); + int internal=internal_branch(st,cinfo[i].ba); + if(i==(cinfo[i].ba-st->start)>>2) assem_debug("idle loop\n"); if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if(i>(cinfo[i].ba-start)>>2) invert=1; + if(i>(cinfo[i].ba-st->start)>>2) invert=1; #endif #ifdef __aarch64__ invert=1; // because of near cond. branches @@ -5934,7 +5966,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); - ds_assemble(i+1,i_regs); + ds_assemble(st, i+1, i_regs); int adj; uint64_t bc_unneeded=branch_regs[i].u; bc_unneeded&=~((1LL<=0) { // Save the PC even if the branch is not taken - return_address=start+i*4+8; + return_address=st->start+i*4+8; emit_movimm(return_address,rt); // PC into link register #ifdef IMM_PREFETCH if(!nevertaken) emit_prefetch(hash_table_get(return_address)); @@ -5958,23 +5990,23 @@ static void sjump_assemble(int i, const struct regstat *i_regs) cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); if(unconditional) - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); - //do_cc(i,branch_regs[i].regmap,&adj,unconditional?cinfo[i].ba:-1,unconditional); + store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + //do_cc(st,i,branch_regs[i].regmap,&adj,unconditional?cinfo[i].ba:-1,unconditional); assem_debug("cycle count (adj)\n"); if(unconditional) { - do_cc(i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0); - if(i!=(cinfo[i].ba-start)>>2 || source[i+1]!=0) { + do_cc(st,i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0); + if(i!=(cinfo[i].ba-st->start)>>2 || st->source[i+1]!=0) { if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { - ds_assemble_entry(i); + if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) { + ds_assemble_entry(st, i); } else { - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jmp(0); } #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -5986,11 +6018,11 @@ static void sjump_assemble(int i, const struct regstat *i_regs) emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0); } else { void *nottaken = NULL; - do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); + do_cc(st,i,branch_regs[i].regmap,&adj,-1,0,invert); if(adj&&!invert) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); { assert(s1l>=0); @@ -6001,7 +6033,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_jns(DJT_1); }else{ - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_js(0); } } @@ -6012,7 +6044,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) nottaken=out; emit_js(DJT_1); }else{ - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jns(0); } } @@ -6020,30 +6052,30 @@ static void sjump_assemble(int i, const struct regstat *i_regs) if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - if (match && (!internal || !dops[(cinfo[i].ba - start) >> 2].is_ds)) { + if (match && (!internal || !dops[(cinfo[i].ba - st->start) >> 2].is_ds)) { if(adj) { emit_addimm(cc,-adj,cc); - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); }else{ emit_addnop(13); - add_to_linker(out,cinfo[i].ba,internal*2); + add_to_linker(st,out,cinfo[i].ba,internal*2); } emit_jmp(0); }else #endif { if(adj) emit_addimm(cc,-adj,cc); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { - ds_assemble_entry(i); + if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) { + ds_assemble_entry(st, i); } else { - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jmp(0); } } @@ -6069,7 +6101,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) rt = get_reg(branch_regs[i].regmap,31); if(rt >= 0) { // Save the PC even if the branch is not taken - return_address = start + i*4+8; + return_address = st->start + i*4+8; emit_movimm(return_address, rt); // PC into link register #ifdef IMM_PREFETCH emit_prefetch(hash_table_get(return_address)); @@ -6097,27 +6129,27 @@ static void sjump_assemble(int i, const struct regstat *i_regs) if (ram_offset) load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); - ds_assemble(i+1,&branch_regs[i]); + ds_assemble(st, i+1, &branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if(cc==-1) { emit_loadreg(CCREG,cc=HOST_CCREG); // CHECK: Is the following instruction (fall thru) allocated ok? } assert(cc==HOST_CCREG); - store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); - do_cc(i,i_regmap,&adj,cinfo[i].ba,TAKEN,0); + store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + do_cc(st,i,i_regmap,&adj,cinfo[i].ba,TAKEN,0); assem_debug("cycle count (adj)\n"); if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc); - load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); + load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba); if(internal) assem_debug("branch: internal\n"); else assem_debug("branch: external\n"); - if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) { - ds_assemble_entry(i); + if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) { + ds_assemble_entry(st, i); } else { - add_to_linker(out,cinfo[i].ba,internal); + add_to_linker(st,out,cinfo[i].ba,internal); emit_jmp(0); } } @@ -6134,7 +6166,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) if (ram_offset) load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); - ds_assemble(i+1,&branch_regs[i]); + ds_assemble(st, i+1, &branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if (cc == -1) { // Cycle count isn't in a register, temporarily load it then write it out @@ -6142,7 +6174,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0); emit_storereg(CCREG,HOST_CCREG); } else{ @@ -6151,7 +6183,7 @@ static void sjump_assemble(int i, const struct regstat *i_regs) emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc); void *jaddr=out; emit_jns(0); - add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); + add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0); } } } @@ -6195,7 +6227,7 @@ void print_regmap(const char *name, const signed char *regmap) } /* disassembly */ -void disassemble_inst(int i) +void disassemble_inst(int i, u_int start, u_int *source) { if (dops[i].bt) printf("*"); else printf(" "); switch(dops[i].itype) { @@ -6283,7 +6315,7 @@ void disassemble_inst(int i) } #else #define set_mnemonic(i_, n_) -static void disassemble_inst(int i) {} +static void disassemble_inst(int i, u_int start, u_int *source) {} #endif // DISASM #define DRC_TEST_VAL 0x74657374 @@ -6668,7 +6700,7 @@ static void force_intcall(int i) cinfo[i].ba = -1; } -static noinline void do_vsync(int i) +static noinline void do_vsync(struct compile_state *st, int i) { // lui a0, x; addiu a0, x; jal puts u32 addr = (cinfo[i].imm << 16) + (signed short)cinfo[i+1].imm; @@ -6681,7 +6713,7 @@ static noinline void do_vsync(int i) if (!str || strncmp(str, "VSync: timeout", 14)) return; // jal clearPad, jal clearRCnt; j return; nop - for (j = i+2; j < slen; j++) { + for (j = i+2; j < st->slen; j++) { if (dops[j].itype == SHIFTIMM || dops[j].itype == IMM16 || dops[j].itype == ALU) continue; if (dops[j].opcode == 0x03) { @@ -6689,45 +6721,45 @@ static noinline void do_vsync(int i) } break; } - if (j >= slen || jals_cnt != 3 || dops[j++].opcode != 0x02) + if (j >= st->slen || jals_cnt != 3 || dops[j++].opcode != 0x02) return; - for (; j < slen; j++) + for (; j < st->slen; j++) if (dops[j].itype != SHIFTIMM && dops[j].itype != IMM16) break; - if (j >= slen || dops[j].opcode != 0x23) // lw x, condition + if (j >= st->slen || dops[j].opcode != 0x23) // lw x, condition return; j += 2; if (dops[j].opcode != 0 || dops[j].opcode2 != 0x2A) // slt x, y return; if (dops[++j].opcode != 0x05) // bnez x, loop return; - t = (cinfo[j].ba - start) / 4; - if (t < 0 || t >= slen) + t = (cinfo[j].ba - st->start) / 4; + if (t < 0 || t >= st->slen) return; // lw x, d(sp) if (dops[t].opcode != 0x23 || dops[t].rs1 != 29 || (u32)cinfo[t].imm >= 1024) return; if (dops[t+2].opcode != 0x09 || cinfo[t+2].imm != -1) // addiu x, -1 return; - SysPrintf("vsync @%08x\n", start + t*4); + SysPrintf("vsync @%08x\n", st->start + t*4); vsync_hack = (j << 16) | (cinfo[t].imm & 0xffff); } -static int apply_hacks(void) +static int apply_hacks(struct compile_state *st) { int i; vsync_hack = 0; if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS)) return 0; /* special hack(s) */ - for (i = 0; i < slen - 4; i++) + for (i = 0; i < st->slen - 4; i++) { // lui a4, 0xf200; jal ; addu a0, 2; slti v0, 28224 - if (source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP - && source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a + if (st->source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP + && st->source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a && cinfo[i+3].imm == 0x6e40 && dops[i+3].rs1 == 2) { - SysPrintf("PE2 hack @%08x\n", start + (i+3)*4); + SysPrintf("PE2 hack @%08x\n", st->start + (i+3)*4); dops[i + 3].itype = NOP; } // see also: psxBiosCheckExe() @@ -6735,18 +6767,18 @@ static int apply_hacks(void) && dops[i+1].opcode == 0x09 && dops[i+1].rt1 == 4 && dops[i+1].rs1 == 4 && dops[i+2].opcode == 0x03) { - do_vsync(i); + do_vsync(st, i); } } - if (source[0] == 0x3c05edb8 && source[1] == 0x34a58320) + if (st->source[0] == 0x3c05edb8 && st->source[1] == 0x34a58320) { // lui a1, 0xEDB8; ori a1, 0x8320 - SysPrintf("F1 2000 hack @%08x\n", start); + SysPrintf("F1 2000 hack @%08x\n", st->start); cycle_multiplier_active = 100; } - i = slen; - if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008 - && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809 + i = st->slen; + if (i > 10 && st->source[i-1] == 0 && st->source[i-2] == 0x03e00008 + && st->source[i-4] == 0x8fbf0018 && st->source[i-6] == 0x00c0f809 && dops[i-7].itype == STORE) { i = i-8; @@ -6756,7 +6788,7 @@ static int apply_hacks(void) if (dops[i].itype == STORELR && dops[i].rs1 == 6 && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) { - SysPrintf("F1 hack from %08x, old dst %08x\n", start, hack_addr); + SysPrintf("F1 hack from %08x, old dst %08x\n", st->start, hack_addr); f1_hack = 1; return 1; } @@ -6764,9 +6796,9 @@ static int apply_hacks(void) #if 0 // alt vsync, not used if (Config.HLE) { - if (start <= psxRegs.biosBranchCheck && psxRegs.biosBranchCheck < start + i*4) + if (st->start <= psxRegs.biosBranchCheck && psxRegs.biosBranchCheck < st->start + i*4) { - i = (psxRegs.biosBranchCheck - start) / 4u + 23; + i = (psxRegs.biosBranchCheck - st->start) / 4u + 23; if (dops[i].is_jump && !dops[i+1].bt) { force_intcall(i); @@ -6788,7 +6820,7 @@ static int is_ld_use_hazard(const struct decoded_insn *op_ld, return op->itype != CJUMP && op->itype != SJUMP; } -static void disassemble_one(int i, u_int src) +static void disassemble_one(struct compile_state *st, int i, u_int src) { unsigned int type, op, op2, op3; enum ls_width_type ls_type = LS_32; @@ -6943,7 +6975,7 @@ static void disassemble_one(int i, u_int src) break; } if (type == INTCALL) - SysPrintf_lim("NI %08x @%08x (%08x)\n", src, start + i*4, start); + SysPrintf_lim("NI %08x @%08x (%08x)\n", src, st->start + i*4, st->start); dops[i].itype = type; dops[i].opcode2 = op2; dops[i].ls_type = ls_type; @@ -7089,7 +7121,7 @@ static void disassemble_one(int i, u_int src) } } -static noinline void pass1a_disassemble(u_int pagelimit) +static noinline void pass1a_disassemble(struct compile_state *st, u_int pagelimit) { int i, j, done = 0; int ds_next = 0; @@ -7098,8 +7130,9 @@ static noinline void pass1a_disassemble(u_int pagelimit) { int force_j_to_interpreter = 0; unsigned int type, op, op2; + u_int start = st->start; - disassemble_one(i, source[i]); + disassemble_one(st, i, st->source[i]); dops[i].is_ds = ds_next; ds_next = 0; type = dops[i].itype; op = dops[i].opcode; @@ -7107,13 +7140,13 @@ static noinline void pass1a_disassemble(u_int pagelimit) /* Calculate branch target addresses */ if(type==UJUMP) - cinfo[i].ba=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4); + cinfo[i].ba=((start+i*4+4)&0xF0000000)|(((unsigned int)st->source[i]<<6)>>4); else if(type==CJUMP&&dops[i].rs1==dops[i].rs2&&(op&1)) cinfo[i].ba=start+i*4+8; // Ignore never taken branch else if(type==SJUMP&&dops[i].rs1==0&&!(op2&1)) cinfo[i].ba=start+i*4+8; // Ignore never taken branch else if(type==CJUMP||type==SJUMP) - cinfo[i].ba=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14); + cinfo[i].ba=start+i*4+4+((signed int)((unsigned int)st->source[i]<<16)>>14); /* simplify always (not)taken branches */ if (type == CJUMP && dops[i].rs1 == dops[i].rs2) { @@ -7129,7 +7162,7 @@ static noinline void pass1a_disassemble(u_int pagelimit) dops[i].is_jump = type == RJUMP || type == UJUMP || type == CJUMP || type == SJUMP; dops[i].is_ujump = type == RJUMP || type == UJUMP; dops[i].is_load = type == LOAD || type == LOADLR || op == 0x32; // LWC2 - dops[i].is_delay_load = (dops[i].is_load || (source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC + dops[i].is_delay_load = (dops[i].is_load || (st->source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC dops[i].is_store = type == STORE || type == STORELR || op == 0x3a; // SWC2 dops[i].is_exception = type == SYSCALL || type == HLECALL || type == INTCALL; dops[i].may_except = dops[i].is_exception || (type == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8; @@ -7161,7 +7194,7 @@ static noinline void pass1a_disassemble(u_int pagelimit) u_int limit = 0; u_int *mem = get_source_start(cinfo[i-1].ba, &limit); if (mem != NULL) { - disassemble_one(MAXBLOCK - 1, mem[0]); + disassemble_one(st, MAXBLOCK - 1, mem[0]); dop = &dops[MAXBLOCK - 1]; } } @@ -7205,11 +7238,11 @@ static noinline void pass1a_disassemble(u_int pagelimit) done = 2; } if (i >= 2) { - if ((source[i-2] & 0xffe0f800) == 0x40806000 // MTC0 $12 + if ((st->source[i-2] & 0xffe0f800) == 0x40806000 // MTC0 $12 || (dops[i-2].is_jump && dops[i-2].rt1 == 31)) // call dops[i].bt = 1; } - if (i >= 1 && (source[i-1] & 0xffe0f800) == 0x40806800) // MTC0 $13 + if (i >= 1 && (st->source[i-1] & 0xffe0f800) == 0x40806800) // MTC0 $13 dops[i].bt = 1; /* Is this the end of the block? */ @@ -7229,9 +7262,9 @@ static noinline void pass1a_disassemble(u_int pagelimit) if ((u_int)(t - i) < 64 && start + (t+64)*4 < pagelimit) { // scan for a branch back to i+1 for (j = t; j < t + 64; j++) { - int tmpop = source[j] >> 26; + int tmpop = st->source[j] >> 26; if (tmpop == 1 || ((tmpop & ~3) == 4)) { - int t2 = j + 1 + (int)(signed short)source[j]; + int t2 = j + 1 + (int)(signed short)st->source[j]; if (t2 == i + 1) { //printf("blk expand %08x<-%08x\n", start + (i+1)*4, start + j*4); found_bbranch = 1; @@ -7280,20 +7313,21 @@ static noinline void pass1a_disassemble(u_int pagelimit) i--; assert(i > 0); assert(!dops[i-1].is_jump); - slen = i; + st->slen = i; } -static noinline void pass1b_bt(void) +static noinline void pass1b_bt(struct compile_state *st) { int i; - for (i = 0; i < slen; i++) - if (dops[i].is_jump && start <= cinfo[i].ba && cinfo[i].ba < start+slen*4) + for (i = 0; i < st->slen; i++) + if (dops[i].is_jump && st->start <= cinfo[i].ba && cinfo[i].ba < st->start+st->slen*4) // Internal branch, flag target - dops[(cinfo[i].ba - start) >> 2].bt = 1; + dops[(cinfo[i].ba - st->start) >> 2].bt = 1; } // Basic liveness analysis for MIPS registers -static noinline void pass2b_unneeded_regs(int istart, int iend, int r) +static noinline void pass2b_unneeded_regs(struct compile_state *st, + int istart, int iend, int r) { int i; uint64_t u,gte_u,b,gte_b; @@ -7301,7 +7335,7 @@ static noinline void pass2b_unneeded_regs(int istart, int iend, int r) uint64_t gte_u_unknown=0; if (HACK_ENABLED(NDHACK_GTE_UNNEEDED)) gte_u_unknown=~0ll; - if(iend==slen-1) { + if(iend==st->slen-1) { u=1; gte_u=gte_u_unknown; }else{ @@ -7315,7 +7349,7 @@ static noinline void pass2b_unneeded_regs(int istart, int iend, int r) //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); if(dops[i].is_jump) { - if(cinfo[i].ba=(start+slen*4)) + if(cinfo[i].ba < st->start || cinfo[i].ba >= (st->start + st->slen*4)) { // Branch out of this block, flush all regs u=1; @@ -7330,7 +7364,7 @@ static noinline void pass2b_unneeded_regs(int istart, int iend, int r) } else { - if(cinfo[i].ba<=start+i*4) { + if(cinfo[i].ba <= st->start+i*4) { // Backward branch if(dops[i].is_ujump) { @@ -7358,17 +7392,17 @@ static noinline void pass2b_unneeded_regs(int istart, int iend, int r) // Only go three levels deep. This recursion can take an // excessive amount of time if there are a lot of nested loops. if(r<2) { - pass2b_unneeded_regs((cinfo[i].ba-start)>>2, i-1, r+1); + pass2b_unneeded_regs(st, (cinfo[i].ba - st->start)>>2, i-1, r+1); }else{ - unneeded_reg[(cinfo[i].ba-start)>>2]=1; - gte_unneeded[(cinfo[i].ba-start)>>2]=gte_u_unknown; + unneeded_reg[(cinfo[i].ba - st->start)>>2]=1; + gte_unneeded[(cinfo[i].ba - st->start)>>2]=gte_u_unknown; } } /*else*/ if(1) { if (dops[i].is_ujump) { // Unconditional branch - u=unneeded_reg[(cinfo[i].ba-start)>>2]; - gte_u=gte_unneeded[(cinfo[i].ba-start)>>2]; + u=unneeded_reg[(cinfo[i].ba - st->start)>>2]; + gte_u=gte_unneeded[(cinfo[i].ba - st->start)>>2]; branch_unneeded_reg[i]=u; // Merge in delay slot u|=(1LL<>2]; - gte_b=gte_unneeded[(cinfo[i].ba-start)>>2]; + b=unneeded_reg[(cinfo[i].ba - st->start)>>2]; + gte_b=gte_unneeded[(cinfo[i].ba - st->start)>>2]; branch_unneeded_reg[i]=b; // Branch delay slot b|=(1LL<slen-1) { branch_unneeded_reg[i]&=unneeded_reg[i+2]; } else { branch_unneeded_reg[i]=1; @@ -7421,7 +7455,7 @@ static noinline void pass2b_unneeded_regs(int istart, int iend, int r) unneeded_reg[i]=u; gte_unneeded[i]=gte_u; /* - printf("ur (%d,%d) %x: ",istart,iend,start+i*4); + printf("ur (%d,%d) %x: ",istart,iend,st->start+i*4); printf("U:"); int r; for(r=1;r<=CCREG;r++) { @@ -7436,10 +7470,10 @@ static noinline void pass2b_unneeded_regs(int istart, int iend, int r) } } -static noinline void pass2a_unneeded(void) +static noinline void pass2a_unneeded(struct compile_state *st) { int i, j; - for (i = 0; i < slen; i++) + for (i = 0; i < st->slen; i++) { // remove redundant alignment checks if (dops[i].may_except && (dops[i].is_load || dops[i].is_store) @@ -7448,7 +7482,7 @@ static noinline void pass2a_unneeded(void) int base = dops[i].rs1, lsb = cinfo[i].imm, ls_type = dops[i].ls_type; int mask = ls_type == LS_32 ? 3 : 1; lsb &= mask; - for (j = i + 1; j < slen; j++) { + for (j = i + 1; j < st->slen; j++) { if (dops[j].bt || dops[j].is_jump) break; if ((dops[j].is_load || dops[j].is_store) && dops[j].rs1 == base @@ -7461,7 +7495,7 @@ static noinline void pass2a_unneeded(void) // rm redundant stack loads (unoptimized code, assuming no io mem access through sp) if (i > 0 && dops[i].is_load && dops[i].rs1 == 29 && dops[i].ls_type == LS_32 && dops[i-1].is_store && dops[i-1].rs1 == 29 && dops[i-1].ls_type == LS_32 - && dops[i-1].rs2 == dops[i].rt1 && !dops[i-1].is_ds && i < slen - 1 + && dops[i-1].rs2 == dops[i].rt1 && !dops[i-1].is_ds && i < st->slen - 1 && dops[i+1].rs1 != dops[i].rt1 && dops[i+1].rs2 != dops[i].rt1 && !dops[i].bt && cinfo[i].imm == cinfo[i-1].imm) { @@ -7472,14 +7506,14 @@ static noinline void pass2a_unneeded(void) } } -static noinline void pass3_register_alloc(u_int addr) +static noinline void pass3_register_alloc(struct compile_state *st, u_int addr) { struct regstat current; // Current register allocations/status clear_all_regs(current.regmap_entry); clear_all_regs(current.regmap); current.wasdirty = current.dirty = 0; current.u = unneeded_reg[0]; - alloc_reg(¤t, 0, CCREG); + alloc_reg(st,¤t, 0, CCREG); dirty_reg(¤t, CCREG); current.wasconst = 0; current.isconst = 0; @@ -7499,7 +7533,7 @@ static noinline void pass3_register_alloc(u_int addr) unneeded_reg[0]=1; } - for(i=0;islen;i++) { if(dops[i].bt) { @@ -7520,19 +7554,19 @@ static noinline void pass3_register_alloc(u_int addr) regs[i].isconst=0; regs[i].loadedconst=0; if (!dops[i].is_jump) { - if(i+1slen) { current.u=unneeded_reg[i+1]&~((1LL<slen) { current.u=branch_unneeded_reg[i]&~((1LL<start + i*4); abort(); } } @@ -7540,7 +7574,7 @@ static noinline void pass3_register_alloc(u_int addr) if(ds) { ds=0; // Skip delay slot, already allocated as part of branch // ...but we need to alloc it in case something jumps here - if(i+1slen) { current.u=branch_unneeded_reg[i-1]&unneeded_reg[i+1]; }else{ current.u=branch_unneeded_reg[i-1]; @@ -7551,7 +7585,7 @@ static noinline void pass3_register_alloc(u_int addr) memcpy(&temp,¤t,sizeof(current)); temp.wasdirty=temp.dirty; // TODO: Take into account unconditional branches, as below - delayslot_alloc(&temp,i); + delayslot_alloc(st, &temp,i); memcpy(regs[i].regmap,temp.regmap,sizeof(temp.regmap)); regs[i].wasdirty=temp.wasdirty; regs[i].dirty=temp.dirty; @@ -7597,16 +7631,16 @@ static noinline void pass3_register_alloc(u_int addr) alloc_cc(¤t,i); dirty_reg(¤t,CCREG); if (dops[i].rt1==31) { - alloc_reg(¤t,i,31); + alloc_reg(st,¤t,i,31); dirty_reg(¤t,31); //assert(dops[i+1].rs1!=31&&dops[i+1].rs2!=31); //assert(dops[i+1].rt1!=dops[i].rt1); #ifdef REG_PREFETCH - alloc_reg(¤t,i,PTEMP); + alloc_reg(st,¤t,i,PTEMP); #endif } dops[i].ooo=1; - delayslot_alloc(¤t,i+1); + delayslot_alloc(st, ¤t,i+1); //current.isconst=0; // DEBUG ds=1; break; @@ -7619,30 +7653,30 @@ static noinline void pass3_register_alloc(u_int addr) alloc_cc(¤t,i); dirty_reg(¤t,CCREG); if (!ds_writes_rjump_rs(i)) { - alloc_reg(¤t,i,dops[i].rs1); + alloc_reg(st,¤t,i,dops[i].rs1); if (dops[i].rt1!=0) { - alloc_reg(¤t,i,dops[i].rt1); + alloc_reg(st,¤t,i,dops[i].rt1); dirty_reg(¤t,dops[i].rt1); #ifdef REG_PREFETCH - alloc_reg(¤t,i,PTEMP); + alloc_reg(st,¤t,i,PTEMP); #endif } #ifdef USE_MINI_HT if(dops[i].rs1==31) { // JALR - alloc_reg(¤t,i,RHASH); - alloc_reg(¤t,i,RHTBL); + alloc_reg(st,¤t,i,RHASH); + alloc_reg(st,¤t,i,RHTBL); } #endif - delayslot_alloc(¤t,i+1); + delayslot_alloc(st,¤t,i+1); } else { // The delay slot overwrites our source register, // allocate a temporary register to hold the old value. current.isconst=0; current.wasconst=0; regs[i].wasconst=0; - delayslot_alloc(¤t,i+1); + delayslot_alloc(st,¤t,i+1); current.isconst=0; - alloc_reg(¤t,i,RTEMP); + alloc_reg(st,¤t,i,RTEMP); } //current.isconst=0; // DEBUG dops[i].ooo=1; @@ -7658,8 +7692,8 @@ static noinline void pass3_register_alloc(u_int addr) { alloc_cc(¤t,i); dirty_reg(¤t,CCREG); - if(dops[i].rs1) alloc_reg(¤t,i,dops[i].rs1); - if(dops[i].rs2) alloc_reg(¤t,i,dops[i].rs2); + if(dops[i].rs1) alloc_reg(st,¤t,i,dops[i].rs1); + if(dops[i].rs2) alloc_reg(st,¤t,i,dops[i].rs2); if((dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2))|| (dops[i].rs2&&(dops[i].rs2==dops[i+1].rt1||dops[i].rs2==dops[i+1].rt2))) { // The delay slot overwrites one of our conditions. @@ -7667,13 +7701,13 @@ static noinline void pass3_register_alloc(u_int addr) current.isconst=0; current.wasconst=0; regs[i].wasconst=0; - if(dops[i].rs1) alloc_reg(¤t,i,dops[i].rs1); - if(dops[i].rs2) alloc_reg(¤t,i,dops[i].rs2); + if(dops[i].rs1) alloc_reg(st,¤t,i,dops[i].rs1); + if(dops[i].rs2) alloc_reg(st,¤t,i,dops[i].rs2); } else { dops[i].ooo=1; - delayslot_alloc(¤t,i+1); + delayslot_alloc(st,¤t,i+1); } } else @@ -7681,19 +7715,19 @@ static noinline void pass3_register_alloc(u_int addr) { alloc_cc(¤t,i); dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,dops[i].rs1); + alloc_reg(st,¤t,i,dops[i].rs1); if(dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) { // The delay slot overwrites one of our conditions. // Allocate the branch condition registers instead. current.isconst=0; current.wasconst=0; regs[i].wasconst=0; - if(dops[i].rs1) alloc_reg(¤t,i,dops[i].rs1); + if(dops[i].rs1) alloc_reg(st,¤t,i,dops[i].rs1); } else { dops[i].ooo=1; - delayslot_alloc(¤t,i+1); + delayslot_alloc(st,¤t,i+1); } } else @@ -7705,8 +7739,8 @@ static noinline void pass3_register_alloc(u_int addr) regs[i].wasconst=0; alloc_cc(¤t,i); dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,dops[i].rs1); - alloc_reg(¤t,i,dops[i].rs2); + alloc_reg(st,¤t,i,dops[i].rs1); + alloc_reg(st,¤t,i,dops[i].rs2); } else if((dops[i].opcode&0x3E)==0x16) // BLEZL/BGTZL @@ -7716,7 +7750,7 @@ static noinline void pass3_register_alloc(u_int addr) regs[i].wasconst=0; alloc_cc(¤t,i); dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,dops[i].rs1); + alloc_reg(st,¤t,i,dops[i].rs1); } ds=1; //current.isconst=0; @@ -7727,9 +7761,9 @@ static noinline void pass3_register_alloc(u_int addr) { alloc_cc(¤t,i); dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,dops[i].rs1); + alloc_reg(st,¤t,i,dops[i].rs1); if (dops[i].rt1 == 31) { // BLTZAL/BGEZAL - alloc_reg(¤t,i,31); + alloc_reg(st,¤t,i,31); dirty_reg(¤t,31); } if ((dops[i].rs1 && @@ -7740,62 +7774,62 @@ static noinline void pass3_register_alloc(u_int addr) current.isconst=0; current.wasconst=0; regs[i].wasconst=0; - if(dops[i].rs1) alloc_reg(¤t,i,dops[i].rs1); + if(dops[i].rs1) alloc_reg(st,¤t,i,dops[i].rs1); } else { dops[i].ooo=1; - delayslot_alloc(¤t,i+1); + delayslot_alloc(st,¤t,i+1); } } ds=1; //current.isconst=0; break; case IMM16: - imm16_alloc(¤t,i); + imm16_alloc(st, ¤t,i); break; case LOAD: case LOADLR: - load_alloc(¤t,i); + load_alloc(st, ¤t,i); break; case STORE: case STORELR: - store_alloc(¤t,i); + store_alloc(st, ¤t,i); break; case ALU: - alu_alloc(¤t,i); + alu_alloc(st, ¤t,i); break; case SHIFT: - shift_alloc(¤t,i); + shift_alloc(st, ¤t,i); break; case MULTDIV: - multdiv_alloc(¤t,i); + multdiv_alloc(st, ¤t,i); break; case SHIFTIMM: - shiftimm_alloc(¤t,i); + shiftimm_alloc(st, ¤t,i); break; case MOV: - mov_alloc(¤t,i); + mov_alloc(st, ¤t,i); break; case COP0: - cop0_alloc(¤t,i); + cop0_alloc(st, ¤t,i); break; case RFE: - rfe_alloc(¤t,i); + rfe_alloc(st, ¤t,i); break; case COP2: - cop2_alloc(¤t,i); + cop2_alloc(st, ¤t,i); break; case C2LS: - c2ls_alloc(¤t,i); + c2ls_alloc(st, ¤t,i); break; case C2OP: - c2op_alloc(¤t,i); + c2op_alloc(st, ¤t, i); break; case SYSCALL: case HLECALL: case INTCALL: - syscall_alloc(¤t,i); + syscall_alloc(st, ¤t,i); break; } @@ -7869,7 +7903,7 @@ static noinline void pass3_register_alloc(u_int addr) alloc_cc(&branch_regs[i-1],i-1); dirty_reg(&branch_regs[i-1],CCREG); if(dops[i-1].rt1==31) { // JAL - alloc_reg(&branch_regs[i-1],i-1,31); + alloc_reg(st,&branch_regs[i-1],i-1,31); dirty_reg(&branch_regs[i-1],31); } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); @@ -7882,15 +7916,15 @@ static noinline void pass3_register_alloc(u_int addr) branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<slen) { // Internal branch will jump here, match registers to caller current.dirty=0; clear_all_regs(current.regmap); - alloc_reg(¤t,i,CCREG); + alloc_reg(st,¤t,i,CCREG); dirty_reg(¤t,CCREG); for(j=i-1;j>=0;j--) { - if(cinfo[j].ba==start+i*4+4) { + if(cinfo[j].ba==st->start+i*4+4) { memcpy(current.regmap,branch_regs[j].regmap,sizeof(current.regmap)); current.dirty=branch_regs[j].dirty; break; } } while(j>=0) { - if(cinfo[j].ba==start+i*4+4) { + if(cinfo[j].ba==st->start+i*4+4) { for(hr=0;hr2) + else if(dops[i].itype==C2OP&>e_cycletab[st->source[i]&0x3f]>2) { // this should really be removed since the real stalls have been implemented, // but doing so causes sizeable perf regression against the older version - u_int gtec = gte_cycletab[source[i] & 0x3f]; + u_int gtec = gte_cycletab[st->source[i] & 0x3f]; cc += HACK_ENABLED(NDHACK_NO_STALLS) ? gtec/2 : 2; } else if(i>1&&dops[i].itype==STORE&&dops[i-1].itype==STORE&&dops[i-2].itype==STORE&&!dops[i].bt) @@ -8061,19 +8095,19 @@ static noinline void pass3_register_alloc(u_int addr) } } -static noinline void pass4_cull_unused_regs(void) +static noinline void pass4_cull_unused_regs(struct compile_state *st) { u_int last_needed_regs[4] = {0,0,0,0}; u_int nr=0; int i; - for (i=slen-1;i>=0;i--) + for (i = st->slen - 1; i >= 0; i--) { int hr; __builtin_prefetch(regs[i-2].regmap); if(dops[i].is_jump) { - if(cinfo[i].ba=(start+slen*4)) + if(cinfo[i].bastart || cinfo[i].ba>=(st->start+st->slen*4)) { // Branch out of this block, don't need anything nr=0; @@ -8083,7 +8117,7 @@ static noinline void pass4_cull_unused_regs(void) // Internal branch // Need whatever matches the target nr=0; - int t=(cinfo[i].ba-start)>>2; + int t=(cinfo[i].ba-st->start)>>2; for(hr=0;hr=0) { @@ -8094,12 +8128,12 @@ static noinline void pass4_cull_unused_regs(void) // Conditional branch may need registers for following instructions if (!dops[i].is_ujump) { - if(islen-2) { nr |= last_needed_regs[(i+2) & 3]; for(hr=0;hr=0&&get_reg(regs[i+2].regmap_entry,regmap_pre[i+2][hr])<0) nr&=~(1<=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",start+i*4,hr,regmap_entry[i+2][hr]); + //if((regmap_entry[i+2][hr])>=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",st->start+i*4,hr,regmap_entry[i+2][hr]); } } } @@ -8129,7 +8163,7 @@ static noinline void pass4_cull_unused_regs(void) } else // Non-branch { - if(islen-1) { for(hr=0;hr=0&&get_reg(regs[i+1].regmap_entry,regmap_pre[i+1][hr])<0) nr&=~(1<slen-2) { regmap_pre[i+2][hr]=-1; regs[i+2].wasconst&=~(1<slen-1&&!dops[i].is_ds) { assert(regs[i].regmap[hr]<64); if(regmap_pre[i+1][hr]!=-1 || regs[i].regmap[hr]>0) if(regmap_pre[i+1][hr]!=regs[i].regmap[hr]) { SysPrintf_lim("fail: %x (%d %d!=%d)\n", - start+i*4, hr, regmap_pre[i+1][hr], regs[i].regmap[hr]); + st->start+i*4, hr, regmap_pre[i+1][hr], regs[i].regmap[hr]); assert(regmap_pre[i+1][hr]==regs[i].regmap[hr]); } regmap_pre[i+1][hr]=-1; @@ -8276,23 +8310,23 @@ static noinline void pass4_cull_unused_regs(void) // If a register is allocated during a loop, try to allocate it for the // entire loop, if possible. This avoids loading/storing registers // inside of the loop. -static noinline void pass5a_preallocate1(void) +static noinline void pass5a_preallocate1(struct compile_state *st) { int i, j, hr; signed char f_regmap[HOST_REGS]; clear_all_regs(f_regmap); - for(i=0;islen-1;i++) { if(dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP) { - if(cinfo[i].ba>=start && cinfo[i].ba<(start+i*4)) + if(cinfo[i].ba>=st->start && cinfo[i].ba<(st->start+i*4)) if(dops[i+1].itype==NOP||dops[i+1].itype==MOV||dops[i+1].itype==ALU ||dops[i+1].itype==SHIFTIMM||dops[i+1].itype==IMM16||dops[i+1].itype==LOAD ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR ||dops[i+1].itype==SHIFT ||dops[i+1].itype==COP2||dops[i+1].itype==C2LS||dops[i+1].itype==C2OP) { - int t=(cinfo[i].ba-start)>>2; + int t=(cinfo[i].ba-st->start)>>2; if(t > 0 && !dops[t-1].is_jump) // loop_preload can't handle jumps into delay slots if(t<2||(dops[t-2].itype!=UJUMP&&dops[t-2].itype!=RJUMP)||dops[t-2].rt1!=31) // call/ret assumes no registers allocated for(hr=0;hrslen-1;j++) { if(regs[j].regmap[HOST_CCREG]!=-1) break; if(count_free_regs(regs[j].regmap)<=cinfo[j].min_free_regs) { //printf("no free regs for store %x\n",start+j*4); @@ -8569,9 +8603,9 @@ static noinline void pass5a_preallocate1(void) // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. -static noinline void pass5b_preallocate2(void) +static noinline void pass5b_preallocate2(struct compile_state *st) { - int i, hr, limit = min(slen - 1, MAXBLOCK - 2); + int i, hr, limit = min(st->slen - 1, MAXBLOCK - 2); for (i = 0; i < limit; i++) { if (!i || !dops[i-1].is_jump) @@ -8754,15 +8788,17 @@ static noinline void pass5b_preallocate2(void) // Write back dirty registers as soon as we will no longer modify them, // so that we don't end up with lots of writes at the branches. -static noinline void pass6_clean_registers(int istart, int iend, int wr) +static noinline void pass6_clean_registers(struct compile_state *st, + int istart, int iend, int wr) { static u_int wont_dirty[MAXBLOCK]; static u_int will_dirty[MAXBLOCK]; + u_int start = st->start; int i; int r; u_int will_dirty_i,will_dirty_next,temp_will_dirty; u_int wont_dirty_i,wont_dirty_next,temp_wont_dirty; - if(iend==slen-1) { + if(iend==st->slen-1) { will_dirty_i=will_dirty_next=0; wont_dirty_i=wont_dirty_next=0; }else{ @@ -8781,7 +8817,7 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) signed char branch_rregmap_i[RRMAP_SIZE]; u_int branch_hr_candirty = 0; make_rregs(branch_regs[i].regmap, branch_rregmap_i, &branch_hr_candirty); - if(cinfo[i].ba=(start+slen*4)) + if (cinfo[i].ba < start || cinfo[i].ba >= (start + st->slen*4)) { // Branch out of this block, flush all regs will_dirty_i = 0; @@ -8909,7 +8945,7 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) if(wr) { will_dirty[i]=temp_will_dirty; wont_dirty[i]=temp_wont_dirty; - pass6_clean_registers((cinfo[i].ba-start)>>2,i-1,0); + pass6_clean_registers(st, (cinfo[i].ba-start)>>2,i-1,0); }else{ // Limit recursion. It can take an excessive amount // of time if there are a lot of nested loops. @@ -9216,8 +9252,8 @@ static struct block_info *block_info_new(u_int start, u_int len, return block; } -static void block_info_finish(struct block_info *block_tmp, u_char *beginning, - u_char *end) +static void block_info_finish(struct compile_state *st, struct block_info *block_tmp, + u_char *beginning, u_char *end) { u_int page = get_page(block_tmp->start); struct block_info *block; @@ -9251,9 +9287,9 @@ static void block_info_finish(struct block_info *block_tmp, u_char *beginning, block->tc_len = end - beginning; jump_outs = get_jump_outs(block); - for (i = j = 0; i < linkcount; i++) - if (!link_addr[i].internal) - jump_outs[j++] = link_addr[i].target; + for (i = j = 0; i < st->linkcount; i++) + if (!st->link_addr[i].internal) + jump_outs[j++] = st->link_addr[i].target; assert(j == block->jump_out_cnt); // insert sorted by start mirror-unmasked vaddr @@ -9274,6 +9310,7 @@ static void block_info_finish(struct block_info *block_tmp, u_char *beginning, static int noinline new_recompile_block(u_int addr) { + struct compile_state st; u_int pagelimit = 0; u_int state_rflags = 0; int i; @@ -9294,26 +9331,29 @@ static int noinline new_recompile_block(u_int addr) state_rflags |= 1 << i; } - start = addr; + st.source = NULL; + st.start = addr; + st.slen = 0; + st.linkcount = 0; ndrc_g.did_compile++; - if (Config.HLE && start == 0x80001000) // hlecall + if (Config.HLE && st.start == 0x80001000) // hlecall { void *beginning = start_block(16*4); - emit_movimm(start,0); + emit_movimm(st.start,0); emit_writeword(0,&psxRegs.pc); emit_far_jump(new_dyna_leave); literal_pool(0); end_block(beginning); - struct block_info *block = block_info_new(start, 4, NULL, 1, 0); - block->jump_in[0].vaddr = start; + struct block_info *block = block_info_new(st.start, 4, NULL, 1, 0); + block->jump_in[0].vaddr = st.start; block->jump_in[0].addr = beginning; - block_info_finish(block, beginning, out); + block_info_finish(&st, block, beginning, out); return 0; } else if (f1_hack && hack_addr == 0) { void *beginning = start_block(64*4); - emit_movimm(start, 0); + emit_movimm(st.start, 0); emit_writeword(0, &hack_addr); emit_readword(&psxRegs.GPR.n.sp, 0); emit_readptr(&mem_rtab, 1); @@ -9329,18 +9369,18 @@ static int noinline new_recompile_block(u_int addr) literal_pool(0); end_block(beginning); - struct block_info *block = block_info_new(start, 4, NULL, 1, 0); - block->jump_in[0].vaddr = start; + struct block_info *block = block_info_new(st.start, 4, NULL, 1, 0); + block->jump_in[0].vaddr = st.start; block->jump_in[0].addr = beginning; - block_info_finish(block, beginning, out); - SysPrintf("F1 hack to %08x\n", start); + block_info_finish(&st, block, beginning, out); + SysPrintf("F1 hack to %08x\n", st.start); return 0; } cycle_multiplier_active = get_cycle_multiplier(); - source = get_source_start(start, &pagelimit); - if (source == NULL) { + st.source = get_source_start(st.start, &pagelimit); + if (st.source == NULL) { if (addr != hack_addr) { SysPrintf_lim("Compile at bogus memory address: %08x, ra=%x\n", addr, psxRegs.GPR.n.ra); @@ -9362,39 +9402,39 @@ static int noinline new_recompile_block(u_int addr) /* Pass 1 disassembly */ - pass1a_disassemble(pagelimit); - pass1b_bt(); + pass1a_disassemble(&st, pagelimit); + pass1b_bt(&st); - int clear_hack_addr = apply_hacks(); + int clear_hack_addr = apply_hacks(&st); /* Pass 2 - unneeded, register dependencies */ - pass2a_unneeded(); - pass2b_unneeded_regs(0, slen-1, 0); + pass2a_unneeded(&st); + pass2b_unneeded_regs(&st, 0, st.slen-1, 0); /* Pass 3 - Register allocation */ - pass3_register_alloc(addr); + pass3_register_alloc(&st, addr); /* Pass 4 - Cull unused host registers */ - pass4_cull_unused_regs(); + pass4_cull_unused_regs(&st); /* Pass 5 - Pre-allocate registers */ - pass5a_preallocate1(); - pass5b_preallocate2(); + pass5a_preallocate1(&st); + pass5b_preallocate2(&st); /* Pass 6 - Optimize clean/dirty state */ - pass6_clean_registers(0, slen-1, 1); + pass6_clean_registers(&st, 0, st.slen - 1, 1); /* Pass 7 */ - for (i=slen-1;i>=0;i--) + for (i = st.slen - 1; i >= 0; i--) { if(dops[i].itype==CJUMP||dops[i].itype==SJUMP) { // Conditional branch - if((source[i]>>16)!=0x1000&&i>16)!=0x1000&&i\n"); drc_dbg_emit_do_cmp(i, cinfo[i].ccadj); if (clear_hack_addr) { @@ -9479,7 +9520,7 @@ static int noinline new_recompile_block(u_int addr) wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty); load_regs(regs[i].regmap_entry,regs[i].regmap,dops[i].rs1,dops[i].rs2); address_generation(i,®s[i],regs[i].regmap_entry); - load_consts(regmap_pre[i],regs[i].regmap,i); + load_consts(&st,regmap_pre[i],regs[i].regmap,i); if(dops[i].is_jump) { // Load the delay slot registers if necessary @@ -9492,7 +9533,7 @@ static int noinline new_recompile_block(u_int addr) if (dops[i+1].is_store) load_reg(regs[i].regmap_entry,regs[i].regmap,INVCP); } - else if(i+1 0); - if (slen > 0 && dops[slen-1].itype == INTCALL) { + assert(st.slen > 0); + if (st.slen > 0 && dops[st.slen-1].itype == INTCALL) { // no ending needed for this block since INTCALL never returns } // If the block did not end with an unconditional branch, @@ -9529,19 +9570,19 @@ static int noinline new_recompile_block(u_int addr) else if (i > 1) { if (!dops[i-2].is_ujump) { assert(!dops[i-1].is_jump); - assert(i==slen); + assert(i==st.slen); if(dops[i-2].itype!=CJUMP&&dops[i-2].itype!=SJUMP) { - store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); + store_regs_bt(&st,regs[i-1].regmap,regs[i-1].dirty,st.start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_addimm(HOST_CCREG, cinfo[i-1].ccadj + CLOCK_ADJUST(1), HOST_CCREG); } else { - store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].dirty,start+i*4); + store_regs_bt(&st,branch_regs[i-2].regmap,branch_regs[i-2].dirty,st.start+i*4); assert(branch_regs[i-2].regmap[HOST_CCREG]==CCREG); } - add_to_linker(out,start+i*4,0); + add_to_linker(&st,out,st.start+i*4,0); emit_jmp(0); } } @@ -9549,11 +9590,11 @@ static int noinline new_recompile_block(u_int addr) { assert(i>0); assert(!dops[i-1].is_jump); - store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4); + store_regs_bt(&st, regs[i-1].regmap, regs[i-1].dirty, st.start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); emit_addimm(HOST_CCREG, cinfo[i-1].ccadj + CLOCK_ADJUST(1), HOST_CCREG); - add_to_linker(out,start+i*4,0); + add_to_linker(&st,out,st.start+i*4,0); emit_jmp(0); } @@ -9567,28 +9608,35 @@ static int noinline new_recompile_block(u_int addr) case LOADW_STUB: case LOADBU_STUB: case LOADHU_STUB: - do_readstub(i);break; + do_readstub(&st, i); + break; case STOREB_STUB: case STOREH_STUB: case STOREW_STUB: - do_writestub(i);break; + do_writestub(&st, i); + break; case CC_STUB: - do_ccstub(i);break; + do_ccstub(&st, i); + break; case INVCODE_STUB: - do_invstub(i);break; + do_invstub(&st, i); + break; case STORELR_STUB: - do_unalignedwritestub(i);break; + do_unalignedwritestub(&st, i); + break; case OVERFLOW_STUB: - do_overflowstub(i); break; + do_overflowstub(&st, i); + break; case ALIGNMENT_STUB: - do_alignmentstub(i); break; + do_alignmentstub(&st, i); + break; default: assert(0); } } if (instr_addr0_override) - instr_addr[0] = instr_addr0_override; + st.instr_addr[0] = instr_addr0_override; #if 0 /* check for improper expiration */ @@ -9603,68 +9651,68 @@ static int noinline new_recompile_block(u_int addr) /* Pass 9 - Linker */ u_int jump_out_count = 0; - for (i = 0; i < linkcount; i++) + for (i = 0; i < st.linkcount; i++) { assem_debug("link: %p -> %08x\n", - log_addr(link_addr[i].addr), link_addr[i].target); + log_addr(st.link_addr[i].addr), st.link_addr[i].target); literal_pool(64); - if (!link_addr[i].internal) + if (!st.link_addr[i].internal) { void *stub = out; - void *addr = check_addr(link_addr[i].target); - emit_extjump_stub(link_addr[i].addr, link_addr[i].target); + void *addr = check_addr(st.link_addr[i].target); + emit_extjump_stub(st.link_addr[i].addr, st.link_addr[i].target); jump_out_count++; if (addr) { - set_jump_target(link_addr[i].addr, addr); - ndrc_add_jump_out(link_addr[i].target, stub); + set_jump_target(st.link_addr[i].addr, addr); + ndrc_add_jump_out(st.link_addr[i].target, stub); } else - set_jump_target(link_addr[i].addr, stub); + set_jump_target(st.link_addr[i].addr, stub); } else { // Internal branch - int target=(link_addr[i].target-start)>>2; - assert(target>=0&&target> 2; + assert(target>=0&&target 4) + u_int source_len = st.slen*4; + if (dops[st.slen-1].itype == INTCALL && !dops[st.slen-1].bt && source_len > 4) // no need to treat the last instruction as compiled // as the interpreter fully handles it source_len -= 4; // External Branch Targets (jump_in) int jump_in_count = 1; - assert(instr_addr[0]); - for (i = 1; i < slen; i++) + assert(st.instr_addr[0]); + for (i = 1; i < st.slen; i++) { - if (dops[i].bt && instr_addr[i]) + if (dops[i].bt && st.instr_addr[i]) jump_in_count++; } struct block_info *block = - block_info_new(start, source_len, source, jump_in_count, jump_out_count); + block_info_new(st.start, source_len, st.source, jump_in_count, jump_out_count); block->reg_sv_flags = state_rflags; int jump_in_i = 0; - for (i = 0; i < slen; i++) + for (i = 0; i < st.slen; i++) { - if ((i == 0 || dops[i].bt) && instr_addr[i]) + if ((i == 0 || dops[i].bt) && st.instr_addr[i]) { - assem_debug("%p (%d) <- %8x\n", log_addr(instr_addr[i]), i, start + i*4); - u_int vaddr = start + i*4; + assem_debug("%p (%d) <- %8x\n", log_addr(st.instr_addr[i]), i, st.start + i*4); + u_int vaddr = st.start + i*4; literal_pool(256); void *entry = out; load_regs_entry(i); if (entry == out) - entry = instr_addr[i]; + entry = st.instr_addr[i]; else - emit_jmp(instr_addr[i]); + emit_jmp(st.instr_addr[i]); block->jump_in[jump_in_i].vaddr = vaddr; block->jump_in[jump_in_i].addr = entry; @@ -9678,12 +9726,12 @@ static int noinline new_recompile_block(u_int addr) assert(out - (u_char *)beginning < MAX_OUTPUT_BLOCK_SIZE); end_block(beginning); - block_info_finish(block, beginning, out); + block_info_finish(&st, block, beginning, out); block = NULL; // Trap writes to any of the pages we compiled if (source_len) - mark_invalid_code(start, source_len, 0); + mark_invalid_code(st.start, source_len, 0); #ifdef ASSEM_PRINT fflush(stdout); -- 2.47.3