static struct block_info *blocks[PAGE_COUNT];
static struct block_info *block_oldest, *block_last_compiled;
static struct jump_info *jumps[PAGE_COUNT]; // [<target_page>]
- static u_int start;
- static u_int *source;
static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
static uint64_t gte_rt[MAXBLOCK];
static uint64_t gte_unneeded[MAXBLOCK];
static uint32_t constmap[MAXBLOCK][HOST_REGS];
static struct regstat regs[MAXBLOCK];
static struct regstat branch_regs[MAXBLOCK];
- static int slen;
- static void *instr_addr[MAXBLOCK];
- static struct link_entry link_addr[MAXBLOCK];
- static int linkcount;
static struct code_stub stubs[MAXBLOCK*3];
static int stubcount;
static u_int literals[1024][2];
static int literalcount;
- static int is_delayslot;
static u_int stop_after_jal;
static u_int ni_count;
static u_int err_print_count;
#define stat_clear(s)
#endif
+struct compile_state
+{
+ u_int *source;
+ u_int start;
+ int slen;
+ int is_delayslot;
+ int linkcount;
+ void *instr_addr[MAXBLOCK];
+ struct link_entry link_addr[MAXBLOCK];
+};
+
#define HACK_ENABLED(x) ((ndrc_g.hacks | ndrc_g.hacks_pergame) & (x))
/* registers that may be allocated */
static int new_recompile_block(u_int addr);
static void invalidate_block(struct block_info *block);
-static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_);
+static void exception_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_);
// Needed by assembler
static void wb_register(signed char r, const signed char regmap[], u_int dirty);
static void wb_dirtys(const signed char i_regmap[], u_int i_dirty);
-static void wb_needed_dirtys(const signed char i_regmap[], u_int i_dirty, int addr);
+static void wb_needed_dirtys(struct compile_state *st, const signed char i_regmap[],
+ u_int i_dirty, int addr);
static void load_all_regs(const signed char i_regmap[]);
static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[]);
static void load_regs_entry(int t);
static void load_all_consts(const signed char regmap[], u_int dirty, int i);
static u_int get_host_reglist(const signed char *regmap);
-static int get_final_value(int hr, int i, u_int *value);
+static int get_final_value(struct compile_state *st, int hr, int i, u_int *value);
static void add_stub(enum stub_type type, void *addr, void *retaddr,
u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e);
static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
int i, int addr_reg, const struct regstat *i_regs, int ccadj, u_int reglist);
-static void add_to_linker(void *addr, u_int target, int ext);
+static void add_to_linker(struct compile_state *st, void *addr, u_int target, int ext);
static void *get_direct_memhandler(void *table, u_int addr,
enum stub_type type, uintptr_t *addr_host);
-static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist);
+static void cop2_do_stall_check(struct compile_state *st, u_int op, int i,
+ const struct regstat *i_regs, u_int reglist);
static void pass_args(int a0, int a1);
static void emit_far_jump(const void *f);
static void emit_far_call(const void *f);
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used. Try not to reallocate these.
-static void lsn(u_char hsn[], int i)
+static void lsn(struct compile_state *st, u_char hsn[], int i)
{
int j;
int b=-1;
for(j=0;j<9;j++)
{
- if(i+j>=slen) {
- j=slen-i-1;
+ if(i+j >= st->slen) {
+ j = st->slen-i-1;
break;
}
if (dops[i+j].is_ujump)
}
if(b>=0)
{
- if(cinfo[i+b].ba>=start && cinfo[i+b].ba<(start+slen*4))
+ if(cinfo[i+b].ba >= st->start && cinfo[i+b].ba < (st->start + st->slen*4))
{
// Follow first branch
- int t=(cinfo[i+b].ba-start)>>2;
- j=7-b;if(t+j>=slen) j=slen-t-1;
+ int t=(cinfo[i+b].ba-st->start)>>2;
+ j=7-b;
+ if (t+j >= st->slen) j = st->slen-t-1;
for(;j>=0;j--)
{
if(dops[t+j].rs1) if(hsn[dops[t+j].rs1]>j+b+2) hsn[dops[t+j].rs1]=j+b+2;
}
// We only want to allocate registers if we're going to use them again soon
-static int needed_again(int r, int i)
+static int needed_again(struct compile_state *st, int r, int i)
{
int j;
int b=-1;
if (i > 0 && dops[i-1].is_ujump)
{
- if(cinfo[i-1].ba<start || cinfo[i-1].ba>start+slen*4-4)
+ if(cinfo[i-1].ba < st->start || cinfo[i-1].ba > st->start + st->slen*4-4)
return 0; // Don't need any registers if exiting the block
}
for(j=0;j<9;j++)
{
- if(i+j>=slen) {
- j=slen-i-1;
+ if(i+j >= st->slen) {
+ j = st->slen-i-1;
break;
}
if (dops[i+j].is_ujump)
// Try to match register allocations at the end of a loop with those
// at the beginning
-static int loop_reg(int i, int r, int hr)
+static int loop_reg(struct compile_state *st, int i, int r, int hr)
{
int j,k;
for(j=0;j<9;j++)
{
- if(i+j>=slen) {
- j=slen-i-1;
+ if(i+j >= st->slen) {
+ j = st->slen-i-1;
break;
}
if (dops[i+j].is_ujump)
if((unneeded_reg[i+k]>>r)&1) return hr;
if(i+k>=0&&(dops[i+k].itype==UJUMP||dops[i+k].itype==CJUMP||dops[i+k].itype==SJUMP))
{
- if(cinfo[i+k].ba>=start && cinfo[i+k].ba<(start+i*4))
+ if(cinfo[i+k].ba >= st->start && cinfo[i+k].ba < (st->start+i*4))
{
- int t=(cinfo[i+k].ba-start)>>2;
+ int t=(cinfo[i+k].ba - st->start)>>2;
int reg=get_reg(regs[t].regmap_entry,r);
if(reg>=0) return reg;
//reg=get_reg(regs[t+1].regmap_entry,r);
cur->noevict |= 1u << hr;
}
-static void evict_alloc_reg(struct regstat *cur, int i, int reg, int preferred_hr)
+static void evict_alloc_reg(struct compile_state *st, struct regstat *cur,
+ int i, int reg, int preferred_hr)
{
u_char hsn[MAXREG+1];
int j, r, hr;
memset(hsn, 10, sizeof(hsn));
- lsn(hsn, i);
+ lsn(st, hsn, i);
//printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
if(i>0) {
// Don't evict the cycle count at entry points, otherwise the entry
// Note: registers are allocated clean (unmodified state)
// if you intend to modify the register, you must call dirty_reg().
-static void alloc_reg(struct regstat *cur,int i,signed char reg)
+static void alloc_reg(struct compile_state *st, struct regstat *cur,
+ int i, signed char reg)
{
int r,hr;
int preferred_reg = PREFERRED_REG_FIRST
}
// Keep the same mapping if the register was already allocated in a loop
- preferred_reg = loop_reg(i,reg,preferred_reg);
+ preferred_reg = loop_reg(st,i,reg,preferred_reg);
// Try to allocate the preferred register
if (cur->regmap[preferred_reg] == -1) {
// Ok, now we have to evict someone
// Pick a register we hopefully won't need soon
- evict_alloc_reg(cur, i, reg, preferred_reg);
+ evict_alloc_reg(st, cur, i, reg, preferred_reg);
}
// Allocate a temporary register. This is done without regard to
// dirty status or whether the register we request is on the unneeded list
// Note: This will only allocate one register, even if called multiple times
-static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
+static void alloc_reg_temp(struct compile_state *st, struct regstat *cur,
+ int i, signed char reg)
{
int r,hr;
// Ok, now we have to evict someone
// Pick a register we hopefully won't need soon
- evict_alloc_reg(cur, i, reg, 0);
+ evict_alloc_reg(st, cur, i, reg, 0);
}
-static void mov_alloc(struct regstat *current,int i)
+static void mov_alloc(struct compile_state *st, struct regstat *current, int i)
{
if (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG) {
alloc_cc(current,i); // for stalls
}
// Note: Don't need to actually alloc the source registers
- //alloc_reg(current,i,dops[i].rs1);
- alloc_reg(current,i,dops[i].rt1);
+ //alloc_reg(st, current, i, dops[i].rs1);
+ alloc_reg(st, current, i, dops[i].rt1);
clear_const(current,dops[i].rs1);
clear_const(current,dops[i].rt1);
dirty_reg(current,dops[i].rt1);
}
-static void shiftimm_alloc(struct regstat *current,int i)
+static void shiftimm_alloc(struct compile_state *st, struct regstat *current,int i)
{
if(dops[i].opcode2<=0x3) // SLL/SRL/SRA
{
if(dops[i].rt1) {
- if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
+ if(dops[i].rs1&&needed_again(st,dops[i].rs1,i)) alloc_reg(st, current,i,dops[i].rs1);
else dops[i].use_lt1=!!dops[i].rs1;
- alloc_reg(current,i,dops[i].rt1);
- dirty_reg(current,dops[i].rt1);
+ alloc_reg(st, current, i, dops[i].rt1);
+ dirty_reg(current, dops[i].rt1);
if(is_const(current,dops[i].rs1)) {
int v=get_const(current,dops[i].rs1);
if(dops[i].opcode2==0x00) set_const(current,dops[i].rt1,v<<cinfo[i].imm);
}
}
-static void shift_alloc(struct regstat *current,int i)
+static void shift_alloc(struct compile_state *st, struct regstat *current, int i)
{
if(dops[i].rt1) {
- if(dops[i].rs1) alloc_reg(current,i,dops[i].rs1);
- if(dops[i].rs2) alloc_reg(current,i,dops[i].rs2);
- alloc_reg(current,i,dops[i].rt1);
+ if(dops[i].rs1) alloc_reg(st, current, i, dops[i].rs1);
+ if(dops[i].rs2) alloc_reg(st, current, i, dops[i].rs2);
+ alloc_reg(st, current, i, dops[i].rt1);
if(dops[i].rt1==dops[i].rs2) {
- alloc_reg_temp(current,i,-1);
+ alloc_reg_temp(st, current, i, -1);
cinfo[i].min_free_regs=1;
}
clear_const(current,dops[i].rs1);
}
}
-static void alu_alloc(struct regstat *current,int i)
+static void alu_alloc(struct compile_state *st, struct regstat *current, int i)
{
if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU
if(dops[i].rt1) {
if(dops[i].rs1&&dops[i].rs2) {
- alloc_reg(current,i,dops[i].rs1);
- alloc_reg(current,i,dops[i].rs2);
+ alloc_reg(st, current, i, dops[i].rs1);
+ alloc_reg(st, current, i, dops[i].rs2);
}
else {
- if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
- if(dops[i].rs2&&needed_again(dops[i].rs2,i)) alloc_reg(current,i,dops[i].rs2);
+ if(dops[i].rs1&&needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1);
+ if(dops[i].rs2&&needed_again(st,dops[i].rs2,i)) alloc_reg(st,current,i,dops[i].rs2);
}
- alloc_reg(current,i,dops[i].rt1);
+ alloc_reg(st, current, i, dops[i].rt1);
}
if (dops[i].may_except) {
alloc_cc_optional(current, i); // for exceptions
- alloc_reg_temp(current, i, -1);
+ alloc_reg_temp(st, current, i, -1);
cinfo[i].min_free_regs = 1;
}
}
else if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU
if(dops[i].rt1) {
- alloc_reg(current,i,dops[i].rs1);
- alloc_reg(current,i,dops[i].rs2);
- alloc_reg(current,i,dops[i].rt1);
+ alloc_reg(st, current, i, dops[i].rs1);
+ alloc_reg(st, current, i, dops[i].rs2);
+ alloc_reg(st, current, i, dops[i].rt1);
}
}
else if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR
if(dops[i].rt1) {
if(dops[i].rs1&&dops[i].rs2) {
- alloc_reg(current,i,dops[i].rs1);
- alloc_reg(current,i,dops[i].rs2);
+ alloc_reg(st, current, i, dops[i].rs1);
+ alloc_reg(st, current, i, dops[i].rs2);
}
else
{
- if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
- if(dops[i].rs2&&needed_again(dops[i].rs2,i)) alloc_reg(current,i,dops[i].rs2);
+ if(dops[i].rs1&&needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1);
+ if(dops[i].rs2&&needed_again(st,dops[i].rs2,i)) alloc_reg(st,current,i,dops[i].rs2);
}
- alloc_reg(current,i,dops[i].rt1);
+ alloc_reg(st, current, i, dops[i].rt1);
}
}
clear_const(current,dops[i].rs1);
dirty_reg(current,dops[i].rt1);
}
-static void imm16_alloc(struct regstat *current,int i)
+static void imm16_alloc(struct compile_state *st, struct regstat *current, int i)
{
- if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
+ if(dops[i].rs1&&needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1);
else dops[i].use_lt1=!!dops[i].rs1;
- if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1);
+ if(dops[i].rt1) alloc_reg(st,current,i,dops[i].rt1);
if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU
clear_const(current,dops[i].rs1);
clear_const(current,dops[i].rt1);
else clear_const(current,dops[i].rt1);
if (dops[i].may_except) {
alloc_cc_optional(current, i); // for exceptions
- alloc_reg_temp(current, i, -1);
+ alloc_reg_temp(st, current, i, -1);
cinfo[i].min_free_regs = 1;
}
}
dirty_reg(current,dops[i].rt1);
}
-static void load_alloc(struct regstat *current,int i)
+static void load_alloc(struct compile_state *st, struct regstat *current, int i)
{
int need_temp = 0;
clear_const(current,dops[i].rt1);
- //if(dops[i].rs1!=dops[i].rt1&&needed_again(dops[i].rs1,i)) clear_const(current,dops[i].rs1); // Does this help or hurt?
+ //if(dops[i].rs1!=dops[i].rt1&&needed_again(st,dops[i].rs1,i)) clear_const(current,dops[i].rs1); // Does this help or hurt?
if(!dops[i].rs1) current->u&=~1LL; // Allow allocating r0 if it's the source register
- if (needed_again(dops[i].rs1, i))
- alloc_reg(current, i, dops[i].rs1);
+ if (needed_again(st,dops[i].rs1, i))
+ alloc_reg(st,current, i, dops[i].rs1);
if (ram_offset)
- alloc_reg(current, i, ROREG);
+ alloc_reg(st,current, i, ROREG);
if (dops[i].may_except) {
alloc_cc_optional(current, i); // for exceptions
need_temp = 1;
}
if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) {
- alloc_reg(current,i,dops[i].rt1);
+ alloc_reg(st,current,i,dops[i].rt1);
assert(get_reg_w(current->regmap, dops[i].rt1)>=0);
dirty_reg(current,dops[i].rt1);
// LWL/LWR need a temporary register for the old value
if(dops[i].opcode==0x22||dops[i].opcode==0x26)
{
- alloc_reg(current,i,FTEMP);
+ alloc_reg(st,current,i,FTEMP);
need_temp = 1;
}
}
// Load to r0 or unneeded register (dummy load)
// but we still need a register to calculate the address
if(dops[i].opcode==0x22||dops[i].opcode==0x26)
- alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
+ alloc_reg(st,current,i,FTEMP); // LWL/LWR need another temporary
need_temp = 1;
}
if (need_temp) {
- alloc_reg_temp(current, i, -1);
+ alloc_reg_temp(st, current, i, -1);
cinfo[i].min_free_regs = 1;
}
}
// this may eat up to 7 registers
-static void store_alloc(struct regstat *current, int i)
+static void store_alloc(struct compile_state *st, struct regstat *current, int i)
{
clear_const(current,dops[i].rs2);
if(!(dops[i].rs2)) current->u&=~1LL; // Allow allocating r0 if necessary
- if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
- alloc_reg(current,i,dops[i].rs2);
+ if(needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1);
+ alloc_reg(st,current,i,dops[i].rs2);
if (ram_offset)
- alloc_reg(current, i, ROREG);
+ alloc_reg(st,current, i, ROREG);
#if defined(HOST_IMM8)
// On CPUs without 32-bit immediates we need a pointer to invalid_code
- alloc_reg(current, i, INVCP);
+ alloc_reg(st,current, i, INVCP);
#endif
if (dops[i].opcode == 0x2a || dops[i].opcode == 0x2e) { // SWL/SWL
- alloc_reg(current,i,FTEMP);
+ alloc_reg(st,current,i,FTEMP);
}
if (dops[i].may_except)
alloc_cc_optional(current, i); // for exceptions
// We need a temporary register for address generation
- alloc_reg_temp(current,i,-1);
+ alloc_reg_temp(st,current,i,-1);
cinfo[i].min_free_regs=1;
}
-static void c2ls_alloc(struct regstat *current, int i)
+static void c2ls_alloc(struct compile_state *st, struct regstat *current, int i)
{
clear_const(current,dops[i].rt1);
- if(needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1);
- alloc_reg(current,i,FTEMP);
+ if(needed_again(st,dops[i].rs1,i)) alloc_reg(st,current,i,dops[i].rs1);
+ alloc_reg(st,current,i,FTEMP);
if (ram_offset)
- alloc_reg(current, i, ROREG);
+ alloc_reg(st,current, i, ROREG);
#if defined(HOST_IMM8)
// On CPUs without 32-bit immediates we need a pointer to invalid_code
if (dops[i].opcode == 0x3a) // SWC2
- alloc_reg(current,i,INVCP);
+ alloc_reg(st,current,i,INVCP);
#endif
if (dops[i].may_except)
alloc_cc_optional(current, i); // for exceptions
// We need a temporary register for address generation
- alloc_reg_temp(current,i,-1);
+ alloc_reg_temp(st,current,i,-1);
cinfo[i].min_free_regs=1;
}
#ifndef multdiv_alloc
-static void multdiv_alloc(struct regstat *current,int i)
+static void multdiv_alloc(struct compile_state *st, struct regstat *current, int i)
{
// case 0x18: MULT
// case 0x19: MULTU
dirty_reg(current,CCREG);
current->u &= ~(1ull << HIREG);
current->u &= ~(1ull << LOREG);
- alloc_reg(current, i, HIREG);
- alloc_reg(current, i, LOREG);
+ alloc_reg(st,current, i, HIREG);
+ alloc_reg(st,current, i, LOREG);
dirty_reg(current, HIREG);
dirty_reg(current, LOREG);
if ((dops[i].opcode2 & 0x3e) == 0x1a || (dops[i].rs1 && dops[i].rs2)) // div(u)
{
- alloc_reg(current, i, dops[i].rs1);
- alloc_reg(current, i, dops[i].rs2);
+ alloc_reg(st,current, i, dops[i].rs1);
+ alloc_reg(st,current, i, dops[i].rs2);
}
// else multiply by zero is zero
}
#endif
-static void cop0_alloc(struct regstat *current,int i)
+static void cop0_alloc(struct compile_state *st, struct regstat *current, int i)
{
if(dops[i].opcode2==0) // MFC0
{
if(dops[i].rt1) {
clear_const(current,dops[i].rt1);
- alloc_reg(current,i,dops[i].rt1);
+ alloc_reg(st,current,i,dops[i].rt1);
dirty_reg(current,dops[i].rt1);
}
}
else if(dops[i].opcode2==4) // MTC0
{
- if (((source[i]>>11)&0x1e) == 12) {
+ if (((st->source[i]>>11)&0x1e) == 12) {
alloc_cc(current, i);
dirty_reg(current, CCREG);
}
if(dops[i].rs1){
clear_const(current,dops[i].rs1);
- alloc_reg(current,i,dops[i].rs1);
+ alloc_reg(st,current,i,dops[i].rs1);
alloc_all(current,i);
}
else {
alloc_all(current,i); // FIXME: Keep r0
current->u&=~1LL;
- alloc_reg(current,i,0);
+ alloc_reg(st,current,i,0);
}
cinfo[i].min_free_regs = HOST_REGS;
}
}
-static void rfe_alloc(struct regstat *current, int i)
+static void rfe_alloc(struct compile_state *st, struct regstat *current, int i)
{
alloc_all(current, i);
cinfo[i].min_free_regs = HOST_REGS;
}
-static void cop2_alloc(struct regstat *current,int i)
+static void cop2_alloc(struct compile_state *st, struct regstat *current, int i)
{
if (dops[i].opcode2 < 3) // MFC2/CFC2
{
dirty_reg(current,CCREG);
if(dops[i].rt1){
clear_const(current,dops[i].rt1);
- alloc_reg(current,i,dops[i].rt1);
+ alloc_reg(st,current,i,dops[i].rt1);
dirty_reg(current,dops[i].rt1);
}
}
{
if(dops[i].rs1){
clear_const(current,dops[i].rs1);
- alloc_reg(current,i,dops[i].rs1);
+ alloc_reg(st,current,i,dops[i].rs1);
}
else {
current->u&=~1LL;
- alloc_reg(current,i,0);
+ alloc_reg(st,current,i,0);
}
}
- alloc_reg_temp(current,i,-1);
+ alloc_reg_temp(st,current,i,-1);
cinfo[i].min_free_regs=1;
}
-static void c2op_alloc(struct regstat *current,int i)
+static void c2op_alloc(struct compile_state *st, struct regstat *current, int i)
{
alloc_cc(current,i); // for stalls
dirty_reg(current,CCREG);
- alloc_reg_temp(current,i,-1);
+ alloc_reg_temp(st,current,i,-1);
}
-static void syscall_alloc(struct regstat *current,int i)
+static void syscall_alloc(struct compile_state *st, struct regstat *current, int i)
{
alloc_cc(current,i);
dirty_reg(current,CCREG);
current->isconst=0;
}
-static void delayslot_alloc(struct regstat *current,int i)
+static void delayslot_alloc(struct compile_state *st, struct regstat *current, int i)
{
switch(dops[i].itype) {
case UJUMP:
case SYSCALL:
case HLECALL:
case IMM16:
- imm16_alloc(current,i);
+ imm16_alloc(st, current, i);
break;
case LOAD:
case LOADLR:
- load_alloc(current,i);
+ load_alloc(st, current, i);
break;
case STORE:
case STORELR:
- store_alloc(current,i);
+ store_alloc(st, current, i);
break;
case ALU:
- alu_alloc(current,i);
+ alu_alloc(st, current, i);
break;
case SHIFT:
- shift_alloc(current,i);
+ shift_alloc(st, current, i);
break;
case MULTDIV:
- multdiv_alloc(current,i);
+ multdiv_alloc(st, current, i);
break;
case SHIFTIMM:
- shiftimm_alloc(current,i);
+ shiftimm_alloc(st, current, i);
break;
case MOV:
- mov_alloc(current,i);
+ mov_alloc(st, current, i);
break;
case COP0:
- cop0_alloc(current,i);
+ cop0_alloc(st, current, i);
break;
case RFE:
- rfe_alloc(current,i);
+ rfe_alloc(st, current, i);
break;
case COP2:
- cop2_alloc(current,i);
+ cop2_alloc(st, current, i);
break;
case C2LS:
- c2ls_alloc(current,i);
+ c2ls_alloc(st, current, i);
break;
case C2OP:
- c2op_alloc(current,i);
+ c2op_alloc(st, current, i);
break;
}
}
MTYPE_1F80,
};
-static int get_ptr_mem_type(u_int a)
+static int get_ptr_mem_type(struct compile_state *st, u_int a)
{
if(a < 0x00200000) {
- if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
+ if(a<0x1000&&((st->start>>20)==0xbfc||(st->start>>24)==0xa0))
// return wrong, must use memhandler for BIOS self-test to pass
// 007 does similar stuff from a00 mirror, weird stuff
return MTYPE_8000;
return r;
}
-static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs,
- int addr, int *offset_reg, int *addr_reg_override, int ccadj_)
+static void *emit_fastpath_cmp_jump(struct compile_state *st, int i,
+ const struct regstat *i_regs, int addr, int *offset_reg,
+ int *addr_reg_override, int ccadj_)
{
void *jaddr = NULL;
int type = 0;
assert(addr >= 0);
*offset_reg = -1;
if(((smrv_strong|smrv_weak)>>mr)&1) {
- type=get_ptr_mem_type(ndrc_smrv_regs[mr]);
- //printf("set %08x @%08x r%d %d\n", ndrc_smrv_regs[mr], start+i*4, mr, type);
+ type=get_ptr_mem_type(st, ndrc_smrv_regs[mr]);
+ //printf("set %08x @%08x r%d %d\n", ndrc_smrv_regs[mr], st->start+i*4, mr, type);
}
else {
// use the mirror we are running on
- type=get_ptr_mem_type(start);
- //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
+ type=get_ptr_mem_type(st, st->start);
+ //printf("set nospec @%08x r%d %d\n", st->start+i*4, mr, type);
}
if (dops[i].may_except) {
emit_writebyte_indexed(rt, 0, a);
}
-static void load_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void load_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
int addr = cinfo[i].addr;
int s,tl;
if(!c) {
#ifdef R29_HACK
// Strmnnrmn's speed hack
- if(dops[i].rs1!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
+ if(dops[i].rs1!=29||st->start<0x80001000||st->start>=0x80000000+RAM_SIZE)
#endif
{
- jaddr = emit_fastpath_cmp_jump(i, i_regs, addr,
+ jaddr = emit_fastpath_cmp_jump(st, i, i_regs, addr,
&offset_reg, &fastio_reg_override, ccadj_);
}
}
}
#ifndef loadlr_assemble
-static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void loadlr_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
int addr = cinfo[i].addr;
int s,tl,temp,temp2;
}else{
emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
}
- jaddr = emit_fastpath_cmp_jump(i, i_regs, temp2,
+ jaddr = emit_fastpath_cmp_jump(st, i, i_regs, temp2,
&offset_reg, &fastio_reg_override, ccadj_);
}
else {
}
#endif
-static void do_invstub(int n)
+static void do_invstub(struct compile_state *st, int n)
{
literal_pool(20);
- assem_debug("do_invstub %x\n", start + stubs[n].e*4);
+ assem_debug("do_invstub %x\n", st->start + stubs[n].e*4);
u_int reglist = stubs[n].a;
u_int addrr = stubs[n].b;
int ofs_start = stubs[n].c;
emit_jmp(stubs[n].retaddr);
}
-static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglist, int addr)
+static void do_store_smc_check(struct compile_state *st, int i,
+ const struct regstat *i_regs, u_int reglist, int addr)
{
if (HACK_ENABLED(NDHACK_NO_SMC_CHECK))
return;
return;
int j, imm_maxdiff = 32, imm_min = cinfo[i].imm, imm_max = cinfo[i].imm, count = 1;
- if (i < slen - 1 && dops[i+1].is_store && dops[i+1].rs1 == dops[i].rs1
+ if (i < st->slen - 1 && dops[i+1].is_store && dops[i+1].rs1 == dops[i].rs1
&& abs(cinfo[i+1].imm - cinfo[i].imm) <= imm_maxdiff)
return;
for (j = i - 1; j >= 0; j--) {
// (also true non-existent 0x20000000 mirror that shouldn't matter)
#define is_ram_addr(a) !((a) & 0x5f800000)
-static void store_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void store_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
int s,tl;
int addr = cinfo[i].addr;
if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
reglist |= 1u << addr;
if (!c) {
- jaddr = emit_fastpath_cmp_jump(i, i_regs, addr,
+ jaddr = emit_fastpath_cmp_jump(st, i, i_regs, addr,
&offset_reg, &fastio_reg_override, ccadj_);
}
else if (ram_offset && memtarget) {
add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj_,reglist);
}
if (!c || is_ram_addr(addr_const))
- do_store_smc_check(i, i_regs, reglist, addr);
+ do_store_smc_check(st, i, i_regs, reglist, addr);
if (c && !memtarget)
inline_writestub(type, i, addr_const, i_regs->regmap, dops[i].rs2, ccadj_, reglist);
// basic current block modification detection..
// not looking back as that should be in mips cache already
// (see Spyro2 title->attract mode)
- if (start + i*4 < addr_const && addr_const < start + slen*4) {
+ if (st->start + i*4 < addr_const && addr_const < st->start + st->slen*4) {
SysPrintf_lim("write to %08x hits block %08x, pc=%08x\n",
- addr_const, start, start+i*4);
+ addr_const, st->start, st->start+i*4);
assert(i_regs->regmap==regs[i].regmap); // not delay slot
if(i_regs->regmap==regs[i].regmap) {
load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
wb_dirtys(regs[i].regmap_entry,regs[i].wasdirty);
emit_readptr(&hash_table_ptr, 1);
- emit_movimm(start+i*4+4, 0);
+ emit_movimm(st->start+i*4+4, 0);
emit_writeword(0, &psxRegs.pc);
emit_addimm(HOST_CCREG, 2, HOST_CCREG);
emit_far_call(ndrc_get_addr_ht);
}
}
-static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void storelr_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
int addr = cinfo[i].addr;
int s,tl;
if (!c || !memtarget)
add_stub_r(STORELR_STUB,jaddr,out,i,addr,i_regs,ccadj_,reglist);
if (!c || is_ram_addr(addr_const))
- do_store_smc_check(i, i_regs, reglist, addr);
+ do_store_smc_check(st, i, i_regs, reglist, addr);
}
-static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void cop0_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
if(dops[i].opcode2==0) // MFC0
{
signed char t=get_reg_w(i_regs->regmap, dops[i].rt1);
- u_int copr=(source[i]>>11)&0x1f;
+ u_int copr=(st->source[i]>>11)&0x1f;
if(t>=0&&dops[i].rt1!=0) {
emit_readword(&psxRegs.CP0.r[copr],t);
}
{
int s = get_reg(i_regs->regmap, dops[i].rs1);
int cc = get_reg(i_regs->regmap, CCREG);
- char copr=(source[i]>>11)&0x1f;
+ char copr=(st->source[i]>>11)&0x1f;
assert(s>=0);
wb_register(dops[i].rs1,i_regs->regmap,i_regs->dirty);
if (copr == 12 || copr == 13) {
emit_add(HOST_CCREG, HOST_TEMPREG, HOST_CCREG);
emit_addimm(HOST_CCREG, ccadj_ + 2, HOST_CCREG);
emit_writeword(HOST_CCREG, &psxRegs.cycle);
- if (is_delayslot) {
+ if (st->is_delayslot) {
// burn cycles to cause cc_interrupt, which will
// reschedule next_interupt. Relies on CCREG from above.
assem_debug("MTC0 DS %d\n", copr);
emit_loadreg(dops[i].rs1,s);
return;
}
- emit_movimm(start+i*4+4,HOST_TEMPREG);
+ emit_movimm(st->start+i*4+4,HOST_TEMPREG);
emit_writeword(HOST_TEMPREG,&psxRegs.pc);
}
if (s != 2)
emit_readword(&last_count,HOST_TEMPREG);
emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
//emit_writeword(HOST_TEMPREG,&last_count);
- assert(!is_delayslot);
+ assert(!st->is_delayslot);
emit_readword(&psxRegs.pc, 0);
- emit_movimm(start+i*4+4, HOST_TEMPREG);
+ emit_movimm(st->start+i*4+4, HOST_TEMPREG);
emit_cmp(HOST_TEMPREG, 0);
void *jaddr = out;
emit_jeq(0);
emit_writeword(0, &psxRegs.CP0.n.SR);
}
-static int cop2_is_stalling_op(int i, int *cycles)
+static int cop2_is_stalling_op(struct compile_state *st, int i, int *cycles)
{
if (dops[i].itype == COP2 || dops[i].itype == C2LS) {
*cycles = 0;
return 1;
}
if (dops[i].itype == C2OP) {
- *cycles = gte_cycletab[source[i] & 0x3f];
+ *cycles = gte_cycletab[st->source[i] & 0x3f];
return 1;
}
// ... what about MTC2/CTC2/LWC2?
}
#endif
-static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist)
+static void cop2_do_stall_check(struct compile_state *st, u_int op, int i,
+ const struct regstat *i_regs, u_int reglist)
{
int j = i, cycles, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed;
int rtmp = reglist_find_free(reglist);
if (!dops[i].bt) {
for (j = i - 1; j >= 0; j--) {
//if (dops[j].is_ds) break;
- if (cop2_is_stalling_op(j, &other_gte_op_cycles) || dops[j].bt)
+ if (cop2_is_stalling_op(st, j, &other_gte_op_cycles) || dops[j].bt)
break;
if (j > 0 && cinfo[j - 1].ccadj > cinfo[j].ccadj)
break;
if (cycles == 0)
return;
other_gte_op_cycles = -1;
- for (j = i + 1; j < slen; j++) {
- if (cop2_is_stalling_op(j, &other_gte_op_cycles))
+ for (j = i + 1; j < st->slen; j++) {
+ if (cop2_is_stalling_op(st, j, &other_gte_op_cycles))
break;
if (dops[j].is_jump) {
// check ds
- if (j + 1 < slen && cop2_is_stalling_op(j + 1, &other_gte_op_cycles))
+ if (j + 1 < st->slen && cop2_is_stalling_op(st, j + 1, &other_gte_op_cycles))
j++;
break;
}
if (other_gte_op_cycles >= 0)
// will handle stall when assembling that op
return;
- cycles_passed = cinfo[min(j, slen -1)].ccadj - cinfo[i].ccadj;
+ cycles_passed = cinfo[min(j, st->slen -1)].ccadj - cinfo[i].ccadj;
if (cycles_passed >= cycles)
return;
assem_debug("; save gteBusyCycle\n");
return 1;
}
-static void multdiv_prepare_stall(int i, const struct regstat *i_regs, int ccadj_)
+static void multdiv_prepare_stall(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
int j, found = 0, c = 0;
if (HACK_ENABLED(NDHACK_NO_STALLS))
// happens occasionally... cc evicted? Don't bother then
return;
}
- for (j = i + 1; j < slen; j++) {
+ for (j = i + 1; j < st->slen; j++) {
if (dops[j].bt)
break;
if ((found = is_mflohi(j)))
break;
if (dops[j].is_jump) {
// check ds
- if (j + 1 < slen && (found = is_mflohi(j + 1)))
+ if (j + 1 < st->slen && (found = is_mflohi(j + 1)))
j++;
break;
}
}
}
-static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void c2ls_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
int s,tl;
int ar;
int fastio_reg_override = -1;
u_int addr_const = ~0;
u_int reglist=get_host_reglist(i_regs->regmap);
- u_int copr=(source[i]>>16)&0x1f;
+ u_int copr=(st->source[i]>>16)&0x1f;
s=get_reg(i_regs->regmap,dops[i].rs1);
tl=get_reg(i_regs->regmap,FTEMP);
offset=cinfo[i].imm;
}
}
- cop2_do_stall_check(0, i, i_regs, reglist);
+ cop2_do_stall_check(st, 0, i, i_regs, reglist);
if (dops[i].opcode==0x3a) { // SWC2
cop2_get_dreg(copr,tl,-1);
}
else {
if(!c) {
- jaddr2 = emit_fastpath_cmp_jump(i, i_regs, ar,
+ jaddr2 = emit_fastpath_cmp_jump(st, i, i_regs, ar,
&offset_reg, &fastio_reg_override, ccadj_);
}
else if (ram_offset && memtarget) {
if(jaddr2)
add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj_,reglist);
if (dops[i].opcode == 0x3a && (!c || is_ram_addr(addr_const))) // SWC2
- do_store_smc_check(i, i_regs, reglist, ar);
+ do_store_smc_check(st, i, i_regs, reglist, ar);
if (dops[i].opcode == 0x32) { // LWC2
host_tempreg_acquire();
cop2_put_dreg(copr,tl,HOST_TEMPREG);
}
}
-static void cop2_assemble(int i, const struct regstat *i_regs)
+static void cop2_assemble(struct compile_state *st, int i, const struct regstat *i_regs)
{
- u_int copr = (source[i]>>11) & 0x1f;
+ u_int copr = (st->source[i]>>11) & 0x1f;
signed char temp = get_reg_temp(i_regs->regmap);
if (!HACK_ENABLED(NDHACK_NO_STALLS)) {
signed char tl = get_reg(i_regs->regmap, dops[i].rt1);
reglist = reglist_exclude(reglist, tl, -1);
}
- cop2_do_stall_check(0, i, i_regs, reglist);
+ cop2_do_stall_check(st, 0, i, i_regs, reglist);
}
if (dops[i].opcode2==0) { // MFC2
signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1);
}
}
-static void do_unalignedwritestub(int n)
+static void do_unalignedwritestub(struct compile_state *st, int n)
{
- assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
+ assem_debug("do_unalignedwritestub %x\n",st->start+stubs[n].a*4);
literal_pool(256);
set_jump_target(stubs[n].addr, out);
if(cc<0)
emit_loadreg(CCREG,2);
emit_addimm(cc<0?2:cc,(int)stubs[n].d+1,2);
- emit_movimm(start + i*4,3);
+ emit_movimm(st->start + i*4,3);
emit_writeword(3,&psxRegs.pc);
emit_far_call((dops[i].opcode==0x2a?jump_handle_swl:jump_handle_swr));
emit_addimm(0,-((int)stubs[n].d+1),cc<0?2:cc);
emit_jmp(stubs[n].retaddr); // return address
}
+// Out-of-line stub for the arithmetic-overflow slow path.  n indexes stubs[]:
+// stubs[n].a is the instruction index, .c the regstat snapshot, .d the cycle
+// adjustment.  set_jump_target() binds the fast path's pending branch to this
+// stub; it then writes back dirty host registers and emits the overflow
+// exception via exception_assemble().  Compile state is now passed in st.
-static void do_overflowstub(int n)
+static void do_overflowstub(struct compile_state *st, int n)
{
- assem_debug("do_overflowstub %x\n", start + (u_int)stubs[n].a * 4);
+ assem_debug("do_overflowstub %x\n", st->start + (u_int)stubs[n].a * 4);
literal_pool(24);
int i = stubs[n].a;
struct regstat *i_regs = (struct regstat *)stubs[n].c;
int ccadj = stubs[n].d;
set_jump_target(stubs[n].addr, out);
wb_dirtys(regs[i].regmap, regs[i].dirty);
- exception_assemble(i, i_regs, ccadj);
+ exception_assemble(st, i, i_regs, ccadj);
}
-static void do_alignmentstub(int n)
+static void do_alignmentstub(struct compile_state *st, int n)
{
- assem_debug("do_alignmentstub %x\n", start + (u_int)stubs[n].a * 4);
+ assem_debug("do_alignmentstub %x\n", st->start + (u_int)stubs[n].a * 4);
literal_pool(24);
int i = stubs[n].a;
struct regstat *i_regs = (struct regstat *)stubs[n].c;
if (stubs[n].b != 1)
emit_mov(stubs[n].b, 1); // faulting address
emit_movimm(cause, 0);
- exception_assemble(i, i_regs, ccadj);
+ exception_assemble(st, i, i_regs, ccadj);
}
#ifndef multdiv_assemble
}
// call interpreter, exception handler, things that change pc/regs/cycles ...
-static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, u_int pc, void *func)
+static void call_c_cpu_handler(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_, u_int pc, void *func)
{
signed char ccreg=get_reg(i_regs->regmap,CCREG);
assert(ccreg==HOST_CCREG);
- assert(!is_delayslot);
+ assert(!st->is_delayslot);
(void)ccreg;
emit_movimm(pc,3); // Get PC
emit_far_jump(jump_to_new_pc);
}
+// Emit code that raises a CPU exception for instruction i.  The handler is
+// chosen from the instruction class: ALU/IMM16 -> overflow, LOAD/STORE ->
+// address error, opcode2 0x0C -> syscall, anything else -> break; each has a
+// separate delay-slot variant selected via st->is_delayslot.  Reloads CCREG
+// if it was evicted, passes the faulting pc in r2 and the updated cycle
+// count in HOST_CCREG, then far-jumps to the handler (no return here).
-static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void exception_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
// 'break' tends to be littered around to catch things like
// division by 0 and is almost never executed, so don't emit much code here
void *func;
if (dops[i].itype == ALU || dops[i].itype == IMM16)
- func = is_delayslot ? jump_overflow_ds : jump_overflow;
+ func = st->is_delayslot ? jump_overflow_ds : jump_overflow;
else if (dops[i].itype == LOAD || dops[i].itype == STORE)
- func = is_delayslot ? jump_addrerror_ds : jump_addrerror;
+ func = st->is_delayslot ? jump_addrerror_ds : jump_addrerror;
else if (dops[i].opcode2 == 0x0C)
- func = is_delayslot ? jump_syscall_ds : jump_syscall;
+ func = st->is_delayslot ? jump_syscall_ds : jump_syscall;
else
- func = is_delayslot ? jump_break_ds : jump_break;
+ func = st->is_delayslot ? jump_break_ds : jump_break;
if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) // evicted
emit_loadreg(CCREG, HOST_CCREG);
- emit_movimm(start + i*4, 2); // pc
+ emit_movimm(st->start + i*4, 2); // pc
emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG);
emit_far_jump(func);
}
assert(0);
}
+// Emit a call into a C HLE handler.  The handler index is the low 26 bits of
+// the instruction word, looked up in psxHLEt; out-of-range codes fall back
+// to hlecall_bad.  Resume pc is the following instruction (st->start+i*4+4).
-static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void hlecall_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
void *hlefunc = hlecall_bad;
- uint32_t hleCode = source[i] & 0x03ffffff;
+ uint32_t hleCode = st->source[i] & 0x03ffffff;
if (hleCode < ARRAY_SIZE(psxHLEt))
hlefunc = psxHLEt[hleCode];
- call_c_cpu_handler(i, i_regs, ccadj_, start + i*4+4, hlefunc);
+ call_c_cpu_handler(st, i, i_regs, ccadj_, st->start + i*4+4, hlefunc);
}
+// Punt instruction i to the C interpreter (execI) at its own pc — used for
+// instructions the recompiler does not translate directly.
-static void intcall_assemble(int i, const struct regstat *i_regs, int ccadj_)
+static void intcall_assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
- call_c_cpu_handler(i, i_regs, ccadj_, start + i*4, execI);
+ call_c_cpu_handler(st, i, i_regs, ccadj_, st->start + i*4, execI);
}
static void speculate_mov(int rs,int rt)
}
}
-static void speculate_register_values(int i)
+static void speculate_register_values(struct compile_state *st, int i)
{
if(i==0) {
// gp,sp are likely to stay the same throughout the block
int hr = get_reg_w(regs[i].regmap, dops[i].rt1);
u_int value;
if(hr>=0) {
- if(get_final_value(hr,i,&value))
+ if (get_final_value(st, hr, i, &value))
ndrc_smrv_regs[dops[i].rt1]=value;
else ndrc_smrv_regs[dops[i].rt1]=constmap[i][hr];
smrv_strong_next|=1<<dops[i].rt1;
}
break;
case LOAD:
- if(start<0x2000&&(dops[i].rt1==26||(ndrc_smrv_regs[dops[i].rt1]>>24)==0xa0)) {
+ if(st->start<0x2000&&(dops[i].rt1==26||(ndrc_smrv_regs[dops[i].rt1]>>24)==0xa0)) {
// special case for BIOS
ndrc_smrv_regs[dops[i].rt1]=0xa0000000;
smrv_strong_next|=1<<dops[i].rt1;
}
#if 0
int r=4;
- printf("x %08x %08x %d %d c %08x %08x\n",ndrc_smrv_regs[r],start+i*4,
+ printf("x %08x %08x %d %d c %08x %08x\n",ndrc_smrv_regs[r],st->start+i*4,
((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
#endif
}
-static void ujump_assemble(int i, const struct regstat *i_regs);
-static void rjump_assemble(int i, const struct regstat *i_regs);
-static void cjump_assemble(int i, const struct regstat *i_regs);
-static void sjump_assemble(int i, const struct regstat *i_regs);
+static void ujump_assemble(struct compile_state *st, int i, const struct regstat *i_regs);
+static void rjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs);
+static void cjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs);
+static void sjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs);
-static int assemble(int i, const struct regstat *i_regs, int ccadj_)
+static int assemble(struct compile_state *st, int i,
+ const struct regstat *i_regs, int ccadj_)
{
int ds = 0;
switch (dops[i].itype) {
shiftimm_assemble(i, i_regs);
break;
case LOAD:
- load_assemble(i, i_regs, ccadj_);
+ load_assemble(st, i, i_regs, ccadj_);
break;
case LOADLR:
- loadlr_assemble(i, i_regs, ccadj_);
+ loadlr_assemble(st, i, i_regs, ccadj_);
break;
case STORE:
- store_assemble(i, i_regs, ccadj_);
+ store_assemble(st, i, i_regs, ccadj_);
break;
case STORELR:
- storelr_assemble(i, i_regs, ccadj_);
+ storelr_assemble(st, i, i_regs, ccadj_);
break;
case COP0:
- cop0_assemble(i, i_regs, ccadj_);
+ cop0_assemble(st, i, i_regs, ccadj_);
break;
case RFE:
rfe_assemble(i, i_regs);
break;
case COP2:
- cop2_assemble(i, i_regs);
+ cop2_assemble(st, i, i_regs);
break;
case C2LS:
- c2ls_assemble(i, i_regs, ccadj_);
+ c2ls_assemble(st, i, i_regs, ccadj_);
break;
case C2OP:
- c2op_assemble(i, i_regs);
+ c2op_assemble(st, i, i_regs);
break;
case MULTDIV:
multdiv_assemble(i, i_regs);
- multdiv_prepare_stall(i, i_regs, ccadj_);
+ multdiv_prepare_stall(st, i, i_regs, ccadj_);
break;
case MOV:
mov_assemble(i, i_regs);
break;
case SYSCALL:
- exception_assemble(i, i_regs, ccadj_);
+ exception_assemble(st, i, i_regs, ccadj_);
break;
case HLECALL:
- hlecall_assemble(i, i_regs, ccadj_);
+ hlecall_assemble(st, i, i_regs, ccadj_);
break;
case INTCALL:
- intcall_assemble(i, i_regs, ccadj_);
+ intcall_assemble(st, i, i_regs, ccadj_);
break;
case UJUMP:
- ujump_assemble(i, i_regs);
+ ujump_assemble(st, i, i_regs);
ds = 1;
break;
case RJUMP:
- rjump_assemble(i, i_regs);
+ rjump_assemble(st, i, i_regs);
ds = 1;
break;
case CJUMP:
- cjump_assemble(i, i_regs);
+ cjump_assemble(st, i, i_regs);
ds = 1;
break;
case SJUMP:
- sjump_assemble(i, i_regs);
+ sjump_assemble(st, i, i_regs);
ds = 1;
break;
case NOP:
return ds;
}
-static void ds_assemble(int i, const struct regstat *i_regs)
+static void ds_assemble(struct compile_state *st, int i, const struct regstat *i_regs)
{
- speculate_register_values(i);
- is_delayslot = 1;
+ speculate_register_values(st, i);
+ st->is_delayslot = 1;
switch (dops[i].itype) {
case SYSCALL:
case HLECALL:
SysPrintf("Jump in the delay slot. This is probably a bug.\n");
break;
default:
- assemble(i, i_regs, cinfo[i].ccadj);
+ assemble(st, i, i_regs, cinfo[i].ccadj);
}
- is_delayslot = 0;
+ st->is_delayslot = 0;
}
// Is the branch target a valid internal jump?
-static int internal_branch(int addr)
+static int internal_branch(struct compile_state *st, int addr)
{
- if(addr&1) return 0; // Indirect (register) jump
- if(addr>=start && addr<start+slen*4-4)
+ if (addr&1) return 0; // Indirect (register) jump
+ if (addr >= st->start && addr < st->start + st->slen*4-4)
{
return 1;
}
}
}
-static int get_final_value(int hr, int i, u_int *value)
+static int get_final_value(struct compile_state *st, int hr, int i, u_int *value)
{
int reg=regs[i].regmap[hr];
- while(i<slen-1) {
+ while(i < st->slen-1) {
if(regs[i+1].regmap[hr]!=reg) break;
if(!((regs[i+1].isconst>>hr)&1)) break;
if(dops[i+1].bt) break;
i++;
}
- if(i<slen-1) {
+ if(i<st->slen-1) {
if (dops[i].is_jump) {
*value=constmap[i][hr];
return 1;
}
*value=constmap[i][hr];
//printf("c=%lx\n",(long)constmap[i][hr]);
- if(i==slen-1) return 1;
+ if(i==st->slen-1) return 1;
assert(reg < 64);
return !((unneeded_reg[i+1]>>reg)&1);
}
// Load registers with known constants
-static void load_consts(signed char pre[],signed char regmap[],int i)
+static void load_consts(struct compile_state *st, signed char pre[],
+ signed char regmap[], int i)
{
int hr,hr2;
// propagate loaded constant flags
assert(regmap[hr]<64);
if(((regs[i].isconst>>hr)&1)&®map[hr]>0) {
u_int value, similar=0;
- if(get_final_value(hr,i,&value)) {
+ if(get_final_value(st, hr, i, &value)) {
// see if some other register has similar value
for(hr2=0;hr2<HOST_REGS;hr2++) {
if(hr2!=EXCLUDE_REG&&((regs[i].loadedconst>>hr2)&1)) {
}
if(similar) {
u_int value2;
- if(get_final_value(hr2,i,&value2)) // is this needed?
+ if(get_final_value(st, hr2, i, &value2)) // is this needed?
emit_movimm_from(value2,hr2,value,hr);
else
emit_movimm(value,hr);
// Write out dirty registers that we need to reload (pair with load_needed_regs)
// This writes the registers not written by store_regs_bt
-static void wb_needed_dirtys(const signed char i_regmap[], u_int i_dirty, int addr)
+static void wb_needed_dirtys(struct compile_state *st, const signed char i_regmap[],
+ u_int i_dirty, int addr)
{
int hr;
- int t=(addr-start)>>2;
+ int t=(addr-st->start)>>2;
for(hr=0;hr<HOST_REGS;hr++) {
if(hr!=EXCLUDE_REG) {
if(i_regmap[hr]>0) {
}
// Store dirty registers prior to branch
-static void store_regs_bt(signed char i_regmap[],uint64_t i_dirty,int addr)
+static void store_regs_bt(struct compile_state *st, signed char i_regmap[],
+ uint64_t i_dirty, int addr)
{
- if(internal_branch(addr))
+ if (internal_branch(st, addr))
{
- int t=(addr-start)>>2;
+ int t=(addr-st->start)>>2;
int hr;
for(hr=0;hr<HOST_REGS;hr++) {
if(hr!=EXCLUDE_REG) {
}
// Load all needed registers for branch target
-static void load_regs_bt(signed char i_regmap[],uint64_t i_dirty,int addr)
+static void load_regs_bt(struct compile_state *st, signed char i_regmap[],
+ uint64_t i_dirty,int addr)
{
- //if(addr>=start && addr<(start+slen*4))
- if(internal_branch(addr))
+ //if(addr>=st->start && addr<(st->start+st->slen*4))
+ if (internal_branch(st, addr))
{
- int t=(addr-start)>>2;
+ int t=(addr - st->start)>>2;
int hr;
// Store the cycle count before loading something else
if(i_regmap[HOST_CCREG]!=CCREG) {
}
}
-static int match_bt(signed char i_regmap[],uint64_t i_dirty,int addr)
+static int match_bt(struct compile_state *st, signed char i_regmap[],
+ uint64_t i_dirty, int addr)
{
- if(addr>=start && addr<start+slen*4-4)
+ if (addr >= st->start && addr < st->start + st->slen*4-4)
{
- int t=(addr-start)>>2;
+ int t=(addr-st->start)>>2;
int hr;
if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) return 0;
for(hr=0;hr<HOST_REGS;hr++)
reglist |= get_host_reglist(regs[i].regmap_entry);
reglist &= DRC_DBG_REGMASK;
- assem_debug("//do_insn_cmp %08x\n", start+i*4);
+ assem_debug("//do_insn_cmp %08x\n", st->start+i*4);
save_regs(reglist);
// write out changed consts to match the interpreter
if (i > 0 && !dops[i].bt) {
emit_movimm(cinfo[i].imm << 16, 0);
emit_storereg(dops[i].rt1, 0);
}
- emit_movimm(start+i*4,0);
+ emit_movimm(st->start+i*4,0);
emit_writeword(0,&psxRegs.pc);
int cc = get_reg(regs[i].regmap_entry, CCREG);
if (cc < 0)
#endif
// Used when a branch jumps into the delay slot of another branch
-static void ds_assemble_entry(int i)
+static void ds_assemble_entry(struct compile_state *st, int i)
{
- int t = (cinfo[i].ba - start) >> 2;
+ int t = (cinfo[i].ba - st->start) >> 2;
int ccadj_ = -CLOCK_ADJUST(1);
- if (!instr_addr[t])
- instr_addr[t] = out;
+ if (!st->instr_addr[t])
+ st->instr_addr[t] = out;
assem_debug("Assemble delay slot at %x\n",cinfo[i].ba);
assem_debug("<->\n");
drc_dbg_emit_do_cmp(t, ccadj_);
load_reg(regs[t].regmap_entry,regs[t].regmap,ROREG);
if (dops[t].is_store)
load_reg(regs[t].regmap_entry,regs[t].regmap,INVCP);
- is_delayslot=0;
+ st->is_delayslot = 0;
switch (dops[t].itype) {
case SYSCALL:
case HLECALL:
SysPrintf("Jump in the delay slot. This is probably a bug.\n");
break;
default:
- assemble(t, ®s[t], ccadj_);
+ assemble(st, t, ®s[t], ccadj_);
}
- store_regs_bt(regs[t].regmap,regs[t].dirty,cinfo[i].ba+4);
- load_regs_bt(regs[t].regmap,regs[t].dirty,cinfo[i].ba+4);
- if(internal_branch(cinfo[i].ba+4))
+ store_regs_bt(st, regs[t].regmap, regs[t].dirty, cinfo[i].ba+4);
+ load_regs_bt(st,regs[t].regmap,regs[t].dirty,cinfo[i].ba+4);
+ if (internal_branch(st, cinfo[i].ba+4))
assem_debug("branch: internal\n");
else
assem_debug("branch: external\n");
- assert(internal_branch(cinfo[i].ba+4));
- add_to_linker(out,cinfo[i].ba+4,internal_branch(cinfo[i].ba+4));
+ assert(internal_branch(st, cinfo[i].ba+4));
+ add_to_linker(st,out,cinfo[i].ba+4, internal_branch(st, cinfo[i].ba+4));
emit_jmp(0);
}
emit_movimm_from(imm1,rt1,imm2,rt2);
}
-static void do_cc(int i, const signed char i_regmap[], int *adj,
- int addr, int taken, int invert)
+static void do_cc(struct compile_state *st, int i, const signed char i_regmap[],
+ int *adj, int addr, int taken, int invert)
{
int count, count_plus2;
void *jaddr;
{
*adj=0;
}
- //if(cinfo[i].ba>=start && cinfo[i].ba<(start+slen*4))
- if(internal_branch(cinfo[i].ba))
+ //if(cinfo[i].ba>=st->start && cinfo[i].ba<(st->start+st->slen*4))
+ if(internal_branch(st,cinfo[i].ba))
{
- t=(cinfo[i].ba-start)>>2;
+ t=(cinfo[i].ba-st->start)>>2;
if(dops[t].is_ds) *adj=-CLOCK_ADJUST(1); // Branch into delay slot adds an extra cycle
else *adj=cinfo[t].ccadj;
}
}
count = cinfo[i].ccadj;
count_plus2 = count + CLOCK_ADJUST(2);
- if(taken==TAKEN && i==(cinfo[i].ba-start)>>2 && source[i+1]==0) {
+ if(taken==TAKEN && i==(cinfo[i].ba-st->start)>>2 && st->source[i+1]==0) {
// Idle loop
if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG);
idle=out;
add_stub(CC_STUB,jaddr,idle?idle:out,(*adj==0||invert||idle)?0:count_plus2,i,addr,taken,0);
}
-static void do_ccstub(int n)
+static void do_ccstub(struct compile_state *st, int n)
{
literal_pool(256);
- assem_debug("do_ccstub %x\n",start+(u_int)stubs[n].b*4);
+ assem_debug("do_ccstub %x\n",st->start+(u_int)stubs[n].b*4);
set_jump_target(stubs[n].addr, out);
int i = stubs[n].b;
int r_pc = -1;
wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty);
}
else {
- if(internal_branch(cinfo[i].ba))
- wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ if(internal_branch(st,cinfo[i].ba))
+ wb_needed_dirtys(st, branch_regs[i].regmap, branch_regs[i].dirty, cinfo[i].ba);
}
if(stubs[n].c!=-1)
{
#ifdef HAVE_CMOV_IMM
if(s2l>=0) emit_cmp(s1l,s2l);
else emit_test(s1l,s1l);
- emit_cmov2imm_e_ne_compact(cinfo[i].ba,start+i*4+8,addr);
+ emit_cmov2imm_e_ne_compact(cinfo[i].ba,st->start+i*4+8,addr);
#else
- emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt);
+ emit_mov2imm_compact(cinfo[i].ba,addr,st->start+i*4+8,alt);
if(s2l>=0) emit_cmp(s1l,s2l);
else emit_test(s1l,s1l);
emit_cmovne_reg(alt,addr);
#ifdef HAVE_CMOV_IMM
if(s2l>=0) emit_cmp(s1l,s2l);
else emit_test(s1l,s1l);
- emit_cmov2imm_e_ne_compact(start+i*4+8,cinfo[i].ba,addr);
+ emit_cmov2imm_e_ne_compact(st->start+i*4+8,cinfo[i].ba,addr);
#else
- emit_mov2imm_compact(start+i*4+8,addr,cinfo[i].ba,alt);
+ emit_mov2imm_compact(st->start+i*4+8,addr,cinfo[i].ba,alt);
if(s2l>=0) emit_cmp(s1l,s2l);
else emit_test(s1l,s1l);
emit_cmovne_reg(alt,addr);
else if (dops[i].opcode == 6) // BLEZ
{
//emit_movimm(cinfo[i].ba,alt);
- //emit_movimm(start+i*4+8,addr);
- emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr);
+ //emit_movimm(st->start+i*4+8,addr);
+ emit_mov2imm_compact(cinfo[i].ba,alt,st->start+i*4+8,addr);
emit_cmpimm(s1l,1);
emit_cmovl_reg(alt,addr);
}
else if (dops[i].opcode == 7) // BGTZ
{
//emit_movimm(cinfo[i].ba,addr);
- //emit_movimm(start+i*4+8,ntaddr);
- emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,ntaddr);
+ //emit_movimm(st->start+i*4+8,ntaddr);
+ emit_mov2imm_compact(cinfo[i].ba,addr,st->start+i*4+8,ntaddr);
emit_cmpimm(s1l,1);
emit_cmovl_reg(ntaddr,addr);
}
else if (dops[i].itype == SJUMP) // BLTZ/BGEZ
{
//emit_movimm(cinfo[i].ba,alt);
- //emit_movimm(start+i*4+8,addr);
+ //emit_movimm(st->start+i*4+8,addr);
if (dops[i].rs1) {
emit_mov2imm_compact(cinfo[i].ba,
- (dops[i].opcode2 & 1) ? addr : alt, start + i*4 + 8,
+ (dops[i].opcode2 & 1) ? addr : alt, st->start + i*4 + 8,
(dops[i].opcode2 & 1) ? alt : addr);
emit_test(s1l,s1l);
emit_cmovs_reg(alt,addr);
}
else
- emit_movimm((dops[i].opcode2 & 1) ? cinfo[i].ba : start + i*4 + 8, addr);
+ emit_movimm((dops[i].opcode2 & 1) ? cinfo[i].ba : st->start + i*4 + 8, addr);
}
r_pc = addr;
}
emit_far_call(cc_interrupt);
if(stubs[n].a) emit_addimm(HOST_CCREG,-(int)stubs[n].a,HOST_CCREG);
if(stubs[n].d==TAKEN) {
- if(internal_branch(cinfo[i].ba))
- load_needed_regs(branch_regs[i].regmap,regs[(cinfo[i].ba-start)>>2].regmap_entry);
+ if(internal_branch(st,cinfo[i].ba))
+ load_needed_regs(branch_regs[i].regmap,regs[(cinfo[i].ba-st->start)>>2].regmap_entry);
else if(dops[i].itype==RJUMP) {
if(get_reg(branch_regs[i].regmap,RTEMP)>=0)
emit_readword(&psxRegs.pc,get_reg(branch_regs[i].regmap,RTEMP));
emit_loadreg(dops[i].rs1,get_reg(branch_regs[i].regmap,dops[i].rs1));
}
}else if(stubs[n].d==NOTTAKEN) {
- if(i<slen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]);
+ if(i<st->slen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]);
else load_all_regs(branch_regs[i].regmap);
}else{
load_all_regs(branch_regs[i].regmap);
do_jump_vaddr(stubs[n].e);
}
+// Record a pending branch patch site: the jump emitted at addr targets guest
+// address target and is queued in st->link_addr[] for later linking.
+// is_internal flags targets inside the block currently being compiled
+// (callers pass internal_branch()'s result).  Asserts on table overflow.
-static void add_to_linker(void *addr, u_int target, int is_internal)
+static void add_to_linker(struct compile_state *st, void *addr, u_int target,
+ int is_internal)
{
- assert(linkcount < ARRAY_SIZE(link_addr));
- link_addr[linkcount].addr = addr;
- link_addr[linkcount].target = target;
- link_addr[linkcount].internal = is_internal;
- linkcount++;
+ assert(st->linkcount < ARRAY_SIZE(st->link_addr));
+ st->link_addr[st->linkcount].addr = addr;
+ st->link_addr[st->linkcount].target = target;
+ st->link_addr[st->linkcount].internal = is_internal;
+ st->linkcount++;
}
-static void ujump_assemble_write_ra(int i)
+static void ujump_assemble_write_ra(struct compile_state *st, int i)
{
int rt;
unsigned int return_address;
rt=get_reg(branch_regs[i].regmap,31);
//assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
//assert(rt>=0);
- return_address=start+i*4+8;
+ return_address=st->start+i*4+8;
if(rt>=0) {
#ifdef USE_MINI_HT
- if(internal_branch(return_address)&&dops[i+1].rt1!=31) {
+ if(internal_branch(st,return_address)&&dops[i+1].rt1!=31) {
int temp=-1; // note: must be ds-safe
#ifdef HOST_TEMPREG
temp=HOST_TEMPREG;
#endif
- if(temp>=0) do_miniht_insert(return_address,rt,temp);
+ if(temp>=0) do_miniht_insert(st, return_address, rt, temp);
else emit_movimm(return_address,rt);
}
else
}
}
-static void ujump_assemble(int i, const struct regstat *i_regs)
+static void ujump_assemble(struct compile_state *st, int i, const struct regstat *i_regs)
{
- if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n");
+ if(i==(cinfo[i].ba-st->start)>>2) assem_debug("idle loop\n");
address_generation(i+1,i_regs,regs[i].regmap_entry);
#ifdef REG_PREFETCH
int temp=get_reg(branch_regs[i].regmap,PTEMP);
if(dops[i].rt1==31&&temp>=0)
{
signed char *i_regmap=i_regs->regmap;
- int return_address=start+i*4+8;
+ int return_address=st->start+i*4+8;
if(get_reg(branch_regs[i].regmap,31)>0)
if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp);
}
#endif
if (dops[i].rt1 == 31)
- ujump_assemble_write_ra(i); // writeback ra for DS
- ds_assemble(i+1,i_regs);
+ ujump_assemble_write_ra(st, i); // writeback ra for DS
+ ds_assemble(st, i+1, i_regs);
uint64_t bc_unneeded=branch_regs[i].u;
bc_unneeded|=1|(1LL<<dops[i].rt1);
wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,bc_unneeded);
int cc,adj;
cc=get_reg(branch_regs[i].regmap,CCREG);
assert(cc==HOST_CCREG);
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ store_regs_bt(st, branch_regs[i].regmap, branch_regs[i].dirty, cinfo[i].ba);
#ifdef REG_PREFETCH
if(dops[i].rt1==31&&temp>=0) emit_prefetchreg(temp);
#endif
- do_cc(i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0);
+ do_cc(st,i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0);
if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
- if(internal_branch(cinfo[i].ba))
+ load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ if(internal_branch(st,cinfo[i].ba))
assem_debug("branch: internal\n");
else
assem_debug("branch: external\n");
- if (internal_branch(cinfo[i].ba) && dops[(cinfo[i].ba-start)>>2].is_ds) {
- ds_assemble_entry(i);
+ if (internal_branch(st, cinfo[i].ba) && dops[(cinfo[i].ba-st->start)>>2].is_ds) {
+ ds_assemble_entry(st, i);
}
else {
- add_to_linker(out,cinfo[i].ba,internal_branch(cinfo[i].ba));
+ add_to_linker(st, out, cinfo[i].ba, internal_branch(st,cinfo[i].ba));
emit_jmp(0);
}
}
-static void rjump_assemble_write_ra(int i)
+static void rjump_assemble_write_ra(struct compile_state *st, int i)
{
int rt,return_address;
rt=get_reg_w(branch_regs[i].regmap, dops[i].rt1);
//assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
assert(rt>=0);
- return_address=start+i*4+8;
+ return_address=st->start+i*4+8;
#ifdef REG_PREFETCH
if(temp>=0)
{
#endif
}
-static void rjump_assemble(int i, const struct regstat *i_regs)
+static void rjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs)
{
int temp;
int rs,cc;
{
if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) {
signed char *i_regmap=i_regs->regmap;
- int return_address=start+i*4+8;
+ int return_address=st->start+i*4+8;
if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp);
}
}
}
#endif
if (dops[i].rt1 != 0)
- rjump_assemble_write_ra(i);
- ds_assemble(i+1,i_regs);
+ rjump_assemble_write_ra(st, i);
+ ds_assemble(st, i+1, i_regs);
uint64_t bc_unneeded=branch_regs[i].u;
bc_unneeded|=1|(1LL<<dops[i].rt1);
bc_unneeded&=~(1LL<<dops[i].rs1);
do_rhash(rs,rh);
}
#endif
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1);
+ store_regs_bt(st, branch_regs[i].regmap,branch_regs[i].dirty,-1);
#ifdef DESTRUCTIVE_WRITEBACK
if((branch_regs[i].dirty>>rs)&1) {
if(dops[i].rs1!=dops[i+1].rt1&&dops[i].rs1!=dops[i+1].rt2) {
do_miniht_load(ht,rh);
}
#endif
- //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN);
+ //do_cc(st,i,branch_regs[i].regmap,&adj,-1,TAKEN);
//if(adj) emit_addimm(cc,2*(cinfo[i].ccadj+2-adj),cc); // ??? - Shouldn't happen
//assert(adj==0);
emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG);
emit_jmp(0);
else
emit_jns(0);
- //load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1);
+ //load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,-1);
#ifdef USE_MINI_HT
if(dops[i].rs1==31) {
do_miniht_jump(rs,rh,ht);
do_jump_vaddr(rs);
}
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- if(dops[i].rt1!=31&&i<slen-2&&(((u_int)out)&7)) emit_mov(13,13);
+ if(dops[i].rt1!=31&&i<st->slen-2&&(((u_int)out)&7)) emit_mov(13,13);
#endif
}
host_tempreg_release();
}
-static void cjump_assemble(int i, const struct regstat *i_regs)
+static void cjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs)
{
const signed char *i_regmap = i_regs->regmap;
int cc;
int match;
- match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ match = match_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
assem_debug("match=%d\n",match);
int s1l,s2l;
int unconditional=0,nop=0;
int invert=0;
- int internal=internal_branch(cinfo[i].ba);
- if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n");
+ int internal=internal_branch(st,cinfo[i].ba);
+ if(i==(cinfo[i].ba-st->start)>>2) assem_debug("idle loop\n");
if(!match) invert=1;
if (vsync_hack && (vsync_hack >> 16) == i) invert=1;
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- if(i>(cinfo[i].ba-start)>>2) invert=1;
+ if(i>(cinfo[i].ba-st->start)>>2) invert=1;
#endif
#ifdef __aarch64__
invert=1; // because of near cond. branches
// Out of order execution (delay slot first)
//printf("OOOE\n");
address_generation(i+1,i_regs,regs[i].regmap_entry);
- ds_assemble(i+1,i_regs);
+ ds_assemble(st, i+1, i_regs);
int adj;
uint64_t bc_unneeded=branch_regs[i].u;
bc_unneeded&=~((1LL<<dops[i].rs1)|(1LL<<dops[i].rs2));
cc=get_reg(branch_regs[i].regmap,CCREG);
assert(cc==HOST_CCREG);
if(unconditional)
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
- //do_cc(i,branch_regs[i].regmap,&adj,unconditional?cinfo[i].ba:-1,unconditional);
+ store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ //do_cc(st,i,branch_regs[i].regmap,&adj,unconditional?cinfo[i].ba:-1,unconditional);
//assem_debug("cycle count (adj)\n");
if(unconditional) {
- do_cc(i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0);
- if(i!=(cinfo[i].ba-start)>>2 || source[i+1]!=0) {
+ do_cc(st,i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0);
+ if(i!=(cinfo[i].ba-st->start)>>2 || st->source[i+1]!=0) {
if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
if(internal)
assem_debug("branch: internal\n");
else
assem_debug("branch: external\n");
- if (internal && dops[(cinfo[i].ba-start)>>2].is_ds) {
- ds_assemble_entry(i);
+ if (internal && dops[(cinfo[i].ba - st->start)>>2].is_ds) {
+ ds_assemble_entry(st, i);
}
else {
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st, out, cinfo[i].ba, internal);
emit_jmp(0);
}
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc);
void *jaddr=out;
emit_jns(0);
- add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
+ add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0);
}
else {
void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL;
- do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
+ do_cc(st,i,branch_regs[i].regmap,&adj,-1,0,invert);
if(adj&&!invert) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc);
//printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
nottaken=out;
emit_jne(DJT_1);
}else{
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jeq(0);
}
}
nottaken=out;
emit_jeq(DJT_1);
}else{
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jne(0);
}
}
nottaken=out;
emit_jge(DJT_1);
}else{
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jl(0);
}
}
nottaken=out;
emit_jl(DJT_1);
}else{
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jge(0);
}
}
if (vsync_hack && (vsync_hack >> 16) == i)
vsync_hack_assemble(i, vsync_hack & 0xffff, cc);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- if (match && (!internal || !dops[(cinfo[i].ba-start)>>2].is_ds)) {
+ if (match && (!internal || !dops[(cinfo[i].ba-st->start)>>2].is_ds)) {
if(adj) {
emit_addimm(cc,-adj,cc);
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
}else{
emit_addnop(13);
- add_to_linker(out,cinfo[i].ba,internal*2);
+ add_to_linker(st,out,cinfo[i].ba,internal*2);
}
emit_jmp(0);
}else
#endif
{
if(adj) emit_addimm(cc,-adj,cc);
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
if(internal)
assem_debug("branch: internal\n");
else
assem_debug("branch: external\n");
- if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) {
- ds_assemble_entry(i);
+ if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) {
+ ds_assemble_entry(st, i);
}
else {
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jmp(0);
}
}
if (ram_offset)
load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
- ds_assemble(i+1,&branch_regs[i]);
+ ds_assemble(st, i+1, &branch_regs[i]);
drc_dbg_emit_wb_dirtys(i+1, &branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
if(cc==-1) {
// CHECK: Is the following instruction (fall thru) allocated ok?
}
assert(cc==HOST_CCREG);
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
- do_cc(i,i_regmap,&adj,cinfo[i].ba,TAKEN,0);
+ store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ do_cc(st,i,i_regmap,&adj,cinfo[i].ba,TAKEN,0);
assem_debug("cycle count (adj)\n");
if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
if(internal)
assem_debug("branch: internal\n");
else
assem_debug("branch: external\n");
- if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) {
- ds_assemble_entry(i);
+ if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) {
+ ds_assemble_entry(st, i);
}
else {
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jmp(0);
}
}
if (ram_offset)
load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
- ds_assemble(i+1,&branch_regs[i]);
+ ds_assemble(st, i+1, &branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
if (cc == -1) {
// Cycle count isn't in a register, temporarily load it then write it out
emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG);
void *jaddr=out;
emit_jns(0);
- add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
+ add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0);
emit_storereg(CCREG,HOST_CCREG);
}
else{
emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc);
void *jaddr=out;
emit_jns(0);
- add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
+ add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0);
}
}
}
}
-static void sjump_assemble(int i, const struct regstat *i_regs)
+static void sjump_assemble(struct compile_state *st, int i, const struct regstat *i_regs)
{
const signed char *i_regmap = i_regs->regmap;
int cc;
int match;
- match=match_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ match = match_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
assem_debug("smatch=%d ooo=%d\n", match, dops[i].ooo);
int s1l;
int unconditional=0,nevertaken=0;
int invert=0;
- int internal=internal_branch(cinfo[i].ba);
- if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n");
+ int internal=internal_branch(st,cinfo[i].ba);
+ if(i==(cinfo[i].ba-st->start)>>2) assem_debug("idle loop\n");
if(!match) invert=1;
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- if(i>(cinfo[i].ba-start)>>2) invert=1;
+ if(i>(cinfo[i].ba-st->start)>>2) invert=1;
#endif
#ifdef __aarch64__
invert=1; // because of near cond. branches
// Out of order execution (delay slot first)
//printf("OOOE\n");
address_generation(i+1,i_regs,regs[i].regmap_entry);
- ds_assemble(i+1,i_regs);
+ ds_assemble(st, i+1, i_regs);
int adj;
uint64_t bc_unneeded=branch_regs[i].u;
bc_unneeded&=~((1LL<<dops[i].rs1)|(1LL<<dops[i].rs2));
//assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
if(rt>=0) {
// Save the PC even if the branch is not taken
- return_address=start+i*4+8;
+ return_address=st->start+i*4+8;
emit_movimm(return_address,rt); // PC into link register
#ifdef IMM_PREFETCH
if(!nevertaken) emit_prefetch(hash_table_get(return_address));
cc=get_reg(branch_regs[i].regmap,CCREG);
assert(cc==HOST_CCREG);
if(unconditional)
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
- //do_cc(i,branch_regs[i].regmap,&adj,unconditional?cinfo[i].ba:-1,unconditional);
+ store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ //do_cc(st,i,branch_regs[i].regmap,&adj,unconditional?cinfo[i].ba:-1,unconditional);
assem_debug("cycle count (adj)\n");
if(unconditional) {
- do_cc(i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0);
- if(i!=(cinfo[i].ba-start)>>2 || source[i+1]!=0) {
+ do_cc(st,i,branch_regs[i].regmap,&adj,cinfo[i].ba,TAKEN,0);
+ if(i!=(cinfo[i].ba-st->start)>>2 || st->source[i+1]!=0) {
if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
if(internal)
assem_debug("branch: internal\n");
else
assem_debug("branch: external\n");
- if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) {
- ds_assemble_entry(i);
+ if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) {
+ ds_assemble_entry(st, i);
}
else {
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jmp(0);
}
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc);
void *jaddr=out;
emit_jns(0);
- add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
+ add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0);
}
else {
void *nottaken = NULL;
- do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
+ do_cc(st,i,branch_regs[i].regmap,&adj,-1,0,invert);
if(adj&&!invert) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc);
{
assert(s1l>=0);
nottaken=out;
emit_jns(DJT_1);
}else{
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_js(0);
}
}
nottaken=out;
emit_js(DJT_1);
}else{
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jns(0);
}
}
if(invert) {
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- if (match && (!internal || !dops[(cinfo[i].ba - start) >> 2].is_ds)) {
+ if (match && (!internal || !dops[(cinfo[i].ba - st->start) >> 2].is_ds)) {
if(adj) {
emit_addimm(cc,-adj,cc);
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
}else{
emit_addnop(13);
- add_to_linker(out,cinfo[i].ba,internal*2);
+ add_to_linker(st,out,cinfo[i].ba,internal*2);
}
emit_jmp(0);
}else
#endif
{
if(adj) emit_addimm(cc,-adj,cc);
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
if(internal)
assem_debug("branch: internal\n");
else
assem_debug("branch: external\n");
- if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) {
- ds_assemble_entry(i);
+ if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) {
+ ds_assemble_entry(st, i);
}
else {
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jmp(0);
}
}
rt = get_reg(branch_regs[i].regmap,31);
if(rt >= 0) {
// Save the PC even if the branch is not taken
- return_address = start + i*4+8;
+ return_address = st->start + i*4+8;
emit_movimm(return_address, rt); // PC into link register
#ifdef IMM_PREFETCH
emit_prefetch(hash_table_get(return_address));
if (ram_offset)
load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
- ds_assemble(i+1,&branch_regs[i]);
+ ds_assemble(st, i+1, &branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
if(cc==-1) {
emit_loadreg(CCREG,cc=HOST_CCREG);
// CHECK: Is the following instruction (fall thru) allocated ok?
}
assert(cc==HOST_CCREG);
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
- do_cc(i,i_regmap,&adj,cinfo[i].ba,TAKEN,0);
+ store_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ do_cc(st,i,i_regmap,&adj,cinfo[i].ba,TAKEN,0);
assem_debug("cycle count (adj)\n");
if(adj) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
+ load_regs_bt(st,branch_regs[i].regmap,branch_regs[i].dirty,cinfo[i].ba);
if(internal)
assem_debug("branch: internal\n");
else
assem_debug("branch: external\n");
- if (internal && dops[(cinfo[i].ba - start) >> 2].is_ds) {
- ds_assemble_entry(i);
+ if (internal && dops[(cinfo[i].ba - st->start) >> 2].is_ds) {
+ ds_assemble_entry(st, i);
}
else {
- add_to_linker(out,cinfo[i].ba,internal);
+ add_to_linker(st,out,cinfo[i].ba,internal);
emit_jmp(0);
}
}
if (ram_offset)
load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG);
load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP);
- ds_assemble(i+1,&branch_regs[i]);
+ ds_assemble(st, i+1, &branch_regs[i]);
cc=get_reg(branch_regs[i].regmap,CCREG);
if (cc == -1) {
// Cycle count isn't in a register, temporarily load it then write it out
emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), HOST_CCREG);
void *jaddr=out;
emit_jns(0);
- add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
+ add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0);
emit_storereg(CCREG,HOST_CCREG);
}
else{
emit_addimm_and_set_flags(cinfo[i].ccadj + CLOCK_ADJUST(2), cc);
void *jaddr=out;
emit_jns(0);
- add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
+ add_stub(CC_STUB,jaddr,out,0,i,st->start+i*4+8,NOTTAKEN,0);
}
}
}
}
/* disassembly */
-void disassemble_inst(int i)
+void disassemble_inst(int i, u_int start, u_int *source)
{
if (dops[i].bt) printf("*"); else printf(" ");
switch(dops[i].itype) {
}
#else
#define set_mnemonic(i_, n_)
-static void disassemble_inst(int i) {}
+static void disassemble_inst(int i, u_int start, u_int *source) {}
#endif // DISASM
#define DRC_TEST_VAL 0x74657374
cinfo[i].ba = -1;
}
-static noinline void do_vsync(int i)
+static noinline void do_vsync(struct compile_state *st, int i)
{
// lui a0, x; addiu a0, x; jal puts
u32 addr = (cinfo[i].imm << 16) + (signed short)cinfo[i+1].imm;
if (!str || strncmp(str, "VSync: timeout", 14))
return;
// jal clearPad, jal clearRCnt; j return; nop
- for (j = i+2; j < slen; j++) {
+ for (j = i+2; j < st->slen; j++) {
if (dops[j].itype == SHIFTIMM || dops[j].itype == IMM16 || dops[j].itype == ALU)
continue;
if (dops[j].opcode == 0x03) {
}
break;
}
- if (j >= slen || jals_cnt != 3 || dops[j++].opcode != 0x02)
+ if (j >= st->slen || jals_cnt != 3 || dops[j++].opcode != 0x02)
return;
- for (; j < slen; j++)
+ for (; j < st->slen; j++)
if (dops[j].itype != SHIFTIMM && dops[j].itype != IMM16)
break;
- if (j >= slen || dops[j].opcode != 0x23) // lw x, condition
+ if (j >= st->slen || dops[j].opcode != 0x23) // lw x, condition
return;
j += 2;
if (dops[j].opcode != 0 || dops[j].opcode2 != 0x2A) // slt x, y
return;
if (dops[++j].opcode != 0x05) // bnez x, loop
return;
- t = (cinfo[j].ba - start) / 4;
- if (t < 0 || t >= slen)
+ t = (cinfo[j].ba - st->start) / 4;
+ if (t < 0 || t >= st->slen)
return;
// lw x, d(sp)
if (dops[t].opcode != 0x23 || dops[t].rs1 != 29 || (u32)cinfo[t].imm >= 1024)
return;
if (dops[t+2].opcode != 0x09 || cinfo[t+2].imm != -1) // addiu x, -1
return;
- SysPrintf("vsync @%08x\n", start + t*4);
+ SysPrintf("vsync @%08x\n", st->start + t*4);
vsync_hack = (j << 16) | (cinfo[t].imm & 0xffff);
}
-static int apply_hacks(void)
+static int apply_hacks(struct compile_state *st)
{
int i;
vsync_hack = 0;
if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS))
return 0;
/* special hack(s) */
- for (i = 0; i < slen - 4; i++)
+ for (i = 0; i < st->slen - 4; i++)
{
// lui a4, 0xf200; jal <rcnt_read>; addu a0, 2; slti v0, 28224
- if (source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP
- && source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a
+ if (st->source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP
+ && st->source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a
&& cinfo[i+3].imm == 0x6e40 && dops[i+3].rs1 == 2)
{
- SysPrintf("PE2 hack @%08x\n", start + (i+3)*4);
+ SysPrintf("PE2 hack @%08x\n", st->start + (i+3)*4);
dops[i + 3].itype = NOP;
}
// see also: psxBiosCheckExe()
&& dops[i+1].opcode == 0x09 && dops[i+1].rt1 == 4 && dops[i+1].rs1 == 4
&& dops[i+2].opcode == 0x03)
{
- do_vsync(i);
+ do_vsync(st, i);
}
}
- if (source[0] == 0x3c05edb8 && source[1] == 0x34a58320)
+ if (st->source[0] == 0x3c05edb8 && st->source[1] == 0x34a58320)
{
// lui a1, 0xEDB8; ori a1, 0x8320
- SysPrintf("F1 2000 hack @%08x\n", start);
+ SysPrintf("F1 2000 hack @%08x\n", st->start);
cycle_multiplier_active = 100;
}
- i = slen;
- if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008
- && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809
+ i = st->slen;
+ if (i > 10 && st->source[i-1] == 0 && st->source[i-2] == 0x03e00008
+ && st->source[i-4] == 0x8fbf0018 && st->source[i-6] == 0x00c0f809
&& dops[i-7].itype == STORE)
{
i = i-8;
if (dops[i].itype == STORELR && dops[i].rs1 == 6
&& dops[i-1].itype == STORELR && dops[i-1].rs1 == 6)
{
- SysPrintf("F1 hack from %08x, old dst %08x\n", start, hack_addr);
+ SysPrintf("F1 hack from %08x, old dst %08x\n", st->start, hack_addr);
f1_hack = 1;
return 1;
}
#if 0 // alt vsync, not used
if (Config.HLE)
{
- if (start <= psxRegs.biosBranchCheck && psxRegs.biosBranchCheck < start + i*4)
+ if (st->start <= psxRegs.biosBranchCheck && psxRegs.biosBranchCheck < st->start + i*4)
{
- i = (psxRegs.biosBranchCheck - start) / 4u + 23;
+ i = (psxRegs.biosBranchCheck - st->start) / 4u + 23;
if (dops[i].is_jump && !dops[i+1].bt)
{
force_intcall(i);
return op->itype != CJUMP && op->itype != SJUMP;
}
-static void disassemble_one(int i, u_int src)
+static void disassemble_one(struct compile_state *st, int i, u_int src)
{
unsigned int type, op, op2, op3;
enum ls_width_type ls_type = LS_32;
break;
}
if (type == INTCALL)
- SysPrintf_lim("NI %08x @%08x (%08x)\n", src, start + i*4, start);
+ SysPrintf_lim("NI %08x @%08x (%08x)\n", src, st->start + i*4, st->start);
dops[i].itype = type;
dops[i].opcode2 = op2;
dops[i].ls_type = ls_type;
}
}
-static noinline void pass1a_disassemble(u_int pagelimit)
+static noinline void pass1a_disassemble(struct compile_state *st, u_int pagelimit)
{
int i, j, done = 0;
int ds_next = 0;
{
int force_j_to_interpreter = 0;
unsigned int type, op, op2;
+ u_int start = st->start;
- disassemble_one(i, source[i]);
+ disassemble_one(st, i, st->source[i]);
dops[i].is_ds = ds_next; ds_next = 0;
type = dops[i].itype;
op = dops[i].opcode;
/* Calculate branch target addresses */
if(type==UJUMP)
- cinfo[i].ba=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4);
+ cinfo[i].ba=((start+i*4+4)&0xF0000000)|(((unsigned int)st->source[i]<<6)>>4);
else if(type==CJUMP&&dops[i].rs1==dops[i].rs2&&(op&1))
cinfo[i].ba=start+i*4+8; // Ignore never taken branch
else if(type==SJUMP&&dops[i].rs1==0&&!(op2&1))
cinfo[i].ba=start+i*4+8; // Ignore never taken branch
else if(type==CJUMP||type==SJUMP)
- cinfo[i].ba=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14);
+ cinfo[i].ba=start+i*4+4+((signed int)((unsigned int)st->source[i]<<16)>>14);
/* simplify always (not)taken branches */
if (type == CJUMP && dops[i].rs1 == dops[i].rs2) {
dops[i].is_jump = type == RJUMP || type == UJUMP || type == CJUMP || type == SJUMP;
dops[i].is_ujump = type == RJUMP || type == UJUMP;
dops[i].is_load = type == LOAD || type == LOADLR || op == 0x32; // LWC2
- dops[i].is_delay_load = (dops[i].is_load || (source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC
+ dops[i].is_delay_load = (dops[i].is_load || (st->source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC
dops[i].is_store = type == STORE || type == STORELR || op == 0x3a; // SWC2
dops[i].is_exception = type == SYSCALL || type == HLECALL || type == INTCALL;
dops[i].may_except = dops[i].is_exception || (type == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8;
u_int limit = 0;
u_int *mem = get_source_start(cinfo[i-1].ba, &limit);
if (mem != NULL) {
- disassemble_one(MAXBLOCK - 1, mem[0]);
+ disassemble_one(st, MAXBLOCK - 1, mem[0]);
dop = &dops[MAXBLOCK - 1];
}
}
done = 2;
}
if (i >= 2) {
- if ((source[i-2] & 0xffe0f800) == 0x40806000 // MTC0 $12
+ if ((st->source[i-2] & 0xffe0f800) == 0x40806000 // MTC0 $12
|| (dops[i-2].is_jump && dops[i-2].rt1 == 31)) // call
dops[i].bt = 1;
}
- if (i >= 1 && (source[i-1] & 0xffe0f800) == 0x40806800) // MTC0 $13
+ if (i >= 1 && (st->source[i-1] & 0xffe0f800) == 0x40806800) // MTC0 $13
dops[i].bt = 1;
/* Is this the end of the block? */
if ((u_int)(t - i) < 64 && start + (t+64)*4 < pagelimit) {
// scan for a branch back to i+1
for (j = t; j < t + 64; j++) {
- int tmpop = source[j] >> 26;
+ int tmpop = st->source[j] >> 26;
if (tmpop == 1 || ((tmpop & ~3) == 4)) {
- int t2 = j + 1 + (int)(signed short)source[j];
+ int t2 = j + 1 + (int)(signed short)st->source[j];
if (t2 == i + 1) {
//printf("blk expand %08x<-%08x\n", start + (i+1)*4, start + j*4);
found_bbranch = 1;
i--;
assert(i > 0);
assert(!dops[i-1].is_jump);
- slen = i;
+ st->slen = i;
}
-static noinline void pass1b_bt(void)
+static noinline void pass1b_bt(struct compile_state *st)
{
int i;
- for (i = 0; i < slen; i++)
- if (dops[i].is_jump && start <= cinfo[i].ba && cinfo[i].ba < start+slen*4)
+ for (i = 0; i < st->slen; i++)
+ if (dops[i].is_jump && st->start <= cinfo[i].ba && cinfo[i].ba < st->start+st->slen*4)
// Internal branch, flag target
- dops[(cinfo[i].ba - start) >> 2].bt = 1;
+ dops[(cinfo[i].ba - st->start) >> 2].bt = 1;
}
// Basic liveness analysis for MIPS registers
-static noinline void pass2b_unneeded_regs(int istart, int iend, int r)
+static noinline void pass2b_unneeded_regs(struct compile_state *st,
+ int istart, int iend, int r)
{
int i;
uint64_t u,gte_u,b,gte_b;
uint64_t gte_u_unknown=0;
if (HACK_ENABLED(NDHACK_GTE_UNNEEDED))
gte_u_unknown=~0ll;
- if(iend==slen-1) {
+ if(iend==st->slen-1) {
u=1;
gte_u=gte_u_unknown;
}else{
//printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
if(dops[i].is_jump)
{
- if(cinfo[i].ba<start || cinfo[i].ba>=(start+slen*4))
+ if(cinfo[i].ba < st->start || cinfo[i].ba >= (st->start + st->slen*4))
{
// Branch out of this block, flush all regs
u=1;
}
else
{
- if(cinfo[i].ba<=start+i*4) {
+ if(cinfo[i].ba <= st->start+i*4) {
// Backward branch
if(dops[i].is_ujump)
{
// Only go three levels deep. This recursion can take an
// excessive amount of time if there are a lot of nested loops.
if(r<2) {
- pass2b_unneeded_regs((cinfo[i].ba-start)>>2, i-1, r+1);
+ pass2b_unneeded_regs(st, (cinfo[i].ba - st->start)>>2, i-1, r+1);
}else{
- unneeded_reg[(cinfo[i].ba-start)>>2]=1;
- gte_unneeded[(cinfo[i].ba-start)>>2]=gte_u_unknown;
+ unneeded_reg[(cinfo[i].ba - st->start)>>2]=1;
+ gte_unneeded[(cinfo[i].ba - st->start)>>2]=gte_u_unknown;
}
} /*else*/ if(1) {
if (dops[i].is_ujump)
{
// Unconditional branch
- u=unneeded_reg[(cinfo[i].ba-start)>>2];
- gte_u=gte_unneeded[(cinfo[i].ba-start)>>2];
+ u=unneeded_reg[(cinfo[i].ba - st->start)>>2];
+ gte_u=gte_unneeded[(cinfo[i].ba - st->start)>>2];
branch_unneeded_reg[i]=u;
// Merge in delay slot
u|=(1LL<<dops[i+1].rt1)|(1LL<<dops[i+1].rt2);
gte_u&=~gte_rs[i+1];
} else {
// Conditional branch
- b=unneeded_reg[(cinfo[i].ba-start)>>2];
- gte_b=gte_unneeded[(cinfo[i].ba-start)>>2];
+ b=unneeded_reg[(cinfo[i].ba - st->start)>>2];
+ gte_b=gte_unneeded[(cinfo[i].ba - st->start)>>2];
branch_unneeded_reg[i]=b;
// Branch delay slot
b|=(1LL<<dops[i+1].rt1)|(1LL<<dops[i+1].rt2);
gte_b&=~gte_rs[i+1];
u&=b;
gte_u&=gte_b;
- if(i<slen-1) {
+ if(i<st->slen-1) {
branch_unneeded_reg[i]&=unneeded_reg[i+2];
} else {
branch_unneeded_reg[i]=1;
unneeded_reg[i]=u;
gte_unneeded[i]=gte_u;
/*
- printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
+ printf("ur (%d,%d) %x: ",istart,iend,st->start+i*4);
printf("U:");
int r;
for(r=1;r<=CCREG;r++) {
}
}
-static noinline void pass2a_unneeded(void)
+static noinline void pass2a_unneeded(struct compile_state *st)
{
int i, j;
- for (i = 0; i < slen; i++)
+ for (i = 0; i < st->slen; i++)
{
// remove redundant alignment checks
if (dops[i].may_except && (dops[i].is_load || dops[i].is_store)
int base = dops[i].rs1, lsb = cinfo[i].imm, ls_type = dops[i].ls_type;
int mask = ls_type == LS_32 ? 3 : 1;
lsb &= mask;
- for (j = i + 1; j < slen; j++) {
+ for (j = i + 1; j < st->slen; j++) {
if (dops[j].bt || dops[j].is_jump)
break;
if ((dops[j].is_load || dops[j].is_store) && dops[j].rs1 == base
// rm redundant stack loads (unoptimized code, assuming no io mem access through sp)
if (i > 0 && dops[i].is_load && dops[i].rs1 == 29 && dops[i].ls_type == LS_32
&& dops[i-1].is_store && dops[i-1].rs1 == 29 && dops[i-1].ls_type == LS_32
- && dops[i-1].rs2 == dops[i].rt1 && !dops[i-1].is_ds && i < slen - 1
+ && dops[i-1].rs2 == dops[i].rt1 && !dops[i-1].is_ds && i < st->slen - 1
&& dops[i+1].rs1 != dops[i].rt1 && dops[i+1].rs2 != dops[i].rt1
&& !dops[i].bt && cinfo[i].imm == cinfo[i-1].imm)
{
}
}
-static noinline void pass3_register_alloc(u_int addr)
+static noinline void pass3_register_alloc(struct compile_state *st, u_int addr)
{
struct regstat current; // Current register allocations/status
clear_all_regs(current.regmap_entry);
clear_all_regs(current.regmap);
current.wasdirty = current.dirty = 0;
current.u = unneeded_reg[0];
- alloc_reg(&current, 0, CCREG);
+ alloc_reg(st,&current, 0, CCREG);
dirty_reg(&current, CCREG);
current.wasconst = 0;
current.isconst = 0;
unneeded_reg[0]=1;
}
- for(i=0;i<slen;i++)
+ for(i=0;i<st->slen;i++)
{
if(dops[i].bt)
{
regs[i].isconst=0;
regs[i].loadedconst=0;
if (!dops[i].is_jump) {
- if(i+1<slen) {
+ if(i+1<st->slen) {
current.u=unneeded_reg[i+1]&~((1LL<<dops[i].rs1)|(1LL<<dops[i].rs2));
current.u|=1;
} else {
current.u=1;
}
} else {
- if(i+1<slen) {
+ if(i+1<st->slen) {
current.u=branch_unneeded_reg[i]&~((1LL<<dops[i+1].rs1)|(1LL<<dops[i+1].rs2));
current.u&=~((1LL<<dops[i].rs1)|(1LL<<dops[i].rs2));
current.u|=1;
} else {
- SysPrintf("oops, branch at end of block with no delay slot @%08x\n", start + i*4);
+ SysPrintf("oops, branch at end of block with no delay slot @%08x\n", st->start + i*4);
abort();
}
}
if(ds) {
ds=0; // Skip delay slot, already allocated as part of branch
// ...but we need to alloc it in case something jumps here
- if(i+1<slen) {
+ if(i+1<st->slen) {
current.u=branch_unneeded_reg[i-1]&unneeded_reg[i+1];
}else{
current.u=branch_unneeded_reg[i-1];
memcpy(&temp,&current,sizeof(current));
temp.wasdirty=temp.dirty;
// TODO: Take into account unconditional branches, as below
- delayslot_alloc(&temp,i);
+ delayslot_alloc(st, &temp,i);
memcpy(regs[i].regmap,temp.regmap,sizeof(temp.regmap));
regs[i].wasdirty=temp.wasdirty;
regs[i].dirty=temp.dirty;
alloc_cc(&current,i);
dirty_reg(&current,CCREG);
if (dops[i].rt1==31) {
- alloc_reg(&current,i,31);
+ alloc_reg(st,&current,i,31);
dirty_reg(&current,31);
//assert(dops[i+1].rs1!=31&&dops[i+1].rs2!=31);
//assert(dops[i+1].rt1!=dops[i].rt1);
#ifdef REG_PREFETCH
- alloc_reg(&current,i,PTEMP);
+ alloc_reg(st,&current,i,PTEMP);
#endif
}
dops[i].ooo=1;
- delayslot_alloc(&current,i+1);
+ delayslot_alloc(st, &current,i+1);
//current.isconst=0; // DEBUG
ds=1;
break;
alloc_cc(&current,i);
dirty_reg(&current,CCREG);
if (!ds_writes_rjump_rs(i)) {
- alloc_reg(&current,i,dops[i].rs1);
+ alloc_reg(st,&current,i,dops[i].rs1);
if (dops[i].rt1!=0) {
- alloc_reg(&current,i,dops[i].rt1);
+ alloc_reg(st,&current,i,dops[i].rt1);
dirty_reg(&current,dops[i].rt1);
#ifdef REG_PREFETCH
- alloc_reg(&current,i,PTEMP);
+ alloc_reg(st,&current,i,PTEMP);
#endif
}
#ifdef USE_MINI_HT
if(dops[i].rs1==31) { // JALR
- alloc_reg(&current,i,RHASH);
- alloc_reg(&current,i,RHTBL);
+ alloc_reg(st,&current,i,RHASH);
+ alloc_reg(st,&current,i,RHTBL);
}
#endif
- delayslot_alloc(&current,i+1);
+ delayslot_alloc(st,&current,i+1);
} else {
// The delay slot overwrites our source register,
// allocate a temporary register to hold the old value.
current.isconst=0;
current.wasconst=0;
regs[i].wasconst=0;
- delayslot_alloc(&current,i+1);
+ delayslot_alloc(st,&current,i+1);
current.isconst=0;
- alloc_reg(&current,i,RTEMP);
+ alloc_reg(st,&current,i,RTEMP);
}
//current.isconst=0; // DEBUG
dops[i].ooo=1;
{
alloc_cc(&current,i);
dirty_reg(&current,CCREG);
- if(dops[i].rs1) alloc_reg(&current,i,dops[i].rs1);
- if(dops[i].rs2) alloc_reg(&current,i,dops[i].rs2);
+ if(dops[i].rs1) alloc_reg(st,&current,i,dops[i].rs1);
+ if(dops[i].rs2) alloc_reg(st,&current,i,dops[i].rs2);
if((dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2))||
(dops[i].rs2&&(dops[i].rs2==dops[i+1].rt1||dops[i].rs2==dops[i+1].rt2))) {
// The delay slot overwrites one of our conditions.
current.isconst=0;
current.wasconst=0;
regs[i].wasconst=0;
- if(dops[i].rs1) alloc_reg(&current,i,dops[i].rs1);
- if(dops[i].rs2) alloc_reg(&current,i,dops[i].rs2);
+ if(dops[i].rs1) alloc_reg(st,&current,i,dops[i].rs1);
+ if(dops[i].rs2) alloc_reg(st,&current,i,dops[i].rs2);
}
else
{
dops[i].ooo=1;
- delayslot_alloc(&current,i+1);
+ delayslot_alloc(st,&current,i+1);
}
}
else
{
alloc_cc(&current,i);
dirty_reg(&current,CCREG);
- alloc_reg(&current,i,dops[i].rs1);
+ alloc_reg(st,&current,i,dops[i].rs1);
if(dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) {
// The delay slot overwrites one of our conditions.
// Allocate the branch condition registers instead.
current.isconst=0;
current.wasconst=0;
regs[i].wasconst=0;
- if(dops[i].rs1) alloc_reg(&current,i,dops[i].rs1);
+ if(dops[i].rs1) alloc_reg(st,&current,i,dops[i].rs1);
}
else
{
dops[i].ooo=1;
- delayslot_alloc(&current,i+1);
+ delayslot_alloc(st,&current,i+1);
}
}
else
regs[i].wasconst=0;
alloc_cc(&current,i);
dirty_reg(&current,CCREG);
- alloc_reg(&current,i,dops[i].rs1);
- alloc_reg(&current,i,dops[i].rs2);
+ alloc_reg(st,&current,i,dops[i].rs1);
+ alloc_reg(st,&current,i,dops[i].rs2);
}
else
if((dops[i].opcode&0x3E)==0x16) // BLEZL/BGTZL
regs[i].wasconst=0;
alloc_cc(&current,i);
dirty_reg(&current,CCREG);
- alloc_reg(&current,i,dops[i].rs1);
+ alloc_reg(st,&current,i,dops[i].rs1);
}
ds=1;
//current.isconst=0;
{
alloc_cc(&current,i);
dirty_reg(&current,CCREG);
- alloc_reg(&current,i,dops[i].rs1);
+ alloc_reg(st,&current,i,dops[i].rs1);
if (dops[i].rt1 == 31) { // BLTZAL/BGEZAL
- alloc_reg(&current,i,31);
+ alloc_reg(st,&current,i,31);
dirty_reg(&current,31);
}
if ((dops[i].rs1 &&
current.isconst=0;
current.wasconst=0;
regs[i].wasconst=0;
- if(dops[i].rs1) alloc_reg(&current,i,dops[i].rs1);
+ if(dops[i].rs1) alloc_reg(st,&current,i,dops[i].rs1);
}
else
{
dops[i].ooo=1;
- delayslot_alloc(&current,i+1);
+ delayslot_alloc(st,&current,i+1);
}
}
ds=1;
//current.isconst=0;
break;
case IMM16:
- imm16_alloc(&current,i);
+ imm16_alloc(st, &current,i);
break;
case LOAD:
case LOADLR:
- load_alloc(&current,i);
+ load_alloc(st, &current,i);
break;
case STORE:
case STORELR:
- store_alloc(&current,i);
+ store_alloc(st, &current,i);
break;
case ALU:
- alu_alloc(&current,i);
+ alu_alloc(st, &current,i);
break;
case SHIFT:
- shift_alloc(&current,i);
+ shift_alloc(st, &current,i);
break;
case MULTDIV:
- multdiv_alloc(&current,i);
+ multdiv_alloc(st, &current,i);
break;
case SHIFTIMM:
- shiftimm_alloc(&current,i);
+ shiftimm_alloc(st, &current,i);
break;
case MOV:
- mov_alloc(&current,i);
+ mov_alloc(st, &current,i);
break;
case COP0:
- cop0_alloc(&current,i);
+ cop0_alloc(st, &current,i);
break;
case RFE:
- rfe_alloc(&current,i);
+ rfe_alloc(st, &current,i);
break;
case COP2:
- cop2_alloc(&current,i);
+ cop2_alloc(st, &current,i);
break;
case C2LS:
- c2ls_alloc(&current,i);
+ c2ls_alloc(st, &current,i);
break;
case C2OP:
- c2op_alloc(&current,i);
+ c2op_alloc(st, &current, i);
break;
case SYSCALL:
case HLECALL:
case INTCALL:
- syscall_alloc(&current,i);
+ syscall_alloc(st, &current,i);
break;
}
alloc_cc(&branch_regs[i-1],i-1);
dirty_reg(&branch_regs[i-1],CCREG);
if(dops[i-1].rt1==31) { // JAL
- alloc_reg(&branch_regs[i-1],i-1,31);
+ alloc_reg(st,&branch_regs[i-1],i-1,31);
dirty_reg(&branch_regs[i-1],31);
}
memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<dops[i-1].rs1)|(1LL<<dops[i-1].rs2));
alloc_cc(&branch_regs[i-1],i-1);
dirty_reg(&branch_regs[i-1],CCREG);
- alloc_reg(&branch_regs[i-1],i-1,dops[i-1].rs1);
+ alloc_reg(st,&branch_regs[i-1],i-1,dops[i-1].rs1);
if(dops[i-1].rt1!=0) { // JALR
- alloc_reg(&branch_regs[i-1],i-1,dops[i-1].rt1);
+ alloc_reg(st,&branch_regs[i-1],i-1,dops[i-1].rt1);
dirty_reg(&branch_regs[i-1],dops[i-1].rt1);
}
#ifdef USE_MINI_HT
if(dops[i-1].rs1==31) { // JALR
- alloc_reg(&branch_regs[i-1],i-1,RHASH);
- alloc_reg(&branch_regs[i-1],i-1,RHTBL);
+ alloc_reg(st,&branch_regs[i-1],i-1,RHASH);
+ alloc_reg(st,&branch_regs[i-1],i-1,RHTBL);
}
#endif
memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
// Delay slot goes after the test (in order)
current.u=branch_unneeded_reg[i-1]&~((1LL<<dops[i].rs1)|(1LL<<dops[i].rs2));
current.u|=1;
- delayslot_alloc(&current,i);
+ delayslot_alloc(st,&current,i);
current.isconst=0;
}
else
{
current.u=branch_unneeded_reg[i-1]&~((1LL<<dops[i-1].rs1)|(1LL<<dops[i-1].rs2));
// Alloc the branch condition registers
- if(dops[i-1].rs1) alloc_reg(&current,i-1,dops[i-1].rs1);
- if(dops[i-1].rs2) alloc_reg(&current,i-1,dops[i-1].rs2);
+ if(dops[i-1].rs1) alloc_reg(st,&current,i-1,dops[i-1].rs1);
+ if(dops[i-1].rs2) alloc_reg(st,&current,i-1,dops[i-1].rs2);
}
memcpy(&branch_regs[i-1],&current,sizeof(current));
branch_regs[i-1].isconst=0;
// Delay slot goes after the test (in order)
current.u=branch_unneeded_reg[i-1]&~((1LL<<dops[i].rs1)|(1LL<<dops[i].rs2));
current.u|=1;
- delayslot_alloc(&current,i);
+ delayslot_alloc(st,&current,i);
current.isconst=0;
}
else
{
current.u=branch_unneeded_reg[i-1]&~(1LL<<dops[i-1].rs1);
// Alloc the branch condition register
- alloc_reg(&current,i-1,dops[i-1].rs1);
+ alloc_reg(st,&current,i-1,dops[i-1].rs1);
}
memcpy(&branch_regs[i-1],&current,sizeof(current));
branch_regs[i-1].isconst=0;
// Delay slot goes after the test (in order)
current.u=branch_unneeded_reg[i-1]&~((1LL<<dops[i].rs1)|(1LL<<dops[i].rs2));
current.u|=1;
- delayslot_alloc(&current,i);
+ delayslot_alloc(st,&current,i);
current.isconst=0;
}
else
{
current.u=branch_unneeded_reg[i-1]&~(1LL<<dops[i-1].rs1);
// Alloc the branch condition register
- alloc_reg(&current,i-1,dops[i-1].rs1);
+ alloc_reg(st,&current,i-1,dops[i-1].rs1);
}
memcpy(&branch_regs[i-1],&current,sizeof(current));
branch_regs[i-1].isconst=0;
// Subroutine call will return here, don't alloc any registers
current.dirty=0;
clear_all_regs(current.regmap);
- alloc_reg(&current,i,CCREG);
+ alloc_reg(st,&current,i,CCREG);
dirty_reg(&current,CCREG);
}
- else if(i+1<slen)
+ else if(i+1<st->slen)
{
// Internal branch will jump here, match registers to caller
current.dirty=0;
clear_all_regs(current.regmap);
- alloc_reg(&current,i,CCREG);
+ alloc_reg(st,&current,i,CCREG);
dirty_reg(&current,CCREG);
for(j=i-1;j>=0;j--)
{
- if(cinfo[j].ba==start+i*4+4) {
+ if(cinfo[j].ba==st->start+i*4+4) {
memcpy(current.regmap,branch_regs[j].regmap,sizeof(current.regmap));
current.dirty=branch_regs[j].dirty;
break;
}
}
while(j>=0) {
- if(cinfo[j].ba==start+i*4+4) {
+ if(cinfo[j].ba==st->start+i*4+4) {
for(hr=0;hr<HOST_REGS;hr++) {
if(current.regmap[hr]!=branch_regs[j].regmap[hr]) {
current.regmap[hr]=-1;
cc=0;
}
#if !defined(DRC_DBG)
- else if(dops[i].itype==C2OP&&gte_cycletab[source[i]&0x3f]>2)
+ else if(dops[i].itype==C2OP&&gte_cycletab[st->source[i]&0x3f]>2)
{
// this should really be removed since the real stalls have been implemented,
// but doing so causes sizeable perf regression against the older version
- u_int gtec = gte_cycletab[source[i] & 0x3f];
+ u_int gtec = gte_cycletab[st->source[i] & 0x3f];
cc += HACK_ENABLED(NDHACK_NO_STALLS) ? gtec/2 : 2;
}
else if(i>1&&dops[i].itype==STORE&&dops[i-1].itype==STORE&&dops[i-2].itype==STORE&&!dops[i].bt)
}
}
-static noinline void pass4_cull_unused_regs(void)
+static noinline void pass4_cull_unused_regs(struct compile_state *st)
{
u_int last_needed_regs[4] = {0,0,0,0};
u_int nr=0;
int i;
- for (i=slen-1;i>=0;i--)
+ for (i = st->slen - 1; i >= 0; i--)
{
int hr;
__builtin_prefetch(regs[i-2].regmap);
if(dops[i].is_jump)
{
- if(cinfo[i].ba<start || cinfo[i].ba>=(start+slen*4))
+ if(cinfo[i].ba<st->start || cinfo[i].ba>=(st->start+st->slen*4))
{
// Branch out of this block, don't need anything
nr=0;
// Internal branch
// Need whatever matches the target
nr=0;
- int t=(cinfo[i].ba-start)>>2;
+ int t=(cinfo[i].ba-st->start)>>2;
for(hr=0;hr<HOST_REGS;hr++)
{
if(regs[i].regmap_entry[hr]>=0) {
// Conditional branch may need registers for following instructions
if (!dops[i].is_ujump)
{
- if(i<slen-2) {
+ if(i<st->slen-2) {
nr |= last_needed_regs[(i+2) & 3];
for(hr=0;hr<HOST_REGS;hr++)
{
if(regmap_pre[i+2][hr]>=0&&get_reg(regs[i+2].regmap_entry,regmap_pre[i+2][hr])<0) nr&=~(1<<hr);
- //if((regmap_entry[i+2][hr])>=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",start+i*4,hr,regmap_entry[i+2][hr]);
+ //if((regmap_entry[i+2][hr])>=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",st->start+i*4,hr,regmap_entry[i+2][hr]);
}
}
}
}
else // Non-branch
{
- if(i<slen-1) {
+ if(i<st->slen-1) {
for(hr=0;hr<HOST_REGS;hr++) {
if(regmap_pre[i+1][hr]>=0&&get_reg(regs[i+1].regmap_entry,regmap_pre[i+1][hr])<0) nr&=~(1<<hr);
if(regs[i].regmap[hr]!=regmap_pre[i+1][hr]) nr&=~(1<<hr);
branch_regs[i].regmap_entry[hr]=-1;
if (!dops[i].is_ujump)
{
- if (i < slen-2) {
+ if (i < st->slen-2) {
regmap_pre[i+2][hr]=-1;
regs[i+2].wasconst&=~(1<<hr);
}
//(dops[i].itype!=SPAN||regs[i].regmap[hr]!=CCREG)
regs[i].regmap[hr] != CCREG)
{
- if(i<slen-1&&!dops[i].is_ds) {
+ if(i<st->slen-1&&!dops[i].is_ds) {
assert(regs[i].regmap[hr]<64);
if(regmap_pre[i+1][hr]!=-1 || regs[i].regmap[hr]>0)
if(regmap_pre[i+1][hr]!=regs[i].regmap[hr])
{
SysPrintf_lim("fail: %x (%d %d!=%d)\n",
- start+i*4, hr, regmap_pre[i+1][hr], regs[i].regmap[hr]);
+ st->start+i*4, hr, regmap_pre[i+1][hr], regs[i].regmap[hr]);
assert(regmap_pre[i+1][hr]==regs[i].regmap[hr]);
}
regmap_pre[i+1][hr]=-1;
// If a register is allocated during a loop, try to allocate it for the
// entire loop, if possible. This avoids loading/storing registers
// inside of the loop.
-static noinline void pass5a_preallocate1(void)
+static noinline void pass5a_preallocate1(struct compile_state *st)
{
int i, j, hr;
signed char f_regmap[HOST_REGS];
clear_all_regs(f_regmap);
- for(i=0;i<slen-1;i++)
+ for(i=0;i<st->slen-1;i++)
{
if(dops[i].itype==UJUMP||dops[i].itype==CJUMP||dops[i].itype==SJUMP)
{
- if(cinfo[i].ba>=start && cinfo[i].ba<(start+i*4))
+ if(cinfo[i].ba>=st->start && cinfo[i].ba<(st->start+i*4))
if(dops[i+1].itype==NOP||dops[i+1].itype==MOV||dops[i+1].itype==ALU
||dops[i+1].itype==SHIFTIMM||dops[i+1].itype==IMM16||dops[i+1].itype==LOAD
||dops[i+1].itype==STORE||dops[i+1].itype==STORELR
||dops[i+1].itype==SHIFT
||dops[i+1].itype==COP2||dops[i+1].itype==C2LS||dops[i+1].itype==C2OP)
{
- int t=(cinfo[i].ba-start)>>2;
+ int t=(cinfo[i].ba-st->start)>>2;
if(t > 0 && !dops[t-1].is_jump) // loop_preload can't handle jumps into delay slots
if(t<2||(dops[t-2].itype!=UJUMP&&dops[t-2].itype!=RJUMP)||dops[t-2].rt1!=31) // call/ret assumes no registers allocated
for(hr=0;hr<HOST_REGS;hr++)
}
// Try to restore cycle count at branch targets
if(dops[i].bt) {
- for(j=i;j<slen-1;j++) {
+ for(j=i;j<st->slen-1;j++) {
if(regs[j].regmap[HOST_CCREG]!=-1) break;
if(count_free_regs(regs[j].regmap)<=cinfo[j].min_free_regs) {
//printf("no free regs for store %x\n",start+j*4);
// This allocates registers (if possible) one instruction prior
// to use, which can avoid a load-use penalty on certain CPUs.
-static noinline void pass5b_preallocate2(void)
+static noinline void pass5b_preallocate2(struct compile_state *st)
{
- int i, hr, limit = min(slen - 1, MAXBLOCK - 2);
+ int i, hr, limit = min(st->slen - 1, MAXBLOCK - 2);
for (i = 0; i < limit; i++)
{
if (!i || !dops[i-1].is_jump)
// Write back dirty registers as soon as we will no longer modify them,
// so that we don't end up with lots of writes at the branches.
-static noinline void pass6_clean_registers(int istart, int iend, int wr)
+static noinline void pass6_clean_registers(struct compile_state *st,
+ int istart, int iend, int wr)
{
static u_int wont_dirty[MAXBLOCK];
static u_int will_dirty[MAXBLOCK];
+ u_int start = st->start;
int i;
int r;
u_int will_dirty_i,will_dirty_next,temp_will_dirty;
u_int wont_dirty_i,wont_dirty_next,temp_wont_dirty;
- if(iend==slen-1) {
+ if(iend==st->slen-1) {
will_dirty_i=will_dirty_next=0;
wont_dirty_i=wont_dirty_next=0;
}else{
signed char branch_rregmap_i[RRMAP_SIZE];
u_int branch_hr_candirty = 0;
make_rregs(branch_regs[i].regmap, branch_rregmap_i, &branch_hr_candirty);
- if(cinfo[i].ba<start || cinfo[i].ba>=(start+slen*4))
+ if (cinfo[i].ba < start || cinfo[i].ba >= (start + st->slen*4))
{
// Branch out of this block, flush all regs
will_dirty_i = 0;
if(wr) {
will_dirty[i]=temp_will_dirty;
wont_dirty[i]=temp_wont_dirty;
- pass6_clean_registers((cinfo[i].ba-start)>>2,i-1,0);
+ pass6_clean_registers(st, (cinfo[i].ba-start)>>2,i-1,0);
}else{
// Limit recursion. It can take an excessive amount
// of time if there are a lot of nested loops.
return block;
}
-static void block_info_finish(struct block_info *block_tmp, u_char *beginning,
- u_char *end)
+static void block_info_finish(struct compile_state *st, struct block_info *block_tmp,
+ u_char *beginning, u_char *end)
{
u_int page = get_page(block_tmp->start);
struct block_info *block;
block->tc_len = end - beginning;
jump_outs = get_jump_outs(block);
- for (i = j = 0; i < linkcount; i++)
- if (!link_addr[i].internal)
- jump_outs[j++] = link_addr[i].target;
+ for (i = j = 0; i < st->linkcount; i++)
+ if (!st->link_addr[i].internal)
+ jump_outs[j++] = st->link_addr[i].target;
assert(j == block->jump_out_cnt);
// insert sorted by start mirror-unmasked vaddr
static int noinline new_recompile_block(u_int addr)
{
+ struct compile_state st;
u_int pagelimit = 0;
u_int state_rflags = 0;
int i;
state_rflags |= 1 << i;
}
- start = addr;
+ st.source = NULL;
+ st.start = addr;
+ st.slen = 0;
+ st.linkcount = 0;
ndrc_g.did_compile++;
- if (Config.HLE && start == 0x80001000) // hlecall
+ if (Config.HLE && st.start == 0x80001000) // hlecall
{
void *beginning = start_block(16*4);
- emit_movimm(start,0);
+ emit_movimm(st.start,0);
emit_writeword(0,&psxRegs.pc);
emit_far_jump(new_dyna_leave);
literal_pool(0);
end_block(beginning);
- struct block_info *block = block_info_new(start, 4, NULL, 1, 0);
- block->jump_in[0].vaddr = start;
+ struct block_info *block = block_info_new(st.start, 4, NULL, 1, 0);
+ block->jump_in[0].vaddr = st.start;
block->jump_in[0].addr = beginning;
- block_info_finish(block, beginning, out);
+ block_info_finish(&st, block, beginning, out);
return 0;
}
else if (f1_hack && hack_addr == 0) {
void *beginning = start_block(64*4);
- emit_movimm(start, 0);
+ emit_movimm(st.start, 0);
emit_writeword(0, &hack_addr);
emit_readword(&psxRegs.GPR.n.sp, 0);
emit_readptr(&mem_rtab, 1);
literal_pool(0);
end_block(beginning);
- struct block_info *block = block_info_new(start, 4, NULL, 1, 0);
- block->jump_in[0].vaddr = start;
+ struct block_info *block = block_info_new(st.start, 4, NULL, 1, 0);
+ block->jump_in[0].vaddr = st.start;
block->jump_in[0].addr = beginning;
- block_info_finish(block, beginning, out);
- SysPrintf("F1 hack to %08x\n", start);
+ block_info_finish(&st, block, beginning, out);
+ SysPrintf("F1 hack to %08x\n", st.start);
return 0;
}
cycle_multiplier_active = get_cycle_multiplier();
- source = get_source_start(start, &pagelimit);
- if (source == NULL) {
+ st.source = get_source_start(st.start, &pagelimit);
+ if (st.source == NULL) {
if (addr != hack_addr) {
SysPrintf_lim("Compile at bogus memory address: %08x, ra=%x\n",
addr, psxRegs.GPR.n.ra);
/* Pass 1 disassembly */
- pass1a_disassemble(pagelimit);
- pass1b_bt();
+ pass1a_disassemble(&st, pagelimit);
+ pass1b_bt(&st);
- int clear_hack_addr = apply_hacks();
+ int clear_hack_addr = apply_hacks(&st);
/* Pass 2 - unneeded, register dependencies */
- pass2a_unneeded();
- pass2b_unneeded_regs(0, slen-1, 0);
+ pass2a_unneeded(&st);
+ pass2b_unneeded_regs(&st, 0, st.slen-1, 0);
/* Pass 3 - Register allocation */
- pass3_register_alloc(addr);
+ pass3_register_alloc(&st, addr);
/* Pass 4 - Cull unused host registers */
- pass4_cull_unused_regs();
+ pass4_cull_unused_regs(&st);
/* Pass 5 - Pre-allocate registers */
- pass5a_preallocate1();
- pass5b_preallocate2();
+ pass5a_preallocate1(&st);
+ pass5b_preallocate2(&st);
/* Pass 6 - Optimize clean/dirty state */
- pass6_clean_registers(0, slen-1, 1);
+ pass6_clean_registers(&st, 0, st.slen - 1, 1);
/* Pass 7 */
- for (i=slen-1;i>=0;i--)
+ for (i = st.slen - 1; i >= 0; i--)
{
if(dops[i].itype==CJUMP||dops[i].itype==SJUMP)
{
// Conditional branch
- if((source[i]>>16)!=0x1000&&i<slen-2) {
+ if((st.source[i]>>16)!=0x1000&&i<st.slen-2) {
// Mark this address as a branch target since it may be called
// upon return from interrupt
dops[i+2].bt=1;
}
/* Pass 8 - Assembly */
- linkcount=0;stubcount=0;
- is_delayslot=0;
- u_int dirty_pre=0;
+ st.linkcount = 0;
+ stubcount = 0;
+ st.is_delayslot = 0;
+ u_int dirty_pre = 0;
void *beginning = start_block(MAX_OUTPUT_BLOCK_SIZE);
void *instr_addr0_override = NULL;
int ds = 0;
- if ((Config.HLE && start == 0x80000080) || start == 0x80030000) {
+ if ((Config.HLE && st.start == 0x80000080) || st.start == 0x80030000) {
instr_addr0_override = out;
- emit_movimm(start, 0);
- if (start == 0x80030000) {
+ emit_movimm(st.start, 0);
+ if (st.start == 0x80030000) {
// for BiosBootBypass() to work
// io address var abused as a "already been here" flag
emit_readword(&address, 1);
emit_jne(new_dyna_leave);
#endif
}
- for(i=0;i<slen;i++)
+ for(i=0;i<st.slen;i++)
{
__builtin_prefetch(regs[i+1].regmap);
check_regmap(regmap_pre[i]);
check_regmap(regs[i].regmap_entry);
check_regmap(regs[i].regmap);
//if(ds) printf("ds: ");
- disassemble_inst(i);
+ disassemble_inst(i, st.start, st.source);
if(ds) {
ds=0; // Skip delay slot
if(dops[i].bt) assem_debug("OOPS - branch into delay slot\n");
- instr_addr[i] = NULL;
+ st.instr_addr[i] = NULL;
} else {
- speculate_register_values(i);
+ speculate_register_values(&st, i);
#ifndef DESTRUCTIVE_WRITEBACK
if (i < 2 || !dops[i-2].is_ujump)
{
loop_preload(regmap_pre[i],regs[i].regmap_entry);
}
// branch target entry point
- instr_addr[i] = out;
+ st.instr_addr[i] = out;
assem_debug("<->\n");
drc_dbg_emit_do_cmp(i, cinfo[i].ccadj);
if (clear_hack_addr) {
wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty);
load_regs(regs[i].regmap_entry,regs[i].regmap,dops[i].rs1,dops[i].rs2);
address_generation(i,®s[i],regs[i].regmap_entry);
- load_consts(regmap_pre[i],regs[i].regmap,i);
+ load_consts(&st, regmap_pre[i], regs[i].regmap, i);
if(dops[i].is_jump)
{
// Load the delay slot registers if necessary
if (dops[i+1].is_store)
load_reg(regs[i].regmap_entry,regs[i].regmap,INVCP);
}
- else if(i+1<slen)
+ else if(i+1 < st.slen)
{
// Preload registers for following instruction
if(dops[i+1].rs1!=dops[i].rs1&&dops[i+1].rs1!=dops[i].rs2)
if (dops[i].is_store)
load_reg(regs[i].regmap_entry,regs[i].regmap,INVCP);
- ds = assemble(i, ®s[i], cinfo[i].ccadj);
+ ds = assemble(&st, i, ®s[i], cinfo[i].ccadj);
drc_dbg_emit_wb_dirtys(i, ®s[i]);
if (dops[i].is_ujump)
}
}
- assert(slen > 0);
- if (slen > 0 && dops[slen-1].itype == INTCALL) {
+ assert(st.slen > 0);
+ if (st.slen > 0 && dops[st.slen-1].itype == INTCALL) {
// no ending needed for this block since INTCALL never returns
}
// If the block did not end with an unconditional branch,
else if (i > 1) {
if (!dops[i-2].is_ujump) {
assert(!dops[i-1].is_jump);
- assert(i==slen);
+ assert(i==st.slen);
if(dops[i-2].itype!=CJUMP&&dops[i-2].itype!=SJUMP) {
- store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4);
+ store_regs_bt(&st, regs[i-1].regmap, regs[i-1].dirty, st.start+i*4);
if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
emit_loadreg(CCREG,HOST_CCREG);
emit_addimm(HOST_CCREG, cinfo[i-1].ccadj + CLOCK_ADJUST(1), HOST_CCREG);
}
else
{
- store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].dirty,start+i*4);
+ store_regs_bt(&st, branch_regs[i-2].regmap, branch_regs[i-2].dirty, st.start+i*4);
assert(branch_regs[i-2].regmap[HOST_CCREG]==CCREG);
}
- add_to_linker(out,start+i*4,0);
+ add_to_linker(&st, out, st.start+i*4, 0);
emit_jmp(0);
}
}
{
assert(i>0);
assert(!dops[i-1].is_jump);
- store_regs_bt(regs[i-1].regmap,regs[i-1].dirty,start+i*4);
+ store_regs_bt(&st, regs[i-1].regmap, regs[i-1].dirty, st.start+i*4);
if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
emit_loadreg(CCREG,HOST_CCREG);
emit_addimm(HOST_CCREG, cinfo[i-1].ccadj + CLOCK_ADJUST(1), HOST_CCREG);
- add_to_linker(out,start+i*4,0);
+ add_to_linker(&st, out, st.start+i*4, 0);
emit_jmp(0);
}
case LOADW_STUB:
case LOADBU_STUB:
case LOADHU_STUB:
- do_readstub(i);break;
+ do_readstub(&st, i);
+ break;
case STOREB_STUB:
case STOREH_STUB:
case STOREW_STUB:
- do_writestub(i);break;
+ do_writestub(&st, i);
+ break;
case CC_STUB:
- do_ccstub(i);break;
+ do_ccstub(&st, i);
+ break;
case INVCODE_STUB:
- do_invstub(i);break;
+ do_invstub(&st, i);
+ break;
case STORELR_STUB:
- do_unalignedwritestub(i);break;
+ do_unalignedwritestub(&st, i);
+ break;
case OVERFLOW_STUB:
- do_overflowstub(i); break;
+ do_overflowstub(&st, i);
+ break;
case ALIGNMENT_STUB:
- do_alignmentstub(i); break;
+ do_alignmentstub(&st, i);
+ break;
default:
assert(0);
}
}
if (instr_addr0_override)
- instr_addr[0] = instr_addr0_override;
+ st.instr_addr[0] = instr_addr0_override;
#if 0
/* check for improper expiration */
/* Pass 9 - Linker */
u_int jump_out_count = 0;
- for (i = 0; i < linkcount; i++)
+ for (i = 0; i < st.linkcount; i++)
{
assem_debug("link: %p -> %08x\n",
- log_addr(link_addr[i].addr), link_addr[i].target);
+ log_addr(st.link_addr[i].addr), st.link_addr[i].target);
literal_pool(64);
- if (!link_addr[i].internal)
+ if (!st.link_addr[i].internal)
{
void *stub = out;
- void *addr = check_addr(link_addr[i].target);
- emit_extjump_stub(link_addr[i].addr, link_addr[i].target);
+ void *addr = check_addr(st.link_addr[i].target);
+ emit_extjump_stub(st.link_addr[i].addr, st.link_addr[i].target);
jump_out_count++;
if (addr) {
- set_jump_target(link_addr[i].addr, addr);
- ndrc_add_jump_out(link_addr[i].target, stub);
+ set_jump_target(st.link_addr[i].addr, addr);
+ ndrc_add_jump_out(st.link_addr[i].target, stub);
}
else
- set_jump_target(link_addr[i].addr, stub);
+ set_jump_target(st.link_addr[i].addr, stub);
}
else
{
// Internal branch
- int target=(link_addr[i].target-start)>>2;
- assert(target>=0&&target<slen);
- assert(instr_addr[target]);
- set_jump_target(link_addr[i].addr, instr_addr[target]);
+ int target = (st.link_addr[i].target - st.start) >> 2;
+ assert(target>=0&&target<st.slen);
+ assert(st.instr_addr[target]);
+ set_jump_target(st.link_addr[i].addr, st.instr_addr[target]);
}
}
- u_int source_len = slen*4;
- if (dops[slen-1].itype == INTCALL && !dops[slen-1].bt && source_len > 4)
+ u_int source_len = st.slen*4;
+ if (dops[st.slen-1].itype == INTCALL && !dops[st.slen-1].bt && source_len > 4)
// no need to treat the last instruction as compiled
// as the interpreter fully handles it
source_len -= 4;
// External Branch Targets (jump_in)
int jump_in_count = 1;
- assert(instr_addr[0]);
- for (i = 1; i < slen; i++)
+ assert(st.instr_addr[0]);
+ for (i = 1; i < st.slen; i++)
{
- if (dops[i].bt && instr_addr[i])
+ if (dops[i].bt && st.instr_addr[i])
jump_in_count++;
}
struct block_info *block =
- block_info_new(start, source_len, source, jump_in_count, jump_out_count);
+ block_info_new(st.start, source_len, st.source, jump_in_count, jump_out_count);
block->reg_sv_flags = state_rflags;
int jump_in_i = 0;
- for (i = 0; i < slen; i++)
+ for (i = 0; i < st.slen; i++)
{
- if ((i == 0 || dops[i].bt) && instr_addr[i])
+ if ((i == 0 || dops[i].bt) && st.instr_addr[i])
{
- assem_debug("%p (%d) <- %8x\n", log_addr(instr_addr[i]), i, start + i*4);
- u_int vaddr = start + i*4;
+ assem_debug("%p (%d) <- %8x\n", log_addr(st.instr_addr[i]), i, st.start + i*4);
+ u_int vaddr = st.start + i*4;
literal_pool(256);
void *entry = out;
load_regs_entry(i);
if (entry == out)
- entry = instr_addr[i];
+ entry = st.instr_addr[i];
else
- emit_jmp(instr_addr[i]);
+ emit_jmp(st.instr_addr[i]);
block->jump_in[jump_in_i].vaddr = vaddr;
block->jump_in[jump_in_i].addr = entry;
assert(out - (u_char *)beginning < MAX_OUTPUT_BLOCK_SIZE);
end_block(beginning);
- block_info_finish(block, beginning, out);
+ block_info_finish(&st, block, beginning, out);
block = NULL;
// Trap writes to any of the pages we compiled
if (source_len)
- mark_invalid_code(start, source_len, 0);
+ mark_invalid_code(st.start, source_len, 0);
#ifdef ASSEM_PRINT
fflush(stdout);