CFLAGS += -O3 -DNDEBUG
endif
-# This is actually needed, bevieve me.
+# This is actually needed, believe me.
# If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere.
ifndef NO_ALIGN_FUNCTIONS
CFLAGS += -falign-functions=2
#define A64_ROR_REG(rd, rn, rm) \
A64_INSN(0xd,0x0,0x3,_,rm,_,0xb,rn,rd)
-// rd = REVERSE(n) rn
+// rd = REVERSE(rn)
#define A64_RBIT_REG(rd, rn) \
A64_INSN(0xd,0x2,0x3,_,_,_,_,rn,rd)
// if-then-else conditional execution helpers
-#define JMP_POS(ptr) \
+#define JMP_POS(ptr) { \
ptr = tcache_ptr; \
- EMIT(A64_B(0));
+ EMIT(A64_B(0)); \
+}
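// Note: the added braces matter because the macro body is two statements;
// unbraced, 'if (cond) JMP_POS(ptr);' (hypothetical cond) would expand so
// that only 'ptr = tcache_ptr;' is conditional while 'EMIT(A64_B(0));' is
// always emitted. The { } keep both under the if, like JMP_EMIT below.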
#define JMP_EMIT(cond, ptr) { \
u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \
emith_tst_r_imm(sr, S); \
EMITH_SJMP_START(DCOND_EQ); \
/* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
- /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \
- emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \
- emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \
+ /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
+ emith_asr(rn, mh, 15); \
+ emith_addf_r_r_r_lsr(rn, rn, mh, 31); \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
#define emith_tpop_carry(sr, is_sub) do { \
if (is_sub) \
emith_eor_r_imm(sr, 1); \
- emith_lsrf(sr, sr, 1); \
+ emith_ror(sr, sr, 1); \
+ emith_addf_r_r(sr, sr); \
} while (0)
#define emith_tpush_carry(sr, is_sub) do { \
- emith_adc_r_r(sr, sr); \
+ emith_adc_r_r(sr, Z0); \
if (is_sub) \
emith_eor_r_imm(sr, 1); \
} while (0)
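// Note: a host-C model of the rewritten T<->carry transfer (a sketch, not
// part of the patch; 'carry' stands in for the host carry flag, and the
// is_sub eor accounts for the inverted borrow after subtraction). tpop
// moves T (SR bit 0) into the carry and clears it; tpush adds it back:
#include <stdint.h>

static uint32_t model_tpop_carry(uint32_t sr, int *carry)
{
	uint32_t r = (sr >> 1) | (sr << 31);	// ror sr, sr, 1: T goes to bit 31
	*carry = r >> 31;			// addf sr, sr: carry out = old T
	return r << 1;				// sr restored, T (bit 0) now clear
}

static uint32_t model_tpush_carry(uint32_t sr, int carry)
{
	return sr + carry;			// adc sr, Z0: carry lands in bit 0
}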
#define AT 1 // used to hold intermediate results
#define FNZ 15 // emulated processor flags: N (bit 31), Z (all bits)
#define FC 24 // emulated processor flags: C (bit 0), others 0
-#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others ?
+#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others x
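// Note: per the comments above, the unified flags would decode from the
// host registers roughly as follows (a sketch, not from the patch):
#include <stdint.h>

static int flag_N(uint32_t fnz) { return fnz >> 31; }	// sign of last result
static int flag_Z(uint32_t fnz) { return fnz == 0; }	// whole reg is zero
static int flag_C(uint32_t fc)  { return fc & 1; }	// carry kept in bit 0
static int flag_V(uint32_t fv)  { return fv >> 31; }	// Nt^Ns kept in bit 31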
// unified conditions; virtual, not corresponding to anything real on MIPS
} while (0)
// FIFO for 2 instructions, for delay slot handling
-u32 emith_last_insns[2] = { -1,-1 };
-int emith_last_idx, emith_last_cnt;
+static u32 emith_last_insns[2] = { -1,-1 };
+static int emith_last_idx, emith_last_cnt;
#define EMIT_PUSHOP() \
do { \
((op>>26) == OP__RT && ((op>>16) & 036) == RT_BLTZ); }
// register usage for dependency evaluation. XXX: better do this as in emit_arm?
static uint64_t emith_has_rs[3] = // OP__FN, OP__RT, others
- { 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007f30ULL };
+ { 0x00fffffffffa0ff0ULL, 0x000fff0fUL, 0xffffffff0f007ff0ULL };
static uint64_t emith_has_rt[3] = // OP__FN, OP__RT, others
{ 0xff00fffffff00cffULL, 0x00000000UL, 0x8000ff0000000030ULL };
static uint64_t emith_has_rd[3] = // OP__FN, OP__RT, others (rt instead of rd)
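// Note: presumably each 64-bit mask is indexed by the insn's 6-bit
// function/rt/opcode field, one bit per code; a hypothetical check
// (sketch): int uses_rs = (emith_has_rs[class] >> fn) & 1;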
bop = emith_b_isswap(op, op2);
}
+ // flush FIFO and branch
+ tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt);
+ if (emith_last_insns[idx^1] != -1)
+ EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]);
if (bop) { // can swap
- tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt);
- if (emith_last_insns[idx^1] != -1)
- EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]);
bp = tcache_ptr;
EMIT_PTR(tcache_ptr, bop); COUNT_OP;
EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
- emith_last_insns[0] = emith_last_insns[1] = -1;
- emith_last_cnt = 0;
} else { // can't swap
- emith_flush();
+ if (emith_last_insns[idx] != -1)
+ EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
bp = tcache_ptr;
EMIT_PTR(tcache_ptr, op); COUNT_OP;
EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP;
}
+ emith_last_insns[0] = emith_last_insns[1] = -1;
+ emith_last_cnt = 0;
return bp;
}
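// Note: MIPS executes the insn following a branch (the delay slot)
// unconditionally, which is why the FIFO exists: the newest pending insn
// can be swapped in behind the branch instead of wasting a NOP. A host-C
// sketch of the two emit paths above (hypothetical, not part of the patch):
#include <stdint.h>

static uint32_t *model_emit_branch(uint32_t *p, uint32_t op, uint32_t bop,
	uint32_t fifo[2], int idx)
{
	if (fifo[idx ^ 1] != (uint32_t)-1)
		*p++ = fifo[idx ^ 1];		// older pending insn goes first
	if (bop) {				// branch with delay-slot swap applied
		*p++ = bop;
		*p++ = fifo[idx];		// newest insn fills the delay slot
	} else {				// can't swap: keep order, pad with NOP
		if (fifo[idx] != (uint32_t)-1)
			*p++ = fifo[idx];
		*p++ = op;
		*p++ = 0;			// nop (sll zero,zero,0)
	}
	fifo[0] = fifo[1] = (uint32_t)-1;	// FIFO is empty afterwards
	return p;
}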
// flag emulation creates from 2 (i.e. cmp #0/beq) up to 9 (i.e. adcf/ble) extra insns.
// flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check()
-int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (aka cmp_r_r)
-int emith_flg_noV; // V flag known not to be set
+static int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (cmp_r_r)
+static int emith_flg_noV; // V flag known not to be set
// store minimal cc information: rd, rt^rs, carry
// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt.
// move immediate
static void emith_move_imm(int r, uintptr_t imm)
{
- if ((s16)imm != imm) {
+ if ((s16)imm == imm) {
+ EMIT(MIPS_ADD_IMM(r, Z0, imm));
+ } else if (!(imm >> 16)) {
+ EMIT(MIPS_OR_IMM(r, Z0, imm));
+ } else {
int s = Z0;
if (imm >> 16) {
EMIT(MIPS_MOVT_IMM(r, imm >> 16));
s = r; // upper half is in r now; the low half must be OR'd into it
}
if ((u16)imm)
EMIT(MIPS_OR_IMM(r, s, (u16)imm));
- } else
- EMIT(MIPS_ADD_IMM(r, Z0, imm));
+ }
}
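// Note: illustrative expansions of the sequences chosen above (assuming
// MIPS_ADD_IMM/MIPS_OR_IMM/MIPS_MOVT_IMM map to addiu/ori/lui; sketch):
//   emith_move_imm(r, -4)         -> addiu r, zero, -4
//   emith_move_imm(r, 0xbeef)     -> ori   r, zero, 0xbeef
//   emith_move_imm(r, 0x12345678) -> lui   r, 0x1234
//                                    ori   r, r, 0x5678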
#define emith_move_r_ptr_imm(r, imm) \
emith_tst_r_imm(sr, S); \
EMITH_SJMP_START(DCOND_EQ); \
/* overflow if top 17 bits of MACH aren't all 1 or 0 */ \
- /* to check: add MACH[15] to MACH[31:16]. this is 0 if no overflow */ \
- emith_asrf(rn, mh, 16); /* sum = (MACH>>16) + ((MACH>>15)&1) */ \
- emith_adcf_r_imm(rn, 0); /* (MACH>>15) is in carry after shift */ \
+ /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \
+ emith_asr(rn, mh, 15); \
+ emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \
+ emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> ov */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x0000); /* -overflow */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x8000); \
- EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> +ovl */ \
- emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0xffffffff */ \
- emith_sub_r_imm_c(DCOND_GT, mh, 1); /* 0x00007fff */ \
- EMITH_SJMP_END(DCOND_LE); \
+ EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \
+ emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \
+ emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \
+ EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)
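// Note: a host-C model of the overflow test above (a sketch; assumes
// arithmetic >> on signed values): sum is zero exactly when MACH's top
// 17 bits are all equal, i.e. MACH:MACL fits the 48-bit saturation range.
#include <stdint.h>

static int mac48_overflows(int32_t mach)
{
	int32_t sum = (mach >> 15) + (int32_t)((uint32_t)mach >> 31);
	return sum != 0;	// sign of sum gives the overflow direction
}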
/* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \
/* to check: add MACL[31] to MACH. this is 0 if no overflow */ \
emith_lsr(rn, ml, 31); \
- emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \
+ emith_add_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \
+ emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \
EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \
/* XXX: LSB signalling only in SH1, or in SH2 too? */ \
emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \
emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* negative ovrfl */ \
- EMITH_SJMP_START(DCOND_LE); /* sum > 0 -> positive ovrfl */ \
- emith_sub_r_imm_c(DCOND_GT, ml, 1); /* 0x7fffffff */ \
- EMITH_SJMP_END(DCOND_LE); \
+ EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> positive ovrfl */ \
+ emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \
+ EMITH_SJMP_END(DCOND_MI); \
EMITH_SJMP_END(DCOND_EQ); \
EMITH_SJMP_END(DCOND_EQ); \
} while (0)
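// Note: the matching host-C model for the 32-bit case (sketch): sum is
// zero exactly when MACH is just the sign extension of MACL, i.e.
// MACH:MACL fits in 32 bits.
#include <stdint.h>

static int mac32_overflows(int32_t mach, uint32_t macl)
{
	int32_t sum = mach + (int32_t)(macl >> 31);
	return sum != 0;	// >0: positive, <0: negative overflow
}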
rcache_free_tmp(tmp_); \
} while (0)
+#define emith_carry_to_t(sr, is_sub) do { \
+ emith_rorc(sr); \
+ emith_rol(sr, sr, 1); \
+} while (0)
+
#define emith_tpop_carry(sr, is_sub) \
emith_lsr(sr, sr, 1)
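// Note: bit-level model of emith_carry_to_t above (sketch): the rotate
// through carry shifts old T out into the (then overwritten) carry and
// parks the incoming carry in bit 31; the plain rotate left then brings
// it down to bit 0, leaving the rest of sr unchanged.
#include <stdint.h>

static uint32_t model_carry_to_t(uint32_t sr, int carry)
{
	uint32_t r = ((uint32_t)carry << 31) | (sr >> 1);	// rorc sr
	return (r << 1) | (r >> 31);				// rol sr, 1
}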
// 800 - state dump on exit
// {
#ifndef DRC_DEBUG
-#define DRC_DEBUG 0//x8c7
+#define DRC_DEBUG 0//x847
#endif
#if DRC_DEBUG
void *block_entry_ptr;
struct block_desc *block;
struct block_entry *entry;
+ struct block_link *bl;
u16 *dr_pc_base;
struct op_data *opd;
int blkid_main = 0;
if (pinned_loop_pc[pinned_loop_count] == pc) {
// pin needed regs on loop entry
FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v));
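+ // flush the insn FIFO so the recorded loop entry points at emitted code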
+ emith_flush();
pinned_loop_ptr[pinned_loop_count] = tcache_ptr;
} else
op_flags[i] &= ~OF_BASIC_LOOP;
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_invalidate_t();
- emith_tpop_carry(sr, 0); // dummy
emith_lslf(tmp, tmp2, 1);
- emith_tpush_carry(sr, 0);
+ emith_carry_to_t(sr, 0);
goto end_op;
case 1: // DT Rn 0100nnnn00010000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_invalidate_t();
- emith_tpop_carry(sr, 0); // dummy
if (op & 0x20) {
emith_asrf(tmp, tmp2, 1);
} else
emith_lsrf(tmp, tmp2, 1);
- emith_tpush_carry(sr, 0);
+ emith_carry_to_t(sr, 0);
goto end_op;
case 1: // CMP/PZ Rn 0100nnnn00010001
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_invalidate_t();
- emith_tpop_carry(sr, 0); // dummy
if (op & 1) {
emith_rorf(tmp, tmp2, 1);
} else
emith_rolf(tmp, tmp2, 1);
- emith_tpush_carry(sr, 0);
+ emith_carry_to_t(sr, 0);
goto end_op;
case 0x24: // ROTCL Rn 0100nnnn00100100
case 0x25: // ROTCR Rn 0100nnnn00100101
int cond = -1;
int ctaken = 0;
void *target = NULL;
- struct block_link *bl = NULL;
if (OP_ISBRACND(opd_b->op))
ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2;
}
}
- if (bl)
- memcpy(bl->jdisp, bl->jump, emith_jump_at_size());
#if CALL_STACK
if (rtsadd)
emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret);
}
else if (drcf.pending_branch_indirect) {
u32 target_pc;
- struct block_link *bl = NULL;
tmp = rcache_get_reg_arg(0, SHR_PC, NULL);
if (! OP_ISBRAUC(opd->op))
{
- struct block_link *bl;
-
tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
FLUSH_CYCLES(tmp);
emith_sync_t(tmp);
emith_move_r_imm(tmp, pc);
emith_jump_patchable(sh2_drc_dispatcher);
rcache_invalidate();
-
- if (bl)
- memcpy(bl->jdisp, bl->jump, emith_jump_at_size());
} else
rcache_flush();
// emit blx area
for (i = 0; i < blx_target_count; i++) {
void *target = (blx_target_pc[i] & 1 ? sh2_drc_exit : sh2_drc_dispatcher);
- struct block_link *bl = blx_target_bl[i];
emith_pool_check();
+ bl = blx_target_bl[i];
if (bl)
bl->blx = tcache_ptr;
emith_jump_patch(blx_target_ptr[i], tcache_ptr, NULL);
emith_move_r_imm(tmp, blx_target_pc[i] & ~1);
emith_jump(target);
rcache_invalidate();
-
- if (bl)
- memcpy(bl->jdisp, bl->blx, emith_jump_at_size());
}
emith_flush();
emith_jump_patch(branch_patch_ptr[i], target, NULL);
}
+ // fill blx backup; do this last to backup final patched code
+ for (i = 0; i < block->entry_count; i++)
+ for (bl = block->entryp[i].o_links; bl; bl = bl->o_next)
+ memcpy(bl->jdisp, bl->blx ?: bl->jump, emith_jump_at_size());
+
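// Note: presumably this snapshots the dispatcher jump at every link site
// (bl->blx when the link routes through the blx area, else the inline
// jump) so that unlinking a dead target can restore it later, roughly:
//   memcpy(bl->blx ?: bl->jump, bl->jdisp, emith_jump_at_size());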
tcache_ptrs[tcache_id] = tcache_ptr;
host_instructions_updated(block_entry_ptr, tcache_ptr);