literal_insn[pool_index] += move_offs;
}
+#define EMITH_HINT_COND(cond) /**/
+
#define JMP_POS(ptr) { \
ptr = tcache_ptr; \
EMIT(0,M1(PC),0); \
#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \
emith_add_r_r_r_lsl(d, s1, s2, lslimm)
+#define emith_adc_r_r_r_lsl(d, s1, s2, lslimm) \
+ EOP_ADC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
+
#define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
-
#define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \
EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm)
#define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_SUB_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
+#define emith_sbc_r_r_r_lsl(d, s1, s2, lslimm) \
+ EOP_SBC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
+
#define emith_subf_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_SUB_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm)
#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
+#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) \
+ EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm)
#define emith_eor_r_r_r_lsl(d, s1, s2, lslimm) \
EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm)
-
#define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) \
EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm)
#define emith_or_r_r_lsl(d, s, lslimm) \
emith_or_r_r_r_lsl(d, d, s, lslimm)
+#define emith_or_r_r_lsr(d, s, lsrimm) \
+ emith_or_r_r_r_lsr(d, d, s, lsrimm)
+#define emith_eor_r_r_lsl(d, s, lslimm) \
+ emith_eor_r_r_r_lsl(d, d, s, lslimm)
#define emith_eor_r_r_lsr(d, s, lsrimm) \
emith_eor_r_r_r_lsr(d, d, s, lsrimm)
#define emith_add_r_r_r(d, s1, s2) \
emith_add_r_r_r_lsl(d, s1, s2, 0)
+#define emith_adc_r_r_r(d, s1, s2) \
+ emith_adc_r_r_r_lsl(d, s1, s2, 0)
+
#define emith_addf_r_r_r(d, s1, s2) \
emith_addf_r_r_r_lsl(d, s1, s2, 0)
#define emith_sub_r_r_r(d, s1, s2) \
emith_sub_r_r_r_lsl(d, s1, s2, 0)
+#define emith_sbc_r_r_r(d, s1, s2) \
+ emith_sbc_r_r_r_lsl(d, s1, s2, 0)
+
#define emith_subf_r_r_r(d, s1, s2) \
emith_subf_r_r_r_lsl(d, s1, s2, 0)
#define emith_add_r_r_ptr(d, s) \
emith_add_r_r_r(d, d, s)
+#define emith_adc_r_r(d, s) \
+ emith_adc_r_r_r(d, d, s)
+
#define emith_sub_r_r(d, s) \
emith_sub_r_r_r(d, d, s)
-#define emith_adc_r_r(d, s) \
- EOP_ADC_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0)
+#define emith_sbc_r_r(d, s) \
+ emith_sbc_r_r_r(d, d, s)
+
+#define emith_negc_r_r(d, s) \
+ EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,0,s,d,0,0)
#define emith_and_r_r_c(cond, d, s) \
EOP_AND_REG(cond,0,d,d,s,A_AM1_LSL,0)
#define emith_rolcf(d) \
emith_adcf_r_r(d, d)
+#define emith_rolc(d) \
+ emith_adc_r_r(d, d)
#define emith_rorcf(d) \
EOP_MOV_REG(A_COND_AL,1,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */
+#define emith_rorc(d) \
+ EOP_MOV_REG(A_COND_AL,0,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */
#define emith_negcf_r_r(d, s) \
EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,1,s,d,0,0)
} \
} while (0)
+#define emith_t_to_carry(srr, is_sub) do { \
+ if (is_sub) { \
+ int t_ = rcache_get_tmp(); \
+ emith_eor_r_r_imm(t_, srr, 1); \
+ emith_rorf(t_, t_, 1); \
+ rcache_free_tmp(t_); \
+ } else { \
+ emith_rorf(srr, srr, 1); \
+ emith_rol(srr, srr, 1); \
+ } \
+} while (0)
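+// worked example (not part of the macro): for the add case, "rorf sr,sr,#1"
+// moves T (sr bit 0) into both C and bit 31, and "rol sr,sr,#1" rotates it
+// back, leaving sr unchanged with C = T. The sub case inverts T in a tmp
+// first, since ARM subtract carry is an inverted borrow and sr must survive.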
+
#define emith_tpop_carry(sr, is_sub) do { \
if (is_sub) \
emith_eor_r_imm(sr, 1); \
JMP_EMIT_NC(else_ptr); \
}
+#define EMITH_HINT_COND(cond) /**/
+
// "simple" jump (no more then a few insns)
// ARM32 will use conditional instructions here
#define EMITH_SJMP_START EMITH_JMP_START
#define emith_addf_r_r_r_lsr(d, s1, s2, simm) \
EMIT(A64_ADDS_REG(d, s1, s2, ST_LSR, simm))
+#define emith_adc_r_r_r_lsl(d, s1, s2, simm) do { \
+ if (simm) { int _t = rcache_get_tmp(); \
+ emith_lsl(_t, s2, simm); \
+ emith_adc_r_r_r(d, s1, _t); \
+ rcache_free_tmp(_t); \
+ } else \
+ emith_adc_r_r_r(d, s1, s2); \
+} while (0)
+
+#define emith_sbc_r_r_r_lsl(d, s1, s2, simm) do { \
+ if (simm) { int _t = rcache_get_tmp(); \
+ emith_lsl(_t, s2, simm); \
+ emith_sbc_r_r_r(d, s1, _t); \
+ rcache_free_tmp(_t); \
+ } else \
+ emith_sbc_r_r_r(d, s1, s2); \
+} while (0)
+
#define emith_sub_r_r_r_lsl(d, s1, s2, simm) \
EMIT(A64_SUB_REG(d, s1, s2, ST_LSL, simm))
#define emith_or_r_r_r_lsl(d, s1, s2, simm) \
EMIT(A64_OR_REG(d, s1, s2, ST_LSL, simm))
+#define emith_or_r_r_r_lsr(d, s1, s2, simm) \
+ EMIT(A64_OR_REG(d, s1, s2, ST_LSR, simm))
#define emith_eor_r_r_r_lsl(d, s1, s2, simm) \
EMIT(A64_EOR_REG(d, s1, s2, ST_LSL, simm))
-
#define emith_eor_r_r_r_lsr(d, s1, s2, simm) \
EMIT(A64_EOR_REG(d, s1, s2, ST_LSR, simm))
#define emith_or_r_r_lsl(d, s, lslimm) \
emith_or_r_r_r_lsl(d, d, s, lslimm)
+#define emith_or_r_r_lsr(d, s, lsrimm) \
+ emith_or_r_r_r_lsr(d, d, s, lsrimm)
+#define emith_eor_r_r_lsl(d, s, lslimm) \
+ emith_eor_r_r_r_lsl(d, d, s, lslimm)
#define emith_eor_r_r_lsr(d, s, lsrimm) \
emith_eor_r_r_r_lsr(d, d, s, lsrimm)
#define emith_neg_r_r(d, s) \
EMIT(A64_NEG_REG(d, s, ST_LSL, 0))
+#define emith_negc_r_r(d, s) \
+ EMIT(A64_NEGC_REG(d, s))
+
#define emith_adc_r_r_r(d, s1, s2) \
EMIT(A64_ADC_REG(d, s1, s2))
#define emith_adcf_r_r_r(d, s1, s2) \
EMIT(A64_ADCS_REG(d, s1, s2))
+#define emith_sbc_r_r_r(d, s1, s2) \
+ EMIT(A64_SBC_REG(d, s1, s2))
+
#define emith_sbcf_r_r_r(d, s1, s2) \
EMIT(A64_SBCS_REG(d, s1, s2))
#define emith_rolcf(d) \
emith_adcf_r_r(d, d)
+#define emith_rolc(d) \
+ emith_adc_r_r(d, d)
#define emith_rorcf(d) do { \
EMIT(A64_RBIT_REG(d, d)); \
emith_adcf_r_r(d, d); \
EMIT(A64_RBIT_REG(d, d)); \
} while (0)
+#define emith_rorc(d) do { \
+ EMIT(A64_RBIT_REG(d, d)); \
+ emith_adc_r_r(d, d); \
+ EMIT(A64_RBIT_REG(d, d)); \
+} while (0)
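+// (A64 has no RRX, so rotate-right-through-carry is synthesized here as
+// bit-reverse + rotate-left-through-carry + bit-reverse)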
// signed/unsigned extend
#define emith_clear_msb(d, s, count) /* bits to clear */ \
emith_eor_r_imm(sr, 1); \
} while (0)
+#define emith_t_to_carry(srr, is_sub) do { \
+ if (is_sub) { \
+ int t_ = rcache_get_tmp(); \
+ emith_eor_r_r_imm(t_, srr, 1); \
+ emith_rorf(t_, t_, 1); \
+ rcache_free_tmp(t_); \
+ } else { \
+ emith_rorf(srr, srr, 1); \
+ emith_rol(srr, srr, 1); \
+ } \
+} while (0)
+
#define emith_tpop_carry(sr, is_sub) do { \
if (is_sub) \
emith_eor_r_imm(sr, 1); \
MIPS_OP_REG(FN_JALR,rd,rs,_)
// conditional branches; no condition code, these compare rs against rt or Z0
-#define MIPS_BEQ (OP_BEQ << 5)
-#define MIPS_BNE (OP_BNE << 5)
-#define MIPS_BLE (OP_BLEZ << 5)
-#define MIPS_BGT (OP_BGTZ << 5)
-#define MIPS_BLT ((OP__RT << 5)|RT_BLTZ)
-#define MIPS_BGE ((OP__RT << 5)|RT_BGEZ)
-#define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL)
-#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL)
-
+#define MIPS_BEQ (OP_BEQ << 5) // rs == rt (rt in lower 5 bits)
+#define MIPS_BNE (OP_BNE << 5) // rs != rt (ditto)
+#define MIPS_BLE (OP_BLEZ << 5) // rs <= 0
+#define MIPS_BGT (OP_BGTZ << 5) // rs > 0
+#define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) // rs < 0
+#define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) // rs >= 0
+#define MIPS_BGTL ((OP__RT << 5)|RT_BLTZAL) // rs > 0, link $ra if jumping
+#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) // rs >= 0, link $ra if jumping
+
+#define MIPS_BCOND(cond, rs, rt, offs16) \
+ MIPS_OP_IMM((cond >> 5), rt, rs, (offs16) >> 2)
#define MIPS_BCONDZ(cond, rs, offs16) \
MIPS_OP_IMM((cond >> 5), (cond & 0x1f), rs, (offs16) >> 2)
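// usage sketch (illustrative): MIPS_BCOND(MIPS_BEQ, rs, rt, 16) encodes
// "beq rs,rt,+16" (byte offset, stored >>2), while the *z conditions carry
// their rt/cond field themselves: MIPS_BCONDZ(MIPS_BGE, rs, 16) -> "bgez rs,+16"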
#define MIPS_B(offs16) \
ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \
} while (0)
-// FIFO for 2 instructions, for delay slot handling
-static u32 emith_last_insns[2] = { -1,-1 };
-static int emith_last_idx, emith_last_cnt;
+// FIFO of recently emitted instructions, for delay slot handling
+#define FSZ 4
+static u32 emith_last_insns[FSZ];
+static unsigned emith_last_idx, emith_last_cnt;
#define EMIT_PUSHOP() \
do { \
- emith_last_idx ^= 1; \
- if (emith_last_insns[emith_last_idx] != -1) { \
+ if (emith_last_cnt > 0) { \
u32 *p = (u32 *)tcache_ptr - emith_last_cnt; \
- EMIT_PTR(p, emith_last_insns[emith_last_idx]);\
+ int idx = (emith_last_idx - emith_last_cnt+1) %FSZ; \
+ EMIT_PTR(p, emith_last_insns[idx]);\
emith_last_cnt --; \
} \
- emith_last_insns[emith_last_idx] = -1; \
} while (0)
#define EMIT(op) \
do { \
- EMIT_PUSHOP(); \
+ if (emith_last_cnt >= FSZ) EMIT_PUSHOP(); \
tcache_ptr = (void *)((u32 *)tcache_ptr + 1); \
+ emith_last_idx = (emith_last_idx+1) %FSZ; \
emith_last_insns[emith_last_idx] = op; \
emith_last_cnt ++; \
COUNT_OP; \
#define emith_flush() \
do { \
- int i; for (i = 0; i < 2; i++) EMIT_PUSHOP(); \
+ while (emith_last_cnt) EMIT_PUSHOP(); \
+ emith_flg_hint = _FHV|_FHC; \
} while (0)
#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr - emith_last_cnt)
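// mechanics sketch (assuming FSZ == 4): EMIT() reserves a slot by advancing
// tcache_ptr and queues the opcode; once FSZ insns are pending, EMIT_PUSHOP()
// writes the oldest back to its reserved slot. emith_flush() drains the queue,
// so the reordering window never crosses a label or jump target.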
return emith_has_(rt,2,op,26,0x3f) ? (op>>16)&0x1f : 0;
}
static int emith_rd(u32 op)
- { if ((op>>26) == OP__FN)
- return emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1;
+ { int ret = emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1;
+ if ((op>>26) == OP__FN)
+ ret = emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1;
if ((op>>26) == OP__RT)
- return -1;
- return emith_has_(rd,2,op,26,0x3f) ? (op>>16)&0x1f :-1;
+ ret = -1;
+ return (ret ?: -1); // Z0 doesn't have dependencies
}
static int emith_b_isswap(u32 bop, u32 lop)
return bop;
else if (emith_is_jr(bop) && emith_rd(lop) != emith_rs(bop))
return bop;
- else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop))
+ else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop) &&
+ emith_rd(lop) != emith_rt(bop))
if ((bop & 0xffff) != 0x7fff) // displacement overflow?
return (bop & 0xffff0000) | ((bop+1) & 0x0000ffff);
return 0;
}
+static int emith_insn_swappable(u32 op1, u32 op2)
+{
+ if (emith_rd(op1) != emith_rd(op2) &&
+ emith_rs(op1) != emith_rd(op2) && emith_rt(op1) != emith_rd(op2) &&
+ emith_rs(op2) != emith_rd(op1) && emith_rt(op2) != emith_rd(op1))
+ return 1;
+ return 0;
+}
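+// e.g. "addu t0,t1,t2" and "lw t3,0(t4)" are swappable, but
+// "addu t0,t1,t2" and "lw t1,0(t0)" aren't (t0 and t1 dependencies)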
+
// emit branch, trying to fill the delay slot with one of the last insns
static void *emith_branch(u32 op)
{
- int idx = emith_last_idx;
- u32 op1 = emith_last_insns[idx], op2 = emith_last_insns[idx^1];
- u32 bop = 0;
+ unsigned idx = emith_last_idx, ds = idx;
+ u32 bop = 0, sop;
void *bp;
-
- // check last insn (op1)
- if (op1 != -1 && op1)
- bop = emith_b_isswap(op, op1);
- // if not, check older insn (op2); mustn't interact with op1 to overtake
- if (!bop && op2 != -1 && op2 && emith_rd(op1) != emith_rd(op2) &&
- emith_rs(op1) != emith_rd(op2) && emith_rt(op1) != emith_rd(op2) &&
- emith_rs(op2) != emith_rd(op1) && emith_rt(op2) != emith_rd(op1)) {
- idx ^= 1;
- bop = emith_b_isswap(op, op2);
+ int i, j, s;
+
+	// look for a ds insn; an older one may only overtake newer insns it doesn't interact with
+ for (i = 0; i < emith_last_cnt && !bop; i++) {
+ ds = (idx-i)%FSZ;
+ sop = emith_last_insns[ds];
+ for (j = i, s = 1; j > 0 && s; j--)
+ s = emith_insn_swappable(emith_last_insns[(ds+j)%FSZ], sop);
+ if (s)
+ bop = emith_b_isswap(op, sop);
}
- // flush FIFO and branch
+ // flush FIFO, but omit delay slot insn
tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt);
- if (emith_last_insns[idx^1] != -1)
- EMIT_PTR(tcache_ptr, emith_last_insns[idx^1]);
+ idx = (idx-emith_last_cnt+1)%FSZ;
+ for (i = emith_last_cnt; i > 0; i--, idx = (idx+1)%FSZ)
+ if (!bop || idx != ds)
+ EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
+ emith_last_cnt = 0;
+ // emit branch and delay slot
+ bp = tcache_ptr;
if (bop) { // can swap
- bp = tcache_ptr;
EMIT_PTR(tcache_ptr, bop); COUNT_OP;
- EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
+ EMIT_PTR(tcache_ptr, emith_last_insns[ds]);
} else { // can't swap
- if (emith_last_insns[idx] != -1)
- EMIT_PTR(tcache_ptr, emith_last_insns[idx]);
- bp = tcache_ptr;
EMIT_PTR(tcache_ptr, op); COUNT_OP;
EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP;
}
- emith_last_insns[0] = emith_last_insns[1] = -1;
- emith_last_cnt = 0;
return bp;
}
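// example: for "addu t0,t1,t2; xor t3,t4,t5; beq t0,zero,..." the xor (but
// not the addu, whose result the beq reads) may be pulled into the delay
// slot, turning the usual "branch; nop" pair into "branch; xor".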
// flag emulation creates from 2 (e.g. cmp #0/beq) up to 9 (e.g. adcf/ble) extra insns.
// flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check()
-static int emith_flg_rs, emith_flg_rt; // registers used in FNZ=rs-rt (cmp_r_r)
+static int emith_cmp_rs, emith_cmp_rt; // registers used in cmp_r_r/cmp_r_imm
+static s32 emith_cmp_imm; // immediate value used in cmp_r_imm
+enum { _FHC=1, _FHV=2 } emith_flg_hint; // C/V flag usage hinted by compiler
static int emith_flg_noV; // V flag known not to be set
+#define EMITH_HINT_COND(cond) do { \
+ /* only need to check cond>>1 since the lowest bit inverts the cond */ \
+ unsigned _mv = BITMASK3(DCOND_VS>>1,DCOND_GE>>1,DCOND_GT>>1); \
+ unsigned _mc = _mv | BITMASK2(DCOND_HS>>1,DCOND_HI>>1); \
+ emith_flg_hint = (_mv & BITMASK1(cond >> 1) ? _FHV : 0); \
+ emith_flg_hint |= (_mc & BITMASK1(cond >> 1) ? _FHC : 0); \
+} while (0)
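+// intended use (compiler side): hint the condition ahead of the flag-setting
+// op, e.g. EMITH_HINT_COND(DCOND_CS); emith_addf_r_r(rn, rm); so that
+// emith_set_arith_flags() computes only the C/V flags the check will need.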
+
// store minimal cc information: rd, rt^rs, carry
// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt.
// NB: for adcf and sbcf, carry-in must be dealt with separately (see there)
-static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub)
+static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub)
{
- if (sub && rd == FNZ && rt > AT && rs > AT) // is this cmp_r_r?
- emith_flg_rs = rs, emith_flg_rt = rt;
- else emith_flg_rs = emith_flg_rt = 0;
-
- if (sub) // C = sub:rt<rd, add:rd<rt
- EMIT(MIPS_SLTU_REG(FC, rt, FNZ));
- else EMIT(MIPS_SLTU_REG(FC, FNZ, rt));// C in FC, bit 0
-
- emith_flg_noV = 0;
- if (rs > 0) // Nt^Ns
- EMIT(MIPS_XOR_REG(FV, rt, rs));
- else if (imm < 0)
- EMIT(MIPS_NOR_REG(FV, rt, Z0));
- else if (imm > 0)
- EMIT(MIPS_OR_REG(FV, rt, Z0)); // Nt^Ns in FV, bit 31
- else emith_flg_noV = 1; // imm #0, never overflows
+ if (emith_flg_hint & _FHC) {
+		if (sub)	// C = sub:rs<rd, add:rd<rs
+ EMIT(MIPS_SLTU_REG(FC, rs, FNZ));
+ else EMIT(MIPS_SLTU_REG(FC, FNZ, rs));// C in FC, bit 0
+ }
+
+ if (emith_flg_hint & _FHV) {
+ emith_flg_noV = 0;
+ if (rt >= 0) // Nt^Ns in FV, bit 31
+ EMIT(MIPS_XOR_REG(FV, rs, rt));
+ else if (imm == 0)
+ emith_flg_noV = 1; // imm #0 can't overflow
+ else if ((imm < 0) == !sub)
+ EMIT(MIPS_NOR_REG(FV, rs, Z0));
+ else if ((imm > 0) == !sub)
+ EMIT(MIPS_OR_REG(FV, rs, Z0));
+ }
// full V = Nd^Nt^Ns^C calculation is deferred until really needed
- if (rd != FNZ)
+ if (rd && rd != FNZ)
EMIT(MIPS_MOVE_REG(rd, FNZ)); // N,Z via result value in FNZ
+ emith_cmp_rs = emith_cmp_rt = -1;
+}
+
+// since MIPS has less-than and compare-branch insns, handle cmp separately by
+// storing the involved regs for later use in one of those MIPS insns.
+// This works for all conditions except VC/VS, which are fortunately never used.
+static void emith_set_compare_flags(int rs, int rt, s32 imm)
+{
+ emith_cmp_rt = rt;
+ emith_cmp_rs = rs;
+ emith_cmp_imm = imm;
}
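+// e.g. emith_cmp_r_r(r, s) only records r and s here; the following
+// emith_cond_check() then emits a single slt/sltu or compare-branch insn
+// instead of computing the full flag set up front.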
// data processing, register
} else EMIT(MIPS_OR_REG(d, s1, s2)); \
} while (0)
+#define emith_or_r_r_r_lsr(d, s1, s2, simm) do { \
+ if (simm) { \
+ EMIT(MIPS_LSR_IMM(AT, s2, simm)); \
+ EMIT(MIPS_OR_REG(d, s1, AT)); \
+ } else EMIT(MIPS_OR_REG(d, s1, s2)); \
+} while (0)
+
#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \
if (simm) { \
EMIT(MIPS_LSL_IMM(AT, s2, simm)); \
#define emith_or_r_r_lsl(d, s, lslimm) \
emith_or_r_r_r_lsl(d, d, s, lslimm)
+#define emith_or_r_r_lsr(d, s, lsrimm) \
+ emith_or_r_r_r_lsr(d, d, s, lsrimm)
+#define emith_eor_r_r_lsl(d, s, lslimm) \
+ emith_eor_r_r_r_lsl(d, d, s, lslimm)
#define emith_eor_r_r_lsr(d, s, lsrimm) \
emith_eor_r_r_r_lsr(d, d, s, lsrimm)
EMIT(MIPS_NEG_REG(d, s))
#define emith_adc_r_r_r(d, s1, s2) do { \
- emith_add_r_r_r(AT, s1, FC); \
- emith_add_r_r_r(d, AT, s2); \
+ emith_add_r_r_r(AT, s2, FC); \
+ emith_add_r_r_r(d, s1, AT); \
+} while (0)
+
+#define emith_sbc_r_r_r(d, s1, s2) do { \
+ emith_add_r_r_r(AT, s2, FC); \
+ emith_sub_r_r_r(d, s1, AT); \
} while (0)
#define emith_adc_r_r(d, s) \
emith_adc_r_r_r(d, d, s)
+#define emith_negc_r_r(d, s) \
+ emith_sbc_r_r_r(d, Z0, s)
+
// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW)
// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout
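// worked example: s2 = 0xffffffff with Cin = 1 gives s2+Cin = 0 and Cout = 1;
// the subsequent s1+0 step then cannot carry again, so the two partial
// carries can simply be OR'ed together.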
#define emith_adcf_r_r_r(d, s1, s2) do { \
#define emith_eor_r_r(d, s) \
emith_eor_r_r_r(d, d, s)
-#define emith_tst_r_r_ptr(d, s) \
- emith_and_r_r_r(FNZ, d, s)
+#define emith_tst_r_r_ptr(d, s) do { \
+ if (d != s) { \
+ emith_and_r_r_r(FNZ, d, s); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
+ } else emith_cmp_rs = s, emith_cmp_rt = Z0; \
+} while (0)
#define emith_tst_r_r(d, s) \
emith_tst_r_r_ptr(d, s)
-#define emith_teq_r_r(d, s) \
- emith_eor_r_r_r(FNZ, d, s)
+#define emith_teq_r_r(d, s) do { \
+ emith_eor_r_r_r(FNZ, d, s); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
+} while (0)
#define emith_cmp_r_r(d, s) \
- emith_subf_r_r_r(FNZ, d, s)
+ emith_set_compare_flags(d, s, 0)
+// emith_subf_r_r_r(FNZ, d, s)
#define emith_addf_r_r(d, s) \
emith_addf_r_r_r(d, d, s)
emith_adcf_r_r_imm(r, r, imm)
#define emith_cmp_r_imm(r, imm) \
- emith_subf_r_r_imm(FNZ, r, (s16)imm)
-
+ emith_set_compare_flags(r, -1, imm)
+// emith_subf_r_r_imm(FNZ, r, (s16)imm)
#define emith_add_r_r_ptr_imm(d, s, imm) \
emith_arith_imm(OP_ADDIU, d, s, imm)
#define emith_addf_r_r_imm(d, s, imm) do { \
emith_add_r_r_imm(FNZ, s, imm); \
- emith_set_arith_flags(d, s, 0, imm, 0); \
+ emith_set_arith_flags(d, s, -1, imm, 0); \
} while (0)
#define emith_adc_r_r_imm(d, s, imm) do { \
} while (0)
#define emith_adcf_r_r_imm(d, s, imm) do { \
- emith_add_r_r_r(FNZ, s, FC); \
- EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \
- emith_add_r_r_imm(FNZ, FNZ, imm); \
- emith_set_arith_flags(d, s, 0, imm, 0); \
- emith_or_r_r(FC, AT); \
+ if (imm == 0) { \
+ emith_add_r_r_r(FNZ, s, FC); \
+		emith_set_arith_flags(d, s, -1, 1, 0); /* FC is 0..1 -> imm=1 for V logic */ \
+ } else { \
+ emith_add_r_r_r(FNZ, s, FC); \
+ EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \
+ emith_add_r_r_imm(FNZ, FNZ, imm); \
+ emith_set_arith_flags(d, s, -1, imm, 0); \
+ emith_or_r_r(FC, AT); \
+ } \
} while (0)
// NB: no SUBI in MIPS II, since ADDI takes a signed imm
#define emith_subf_r_r_imm(d, s, imm) do { \
emith_sub_r_r_imm(FNZ, s, imm); \
- emith_set_arith_flags(d, s, 0, imm, 1); \
+ emith_set_arith_flags(d, s, -1, imm, 1); \
} while (0)
// logical, immediate
#define emith_bic_r_imm_c(cond, r, imm) \
emith_bic_r_imm(r, imm)
-#define emith_tst_r_imm(r, imm) \
- emith_log_imm(OP_ANDI, FNZ, r, imm)
+#define emith_tst_r_imm(r, imm) do { \
+ emith_log_imm(OP_ANDI, FNZ, r, imm); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
+} while (0)
#define emith_tst_r_imm_c(cond, r, imm) \
emith_tst_r_imm(r, imm)
EMIT(MIPS_OR_REG(d, d, AT)); \
} while (0)
+#define emith_rorc(d) do { \
+ emith_lsr(d, d, 1); \
+ emith_lsl(AT, FC, 31); \
+ emith_or_r_r(d, AT); \
+} while (0)
+
+#define emith_rolc(d) do { \
+ emith_lsl(d, d, 1); \
+ emith_or_r_r(d, FC); \
+} while (0)
+
// NB: all flag setting shifts make V undefined
// NB: mips32r2 has EXT (useful for extracting C)
#define emith_lslf(d, s, cnt) do { \
emith_lsl(d, _s, 1); \
} \
emith_move_r_r(FNZ, d); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
#define emith_lsrf(d, s, cnt) do { \
emith_lsr(d, _s, 1); \
} \
emith_move_r_r(FNZ, d); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
#define emith_asrf(d, s, cnt) do { \
emith_asr(d, _s, 1); \
} \
emith_move_r_r(FNZ, d); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
#define emith_rolf(d, s, cnt) do { \
emith_rol(d, s, cnt); \
emith_and_r_r_imm(FC, d, 1); \
emith_move_r_r(FNZ, d); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
#define emith_rorf(d, s, cnt) do { \
emith_ror(d, s, cnt); \
emith_lsr(FC, d, 31); \
emith_move_r_r(FNZ, d); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
#define emith_rolcf(d) do { \
emith_or_r_r(d, FC); \
emith_move_r_r(FC, AT); \
emith_move_r_r(FNZ, d); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
#define emith_rorcf(d) do { \
emith_or_r_r(d, FC); \
emith_move_r_r(FC, AT); \
emith_move_r_r(FNZ, d); \
+ emith_cmp_rs = emith_cmp_rt = -1; \
} while (0)
// signed/unsigned extend
(((cond) >> 5) == OP__RT ? (cond) ^ 0x01 : (cond) ^ 0x20)
// evaluate the emulated condition, returns a register/branch type pair
-static int emith_cond_check(int cond, int *r)
+static int emith_cmpr_check(int rs, int rt, int cond, int *r)
{
int b = 0;
- // shortcut for comparing 2 registers
- if (emith_flg_rs || emith_flg_rt) switch (cond) {
- case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt));
+ // condition check for comparing 2 registers
+ switch (cond) {
+ case DCOND_EQ: *r = rs; b = MIPS_BEQ|rt; break;
+ case DCOND_NE: *r = rs; b = MIPS_BNE|rt; break;
+ case DCOND_LO: EMIT(MIPS_SLTU_REG(AT, rs, rt));
+ *r = AT, b = MIPS_BNE; break; // s < t unsigned
+ case DCOND_HS: EMIT(MIPS_SLTU_REG(AT, rs, rt));
+ *r = AT, b = MIPS_BEQ; break; // s >= t unsigned
+ case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, rt, rs));
*r = AT, b = MIPS_BEQ; break; // s <= t unsigned
- case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, emith_flg_rs, emith_flg_rt));
+ case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, rt, rs));
*r = AT, b = MIPS_BNE; break; // s > t unsigned
- case DCOND_LT: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs));
+ case DCOND_LT: if (rt == 0) { *r = rs, b = MIPS_BLT; break; } // s < 0
+ EMIT(MIPS_SLT_REG(AT, rs, rt));
*r = AT, b = MIPS_BNE; break; // s < t
- case DCOND_GE: EMIT(MIPS_SLT_REG(AT, emith_flg_rt, emith_flg_rs));
+ case DCOND_GE: if (rt == 0) { *r = rs, b = MIPS_BGE; break; } // s >= 0
+ EMIT(MIPS_SLT_REG(AT, rs, rt));
*r = AT, b = MIPS_BEQ; break; // s >= t
- case DCOND_LE: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt));
+ case DCOND_LE: if (rt == 0) { *r = rs, b = MIPS_BLE; break; } // s <= 0
+ EMIT(MIPS_SLT_REG(AT, rt, rs));
*r = AT, b = MIPS_BEQ; break; // s <= t
- case DCOND_GT: EMIT(MIPS_SLT_REG(AT, emith_flg_rs, emith_flg_rt));
+ case DCOND_GT: if (rt == 0) { *r = rs, b = MIPS_BGT; break; } // s > 0
+ EMIT(MIPS_SLT_REG(AT, rt, rs));
*r = AT, b = MIPS_BNE; break; // s > t
}
+ return b;
+}
+
+static int emith_cmpi_check(int rs, s32 imm, int cond, int *r)
+{
+ int b = 0;
+
+ // condition check for comparing register with immediate
+ if (imm == 0) return emith_cmpr_check(rs, Z0, cond, r);
+ switch (cond) {
+ case DCOND_EQ: emith_move_r_imm(AT, imm);
+ *r = rs; b = MIPS_BEQ|AT; break;
+ case DCOND_NE: emith_move_r_imm(AT, imm);
+ *r = rs; b = MIPS_BNE|AT; break;
+ case DCOND_LO: EMIT(MIPS_SLTU_IMM(AT, rs, imm));
+ *r = AT, b = MIPS_BNE; break; // s < imm unsigned
+ case DCOND_HS: EMIT(MIPS_SLTU_IMM(AT, rs, imm));
+ *r = AT, b = MIPS_BEQ; break; // s >= imm unsigned
+ case DCOND_LS: emith_move_r_imm(AT, imm);
+ EMIT(MIPS_SLTU_REG(AT, AT, rs));
+ *r = AT, b = MIPS_BEQ; break; // s <= imm unsigned
+ case DCOND_HI: emith_move_r_imm(AT, imm);
+ EMIT(MIPS_SLTU_REG(AT, AT, rs));
+ *r = AT, b = MIPS_BNE; break; // s > imm unsigned
+ case DCOND_LT: EMIT(MIPS_SLT_IMM(AT, rs, imm));
+ *r = AT, b = MIPS_BNE; break; // s < imm
+ case DCOND_GE: EMIT(MIPS_SLT_IMM(AT, rs, imm));
+ *r = AT, b = MIPS_BEQ; break; // s >= imm
+ case DCOND_LE: emith_move_r_imm(AT, imm);
+ EMIT(MIPS_SLT_REG(AT, AT, rs));
+ *r = AT, b = MIPS_BEQ; break; // s <= imm
+ case DCOND_GT: emith_move_r_imm(AT, imm);
+ EMIT(MIPS_SLT_REG(AT, AT, rs));
+ *r = AT, b = MIPS_BNE; break; // s > imm
+ }
+ return b;
+}
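+// e.g. for DCOND_GT with imm = 5 this emits roughly "li AT,5; slt AT,AT,rs"
+// and returns MIPS_BNE with *r = AT, i.e. the caller branches on AT != 0.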
+
+static int emith_cond_check(int cond, int *r)
+{
+ int b = 0;
+
+ if (emith_cmp_rs >= 0) {
+ if (emith_cmp_rt != -1)
+ b = emith_cmpr_check(emith_cmp_rs,emith_cmp_rt, cond,r);
+ else b = emith_cmpi_check(emith_cmp_rs,emith_cmp_imm,cond,r);
+ }
+
// shortcut for V known to be 0
if (!b && emith_flg_noV) switch (cond) {
case DCOND_VS: *r = Z0; b = MIPS_BNE; break; // never
#define emith_sh2_div1_step(rn, rm, sr) do { \
emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
EMITH_JMP3_START(DCOND_EQ); \
+ EMITH_HINT_COND(DCOND_CS); \
emith_addf_r_r(rn, rm); \
EMITH_JMP3_MID(DCOND_EQ); \
+ EMITH_HINT_COND(DCOND_CS); \
emith_subf_r_r(rn, rm); \
EMITH_JMP3_END(); \
emith_eor_r_r(sr, FC); \
} while (0)
#define emith_write_sr(sr, srcr) do { \
- emith_lsr(sr, sr, 10); \
- emith_or_r_r_r_lsl(sr, sr, srcr, 22); \
- emith_ror(sr, sr, 22); \
+	emith_lsr(sr, sr, 10); emith_lsl(sr, sr, 10);	/* clear SR[9:0] */ \
+	emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22);	/* keep srcr[9:0] */ \
+	emith_or_r_r(sr, AT); \
+} while (0)
+
+#define emith_carry_to_t(sr, is_sub) do { \
+ emith_and_r_imm(sr, 0xfffffffe); \
+ emith_or_r_r(sr, FC); \
} while (0)
-#define emith_carry_to_t(srr, is_sub) do { \
- emith_lsr(sr, sr, 1); \
- emith_adc_r_r(sr, sr); \
+#define emith_t_to_carry(sr, is_sub) do { \
+ emith_and_r_r_imm(FC, sr, 1); \
} while (0)
#define emith_tpop_carry(sr, is_sub) do { \
emith_and_r_r_imm(FC, sr, 1); \
- emith_lsr(sr, sr, 1); \
+ emith_eor_r_r(sr, FC); \
} while (0)
#define emith_tpush_carry(sr, is_sub) \
- emith_adc_r_r(sr, sr)
+ emith_or_r_r(sr, FC)
#ifdef T
// T bit handling
static void emith_set_t_cond(int sr, int cond)
{
- EMITH_SJMP_START(emith_invert_cond(cond));
- emith_or_r_imm_c(cond, sr, T);
- EMITH_SJMP_END(emith_invert_cond(cond));
+ int b, r;
+ u8 *ptr;
+ u32 val = 0, inv = 0;
+
+ // try to avoid jumping around if possible
+ if (emith_cmp_rs >= 0) {
+ if (emith_cmp_rt >= 0)
+ b = emith_cmpr_check(emith_cmp_rs, emith_cmp_rt, cond, &r);
+ else
+ b = emith_cmpi_check(emith_cmp_rs, emith_cmp_imm, cond, &r);
+
+ // XXX this relies on the inner workings of cmp_check...
+ if (r == AT)
+ // result of slt check which returns either 0 or 1 in AT
+ val++, inv = (b == MIPS_BEQ);
+ } else {
+ b = emith_cond_check(cond, &r);
+ if (r == Z0) {
+ if (b == MIPS_BEQ || b == MIPS_BLE || b == MIPS_BGE)
+ emith_or_r_imm(sr, T);
+ return;
+ } else if (r == FC)
+ val++, inv = (b == MIPS_BEQ);
+ }
+
+ if (!val) switch (b) { // cases: b..z r, aka cmp r,Z0 or cmp r,#0
+ case MIPS_BEQ: EMIT(MIPS_SLTU_IMM(AT, r, 1)); r=AT; val++; break;
+ case MIPS_BNE: EMIT(MIPS_SLTU_REG(AT,Z0, r)); r=AT; val++; break;
+ case MIPS_BLT: EMIT(MIPS_SLT_REG(AT, r, Z0)); r=AT; val++; break;
+ case MIPS_BGE: EMIT(MIPS_SLT_REG(AT, r, Z0)); r=AT; val++; inv++; break;
+ case MIPS_BLE: EMIT(MIPS_SLT_REG(AT, Z0, r)); r=AT; val++; inv++; break;
+ case MIPS_BGT: EMIT(MIPS_SLT_REG(AT, Z0, r)); r=AT; val++; break;
+ default: // cases: beq/bne r,s, aka cmp r,s
+ if ((b>>5) == OP_BEQ) {
+ EMIT(MIPS_XOR_REG(AT, r, b&0x1f));
+ EMIT(MIPS_SLTU_IMM(AT,AT, 1)); r=AT; val++; break;
+ } else if ((b>>5) == OP_BNE) {
+ EMIT(MIPS_XOR_REG(AT, r, b&0x1f));
+ EMIT(MIPS_SLTU_IMM(AT,Z0,AT)); r=AT; val++; break;
+ }
+ }
+ if (val) {
+ emith_or_r_r(sr, r);
+ if (inv)
+ emith_eor_r_imm(sr, T);
+ return;
+ }
+
+ // can't obtain result directly, use presumably slower jump !cond + or sr,T
+ b = emith_invert_branch(b);
+ ptr = emith_branch(MIPS_BCONDZ(b, r, 0));
+ emith_or_r_imm(sr, T);
+ emith_flush(); // prohibit delay slot switching across jump targets
+ val = (u8 *)tcache_ptr - (u8 *)(ptr) - 4;
+ EMIT_PTR(ptr, MIPS_BCONDZ(b, r, val & 0x0003ffff));
}
#define emith_get_t_cond() -1
rcache_free_tmp(tmp_); \
} else emith_or_r_r_r(d, s1, s2); \
} while (0)
+#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) do { \
+ if (lsrimm) { \
+ int tmp_ = rcache_get_tmp(); \
+ emith_lsr(tmp_, s2, lsrimm); \
+ emith_or_r_r_r(d, s1, tmp_); \
+ rcache_free_tmp(tmp_); \
+ } else emith_or_r_r_r(d, s1, s2); \
+} while (0)
// _r_r_shift
#define emith_or_r_r_lsl(d, s, lslimm) \
emith_or_r_r_r_lsl(d, d, s, lslimm)
+#define emith_or_r_r_lsr(d, s, lsrimm) \
+ emith_or_r_r_r_lsr(d, d, s, lsrimm)
+#define emith_eor_r_r_lsl(d, s, lslimm) do { \
+ if (lslimm) { \
+ int tmp_ = rcache_get_tmp(); \
+ emith_lsl(tmp_, s, lslimm); \
+ emith_eor_r_r(d, tmp_); \
+ rcache_free_tmp(tmp_); \
+ } else emith_eor_r_r(d, s); \
+} while (0)
#define emith_eor_r_r_lsr(d, s, lsrimm) do { \
if (lsrimm) { \
int tmp_ = rcache_get_tmp(); \
#define EMITH_SJMP2_END(cond) \
EMITH_SJMP3_END()
+#define EMITH_HINT_COND(cond) /**/
+
#define emith_pass_arg_r(arg, reg) do { \
int rd = 7; \
host_arg2reg(rd, arg); \
emith_rol(sr, sr, 1); \
} while (0)
+#define emith_t_to_carry(sr, is_sub) do { \
+ emith_ror(sr, sr, 1); \
+ emith_rol(sr, sr, 1); \
+} while (0)
+
#define emith_tpop_carry(sr, is_sub) \
emith_lsr(sr, sr, 1)
#define REMAP_REGISTER 1
#define LOOP_DETECTION 1
#define LOOP_OPTIMIZER 1
+#define T_OPTIMIZER 1
// limits (per block)
#define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6)
#define GET_Rn() \
((op >> 8) & 0x0f)
-#define SHR_T SHR_SR // might make them separate someday
+#define SHR_T 30 // separate T for not-used detection
#define SHR_MEM 31
#define SHR_TMP -1
#define I_SHIFT 4
#define Q_SHIFT 8
#define M_SHIFT 9
+#define T_SHIFT 11
static struct op_data {
u8 op;
return block;
}
#endif
-// } debug
#define TCACHE_BUFFERS 3
FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i,
if (guest_regs[i].flags & GRF_DIRTY) {
// if a dirty reg is unmapped save its value to context
- if (~rcache_regs_discard & (1 << i))
+ if ((~rcache_regs_discard | rcache_regs_now) & (1 << i))
emith_ctx_write(cache_regs[x].hreg, i * 4);
guest_regs[i].flags &= ~GRF_DIRTY;
}
if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) {
if (guest_regs[r].vreg != guest_regs[r].sreg &&
!cache_regs[guest_regs[r].sreg].locked &&
- (~rcache_regs_discard & (1 << r)) &&
+ ((~rcache_regs_discard | rcache_regs_now) & (1 << r)) &&
!(rns & cache_regs[guest_regs[r].sreg].gregs)) {
// statically mapped reg not in its sreg. move back to sreg
rcache_evict_vreg(guest_regs[r].sreg);
// cannot remap. keep dirty for writeback in unmap
cache_regs[x].flags |= HRF_DIRTY;
} else {
- if (~rcache_regs_discard & (1 << r))
+ if ((~rcache_regs_discard | rcache_regs_now) & (1 << r))
emith_ctx_write(cache_regs[x].hreg, r * 4);
guest_regs[r].flags &= ~GRF_DIRTY;
}
if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) &&
guest_regs[r].sreg == dst && !tr->locked) {
// split aliases if r is STATIC in sreg and dst isn't already locked
- rcache_lock_vreg(dst); // lock to avoid evicting dst
- x = rcache_allocate_vreg(rsp_d & ali);
- rcache_unlock_vreg(dst);
+ int t;
+ FOR_ALL_BITS_SET_DO(ali, t,
+ if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) &&
+ !(ali & ~(1 << t)) &&
+ !cache_regs[guest_regs[t].sreg].locked &&
+ !(rsp_d & cache_regs[guest_regs[t].sreg].gregs)) {
+ // alias is a single STATIC and its sreg is available
+ x = guest_regs[t].sreg;
+ rcache_evict_vreg(x);
+ } else {
+ rcache_lock_vreg(dst); // lock to avoid evicting dst
+ x = rcache_allocate_vreg(rsp_d & ali);
+ rcache_unlock_vreg(dst);
+ }
+ break;
+ )
if (x >= 0) {
src = x;
rcache_move_vreg(src, dst);
}
#define DELAY_SAVE_T(sr) { \
+ int t_ = rcache_get_tmp(); \
emith_bic_r_imm(sr, T_save); \
- emith_tst_r_imm(sr, T); \
- EMITH_SJMP_START(DCOND_EQ); \
- emith_or_r_imm_c(DCOND_NE, sr, T_save); \
- EMITH_SJMP_END(DCOND_EQ); \
+ emith_and_r_r_imm(t_, sr, 1); \
+ emith_or_r_r_lsl(sr, t_, T_SHIFT); \
+ rcache_free_tmp(t_); \
}
#define FLUSH_CYCLES(sr) \
ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, );
if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc)
op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change
+ // unify T and SR since rcache doesn't know about "virtual" guest regs
+ if (ops[i].source & BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR);
+ if (ops[i].dest & BITMASK1(SHR_T)) ops[i].dest |= BITMASK1(SHR_SR);
#if LOOP_DETECTION
// loop types detected:
// 1. target: ... BRA target -> idle loop
drcf.pending_branch_indirect = 1; // conditions g,h - cond.branch
// poll/idle loops terminate with their backwards branch to the loop start
if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) {
- m2 &= ~(m1 | BITMASK2(SHR_PC, SHR_SR)); // conditions d,e + g,h
+ m2 &= ~(m1 | BITMASK3(SHR_PC, SHR_SR, SHR_T)); // conditions d,e + g,h
if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect)))
op = 0; // conditions not met
op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type
drcf.loop_type = 0;
#if LOOP_OPTIMIZER
if (op_flags[v] & OF_BASIC_LOOP) {
- m3 &= ~rcache_regs_static & ~BITMASK4(SHR_PC, SHR_PR, SHR_SR, SHR_MEM);
- if (m3 && count_bits(m3) < count_bits(rcache_hregs_reg) &&
+ m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM);
+ if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) &&
pinned_loop_count < ARRAY_SIZE(pinned_loop_pc)-1) {
pinned_loop_mask[pinned_loop_count] = m3;
pinned_loop_pc[pinned_loop_count++] = base_pc + 2*v;
rcache_free_tmp(tmp3);
#endif
+ // check cycles
+ sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
+
#if LOOP_OPTIMIZER
if (op_flags[i] & OF_BASIC_LOOP) {
if (pinned_loop_pc[pinned_loop_count] == pc) {
// pin needed regs on loop entry
FOR_ALL_BITS_SET_DO(pinned_loop_mask[pinned_loop_count], v, rcache_pin_reg(v));
emith_flush();
+ // store current PC as loop target
pinned_loop_ptr[pinned_loop_count] = tcache_ptr;
} else
op_flags[i] &= ~OF_BASIC_LOOP;
}
-#endif
- // check cycles
- sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
- emith_cmp_r_imm(sr, 0);
-
-#if LOOP_OPTIMIZER
- void *jp = NULL;
if (op_flags[i] & OF_BASIC_LOOP) {
// if exiting a pinned loop pinned regs must be written back to ctx
// since they are reloaded in the loop entry code
- jp = tcache_ptr;
- emith_jump_cond_patchable(DCOND_GT, jp); // XXX need API for JMP_POS
+ emith_cmp_r_imm(sr, 0);
+ EMITH_JMP_START(DCOND_GT);
rcache_save_pinned();
- }
+
+ if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
+ // exit via stub in blx table (saves some 1-3 insns in the main flow)
+ blx_target_ptr[blx_target_count] = tcache_ptr;
+ blx_target_pc[blx_target_count] = pc|1;
+ blx_target_bl[blx_target_count++] = NULL;
+ emith_jump_patchable(tcache_ptr);
+ } else {
+ // blx table full, must inline exit code
+ tmp = rcache_get_tmp_arg(0);
+ emith_move_r_imm(tmp, pc);
+ emith_jump(sh2_drc_exit);
+ rcache_free_tmp(tmp);
+ }
+ EMITH_JMP_END(DCOND_GT);
+ } else
#endif
- if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
- // exit via stub in blx table (saves some 1-3 insns in the main flow)
- blx_target_pc[blx_target_count] = pc|1;
- blx_target_bl[blx_target_count] = NULL;
- blx_target_ptr[blx_target_count++] = tcache_ptr;
- } else {
- // blx table full, must inline exit code
- tmp = rcache_get_tmp_arg(0);
- emith_move_r_imm_c(DCOND_LE, tmp, pc);
- rcache_free_tmp(tmp);
+ {
+ if (blx_target_count < ARRAY_SIZE(blx_target_pc)) {
+ // exit via stub in blx table (saves some 1-3 insns in the main flow)
+ blx_target_pc[blx_target_count] = pc|1;
+ blx_target_bl[blx_target_count] = NULL;
+ emith_cmp_r_imm(sr, 0);
+ blx_target_ptr[blx_target_count++] = tcache_ptr;
+ emith_jump_cond_patchable(DCOND_LE, tcache_ptr);
+ } else {
+ // blx table full, must inline exit code
+ tmp = rcache_get_tmp_arg(0);
+ emith_cmp_r_imm(sr, 0);
+ EMITH_SJMP_START(DCOND_GT);
+ emith_move_r_imm_c(DCOND_LE, tmp, pc);
+ emith_jump_cond(DCOND_LE, sh2_drc_exit);
+ EMITH_SJMP_END(DCOND_GT);
+ rcache_free_tmp(tmp);
+ }
}
- emith_jump_cond_patchable(DCOND_LE, tcache_ptr);
-#if LOOP_OPTIMIZER
- if (op_flags[i] & OF_BASIC_LOOP)
- emith_jump_patch(jp, tcache_ptr, NULL);
-#endif
#if (DRC_DEBUG & 32)
// block hit counter
rcache_set_usage_now(opd[0].source); // current insn
rcache_set_usage_soon(soon); // insns 1-4
rcache_set_usage_late(late & ~soon); // insns 5-9
- rcache_set_usage_discard(write & ~(late|soon|opd[0].source));
+ rcache_set_usage_discard(write & ~(late|soon));
if (v <= 9)
// upcoming rcache_flush, start writing back unused dirty stuff
rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest));
{
case 0: // CLRT 0000000000001000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
- emith_set_t(sr, 0);
+#if T_OPTIMIZER
+ if (~rcache_regs_discard & BITMASK1(SHR_T))
+#endif
+ emith_set_t(sr, 0);
break;
case 1: // SETT 0000000000011000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
- emith_set_t(sr, 1);
+#if T_OPTIMIZER
+ if (~rcache_regs_discard & BITMASK1(SHR_T))
+#endif
+ emith_set_t(sr, 1);
break;
case 2: // CLRMAC 0000000000101000
emit_move_r_imm32(SHR_MACL, 0);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
+ tmp = rcache_get_tmp();
emith_invalidate_t();
emith_bic_r_imm(sr, M|Q|T);
- emith_tst_r_imm(tmp2, (1<<31));
- EMITH_SJMP_START(DCOND_EQ);
- emith_or_r_imm_c(DCOND_NE, sr, Q);
- EMITH_SJMP_END(DCOND_EQ);
- emith_tst_r_imm(tmp3, (1<<31));
- EMITH_SJMP_START(DCOND_EQ);
- emith_or_r_imm_c(DCOND_NE, sr, M);
- EMITH_SJMP_END(DCOND_EQ);
- emith_teq_r_r(tmp2, tmp3);
- EMITH_SJMP_START(DCOND_PL);
- emith_or_r_imm_c(DCOND_MI, sr, T);
- EMITH_SJMP_END(DCOND_PL);
+ emith_lsr(tmp, tmp2, 31); // Q = Nn
+ emith_or_r_r_lsl(sr, tmp, Q_SHIFT);
+ emith_lsr(tmp, tmp3, 31); // M = Nm
+ emith_or_r_r_lsl(sr, tmp, M_SHIFT);
+ emith_eor_r_r_lsr(tmp, tmp2, 31);
+ emith_or_r_r(sr, tmp); // T = Q^M
+ rcache_free(tmp);
goto end_op;
case 0x08: // TST Rm,Rn 0010nnnnmmmm1000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
- emith_clr_t_cond(sr);
- emith_cmp_r_r(tmp2, tmp3);
switch (op & 0x07)
{
case 0x00: // CMP/EQ
- emith_set_t_cond(sr, DCOND_EQ);
+ tmp = DCOND_EQ;
break;
case 0x02: // CMP/HS
- emith_set_t_cond(sr, DCOND_HS);
+ tmp = DCOND_HS;
break;
case 0x03: // CMP/GE
- emith_set_t_cond(sr, DCOND_GE);
+ tmp = DCOND_GE;
break;
case 0x06: // CMP/HI
- emith_set_t_cond(sr, DCOND_HI);
+ tmp = DCOND_HI;
break;
case 0x07: // CMP/GT
- emith_set_t_cond(sr, DCOND_GT);
+ tmp = DCOND_GT;
break;
}
+ emith_clr_t_cond(sr);
+ emith_cmp_r_r(tmp2, tmp3);
+ emith_set_t_cond(sr, tmp);
goto end_op;
case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100
// Q1 = carry(Rn = (Rn << 1) | T)
// Q = M ^ Q1 ^ Q2
// T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2)
tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
- tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp);
+ tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp4);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr);
+ EMITH_HINT_COND(DCOND_CS);
emith_tpop_carry(sr, 0);
- emith_adcf_r_r_r(tmp2, tmp, tmp);
+ emith_adcf_r_r_r(tmp2, tmp4, tmp4);
emith_tpush_carry(sr, 0); // keep Q1 in T for now
- rcache_free(tmp);
- tmp4 = rcache_get_tmp();
- emith_and_r_r_imm(tmp4, sr, M);
- emith_eor_r_r_lsr(sr, tmp4, M_SHIFT - Q_SHIFT); // Q ^= M
- rcache_free_tmp(tmp4);
+ rcache_free(tmp4);
+ tmp = rcache_get_tmp();
+ emith_and_r_r_imm(tmp, sr, M);
+ emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M
+ rcache_free_tmp(tmp);
// add or sub, invert T if carry to get Q1 ^ Q2
// in: (Q ^ M) passed in Q, Q1 in T
emith_sh2_div1_step(tmp2, tmp3, sr);
- emith_bic_r_imm(sr, Q);
- emith_tst_r_imm(sr, M);
- EMITH_SJMP_START(DCOND_EQ);
- emith_or_r_imm_c(DCOND_NE, sr, Q); // Q = M
- EMITH_SJMP_END(DCOND_EQ);
- emith_tst_r_imm(sr, T);
- EMITH_SJMP_START(DCOND_EQ);
- emith_eor_r_imm_c(DCOND_NE, sr, Q); // Q = M ^ Q1 ^ Q2
- EMITH_SJMP_END(DCOND_EQ);
+ tmp = rcache_get_tmp();
+ emith_bic_r_imm(sr, Q); // Q = M
+ emith_and_r_r_imm(tmp, sr, M);
+ emith_or_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT);
+ emith_and_r_r_imm(tmp, sr, T); // Q = M ^ Q1 ^ Q2
+ emith_eor_r_r_lsl(sr, tmp, Q_SHIFT);
emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2)
goto end_op;
case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr);
- if (op & 4) { // adc
- emith_tpop_carry(sr, 0);
- emith_adcf_r_r_r(tmp, tmp3, tmp2);
- emith_tpush_carry(sr, 0);
- } else {
- emith_tpop_carry(sr, 1);
- emith_sbcf_r_r_r(tmp, tmp3, tmp2);
- emith_tpush_carry(sr, 1);
+#if T_OPTIMIZER
+ if (rcache_regs_discard & BITMASK1(SHR_T)) {
+ if (op & 4) {
+ emith_t_to_carry(sr, 0);
+ emith_adc_r_r_r(tmp, tmp3, tmp2);
+ } else {
+ emith_t_to_carry(sr, 1);
+ emith_sbc_r_r_r(tmp, tmp3, tmp2);
+ }
+ } else
+#endif
+ {
+ EMITH_HINT_COND(DCOND_CS);
+ if (op & 4) { // adc
+ emith_tpop_carry(sr, 0);
+ emith_adcf_r_r_r(tmp, tmp3, tmp2);
+ emith_tpush_carry(sr, 0);
+ } else {
+ emith_tpop_carry(sr, 1);
+ emith_sbcf_r_r_r(tmp, tmp3, tmp2);
+ emith_tpush_carry(sr, 1);
+ }
}
goto end_op;
case 0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011
tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
- emith_clr_t_cond(sr);
- if (op & 4) {
- emith_addf_r_r_r(tmp, tmp3, tmp2);
+#if T_OPTIMIZER
+ if (rcache_regs_discard & BITMASK1(SHR_T)) {
+ if (op & 4)
+ emith_add_r_r_r(tmp,tmp3,tmp2);
+ else
+ emith_sub_r_r_r(tmp,tmp3,tmp2);
} else
- emith_subf_r_r_r(tmp, tmp3, tmp2);
- emith_set_t_cond(sr, DCOND_VS);
+#endif
+ {
+ emith_clr_t_cond(sr);
+ EMITH_HINT_COND(DCOND_VS);
+ if (op & 4)
+ emith_addf_r_r_r(tmp, tmp3, tmp2);
+ else
+ emith_subf_r_r_r(tmp, tmp3, tmp2);
+ emith_set_t_cond(sr, DCOND_VS);
+ }
goto end_op;
case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
case 2: // SHAL Rn 0100nnnn00100000
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
- emith_invalidate_t();
- emith_lslf(tmp, tmp2, 1);
- emith_carry_to_t(sr, 0);
+#if T_OPTIMIZER
+ if (rcache_regs_discard & BITMASK1(SHR_T))
+ emith_lsl(tmp, tmp2, 1);
+ else
+#endif
+ {
+ emith_invalidate_t();
+ emith_lslf(tmp, tmp2, 1);
+ emith_carry_to_t(sr, 0);
+ }
goto end_op;
case 1: // DT Rn 0100nnnn00010000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
#endif
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
emith_clr_t_cond(sr);
+ EMITH_HINT_COND(DCOND_EQ);
emith_subf_r_r_imm(tmp, tmp2, 1);
emith_set_t_cond(sr, DCOND_EQ);
goto end_op;
case 2: // SHAR Rn 0100nnnn00100001
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
- emith_invalidate_t();
- if (op & 0x20) {
- emith_asrf(tmp, tmp2, 1);
+#if T_OPTIMIZER
+ if (rcache_regs_discard & BITMASK1(SHR_T)) {
+ if (op & 0x20)
+ emith_asr(tmp,tmp2,1);
+ else
+ emith_lsr(tmp,tmp2,1);
} else
- emith_lsrf(tmp, tmp2, 1);
- emith_carry_to_t(sr, 0);
+#endif
+ {
+ emith_invalidate_t();
+ if (op & 0x20) {
+ emith_asrf(tmp, tmp2, 1);
+ } else
+ emith_lsrf(tmp, tmp2, 1);
+ emith_carry_to_t(sr, 0);
+ }
goto end_op;
case 1: // CMP/PZ Rn 0100nnnn00010001
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
case 0x05: // ROTR Rn 0100nnnn00000101
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
- emith_invalidate_t();
- if (op & 1) {
- emith_rorf(tmp, tmp2, 1);
+#if T_OPTIMIZER
+ if (rcache_regs_discard & BITMASK1(SHR_T)) {
+ if (op & 1)
+ emith_ror(tmp, tmp2, 1);
+ else
+ emith_rol(tmp, tmp2, 1);
} else
- emith_rolf(tmp, tmp2, 1);
- emith_carry_to_t(sr, 0);
+#endif
+ {
+ emith_invalidate_t();
+ if (op & 1)
+ emith_rorf(tmp, tmp2, 1);
+ else
+ emith_rolf(tmp, tmp2, 1);
+ emith_carry_to_t(sr, 0);
+ }
goto end_op;
case 0x24: // ROTCL Rn 0100nnnn00100100
case 0x25: // ROTCR Rn 0100nnnn00100101
tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr);
- emith_tpop_carry(sr, 0);
- if (op & 1) {
- emith_rorcf(tmp);
+#if T_OPTIMIZER
+ if (rcache_regs_discard & BITMASK1(SHR_T)) {
+ emith_t_to_carry(sr, 0);
+ if (op & 1)
+ emith_rorc(tmp);
+ else
+ emith_rolc(tmp);
} else
- emith_rolcf(tmp);
- emith_tpush_carry(sr, 0);
+#endif
+ {
+ emith_tpop_carry(sr, 0);
+ if (op & 1)
+ emith_rorcf(tmp);
+ else
+ emith_rolcf(tmp);
+ emith_tpush_carry(sr, 0);
+ }
goto end_op;
case 0x15: // CMP/PL Rn 0100nnnn00010101
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr);
- emith_tpop_carry(sr, 1);
- emith_negcf_r_r(tmp2, tmp);
- emith_tpush_carry(sr, 1);
+#if T_OPTIMIZER
+ if (rcache_regs_discard & BITMASK1(SHR_T)) {
+ emith_t_to_carry(sr, 1);
+ emith_negc_r_r(tmp2, tmp);
+ } else
+#endif
+ {
+ EMITH_HINT_COND(DCOND_CS);
+ emith_tpop_carry(sr, 1);
+ emith_negcf_r_r(tmp2, tmp);
+ emith_tpush_carry(sr, 1);
+ }
break;
case 0x0b: // NEG Rm,Rn 0110nnnnmmmm1011
emith_neg_r_r(tmp2, tmp);
host_arg2reg(arg2, 2);
host_arg2reg(arg3, 3);
emith_move_r_r(arg0, arg0); // nop
- emith_move_r_r(arg1, arg1); // nop
- emith_move_r_r(arg2, arg2); // nop
- emith_move_r_r(arg3, arg3); // nop
emith_flush();
// sh2_drc_write8(u32 a, u32 d)
// d = sh2_drc_read8(u32 a)
sh2_drc_read8 = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map));
+ EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3);
// d = sh2_drc_read16(u32 a)
sh2_drc_read16 = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map));
+ EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3);
// d = sh2_drc_read32(u32 a)
sh2_drc_read32 = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map));
+ EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CS);
emith_and_r_r_c(DCOND_CC, arg0, arg3);
// d = sh2_drc_read8_poll(u32 a)
sh2_drc_read8_poll = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map));
+ EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CC);
emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
// d = sh2_drc_read16_poll(u32 a)
sh2_drc_read16_poll = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map));
+ EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CC);
emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
// d = sh2_drc_read32_poll(u32 a)
sh2_drc_read32_poll = (void *)tcache_ptr;
emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map));
+ EMITH_HINT_COND(DCOND_CS);
emith_sh2_rcall(arg0, arg1, arg2, arg3);
EMITH_SJMP_START(DCOND_CC);
emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG);
emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx));
emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0);
emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache));
- emith_cmp_r_r(arg0, arg3);
#if (DRC_DEBUG & 128)
+ emith_cmp_r_r(arg0, arg3);
EMITH_SJMP_START(DCOND_EQ);
emith_move_r_ptr_imm(arg3, (uptr)&rcmiss);
emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
emith_add_r_imm_c(DCOND_NE, arg1, 1);
emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0);
+ emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
EMITH_SJMP_END(DCOND_EQ);
-#endif
+#else
+ emith_cmp_r_r(arg0, arg3);
emith_jump_cond(DCOND_NE, sh2_drc_dispatcher);
+#endif
emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *));
emith_sub_r_imm(arg2, 2*sizeof(void *));
emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *));
emith_sub_r_imm(tmp, 4*2);
rcache_clean();
// push SR
- tmp = rcache_get_reg_arg(0, SHR_SP,&tmp2);
+ tmp = rcache_get_reg_arg(0, SHR_SP, &tmp2);
emith_add_r_r_imm(tmp, tmp2, 4);
tmp = rcache_get_reg_arg(1, SHR_SR, NULL);
emith_clear_msb(tmp, tmp, 22);
else if ((lowest_mova && lowest_mova <= pc) ||
(lowest_literal && lowest_literal <= pc))
break; // text area collides with data area
+ else if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &i_end))
+ break; // branch target already compiled
op = FETCH_OP(pc);
switch ((op & 0xf000) >> 12)
switch (GET_Fx())
{
case 0: // STC SR,Rn 0000nnnn00000010
- tmp = SHR_SR;
+ tmp = BITMASK2(SHR_SR, SHR_T);
break;
case 1: // STC GBR,Rn 0000nnnn00010010
- tmp = SHR_GBR;
+ tmp = BITMASK1(SHR_GBR);
break;
case 2: // STC VBR,Rn 0000nnnn00100010
- tmp = SHR_VBR;
+ tmp = BITMASK1(SHR_VBR);
break;
default:
goto undefined;
}
opd->op = OP_MOVE;
- opd->source = BITMASK1(tmp);
+ opd->source = tmp;
opd->dest = BITMASK1(GET_Rn());
break;
case 0x03:
opd->imm = 1;
break;
case 2: // CLRMAC 0000000000101000
- opd->dest = BITMASK3(SHR_T, SHR_MACL, SHR_MACH);
+ opd->dest = BITMASK2(SHR_MACL, SHR_MACH);
break;
default:
goto undefined;
case 2: // RTE 0000000000101011
opd->op = OP_RTE;
opd->source = BITMASK1(SHR_SP);
- opd->dest = BITMASK3(SHR_SP, SHR_SR, SHR_PC);
+ opd->dest = BITMASK4(SHR_SP, SHR_SR, SHR_T, SHR_PC);
opd->cycles = 4;
next_is_delay = 1;
end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET);
break;
case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111
opd->source = BITMASK2(GET_Rm(), GET_Rn());
- opd->dest = BITMASK1(SHR_SR);
+ opd->dest = BITMASK2(SHR_SR, SHR_T);
break;
case 0x08: // TST Rm,Rn 0010nnnnmmmm1000
opd->source = BITMASK2(GET_Rm(), GET_Rn());
opd->dest = BITMASK1(SHR_T);
break;
case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100
- opd->source = BITMASK3(GET_Rm(), GET_Rn(), SHR_SR);
- opd->dest = BITMASK2(GET_Rn(), SHR_SR);
+ opd->source = BITMASK4(GET_Rm(), GET_Rn(), SHR_SR, SHR_T);
+ opd->dest = BITMASK3(GET_Rn(), SHR_SR, SHR_T);
break;
case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101
case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101
switch (op & 0x3f)
{
case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010
- tmp = SHR_MACH;
+ tmp = BITMASK1(SHR_MACH);
break;
case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010
- tmp = SHR_MACL;
+ tmp = BITMASK1(SHR_MACL);
break;
case 0x22: // STS.L PR,@-Rn 0100nnnn00100010
- tmp = SHR_PR;
+ tmp = BITMASK1(SHR_PR);
break;
case 0x03: // STC.L SR,@-Rn 0100nnnn00000011
- tmp = SHR_SR;
+ tmp = BITMASK2(SHR_SR, SHR_T);
opd->cycles = 2;
break;
case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011
- tmp = SHR_GBR;
+ tmp = BITMASK1(SHR_GBR);
opd->cycles = 2;
break;
case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011
- tmp = SHR_VBR;
+ tmp = BITMASK1(SHR_VBR);
opd->cycles = 2;
break;
default:
goto undefined;
}
- opd->source = BITMASK2(GET_Rn(), tmp);
+ opd->source = BITMASK1(GET_Rn()) | tmp;
opd->dest = BITMASK2(GET_Rn(), SHR_MEM);
break;
case 0x04:
switch (op & 0x3f)
{
case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110
- tmp = SHR_MACH;
+ tmp = BITMASK1(SHR_MACH);
break;
case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110
- tmp = SHR_MACL;
+ tmp = BITMASK1(SHR_MACL);
break;
case 0x26: // LDS.L @Rm+,PR 0100mmmm00100110
- tmp = SHR_PR;
+ tmp = BITMASK1(SHR_PR);
break;
case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111
- tmp = SHR_SR;
+ tmp = BITMASK2(SHR_SR, SHR_T);
opd->op = OP_LDC;
opd->cycles = 3;
break;
case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111
- tmp = SHR_GBR;
+ tmp = BITMASK1(SHR_GBR);
opd->op = OP_LDC;
opd->cycles = 3;
break;
case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111
- tmp = SHR_VBR;
+ tmp = BITMASK1(SHR_VBR);
opd->op = OP_LDC;
opd->cycles = 3;
break;
goto undefined;
}
opd->source = BITMASK2(GET_Rn(), SHR_MEM);
- opd->dest = BITMASK2(GET_Rn(), tmp);
+ opd->dest = BITMASK1(GET_Rn()) | tmp;
break;
case 0x08:
case 0x09:
switch (GET_Fx())
{
case 0: // LDC Rm,SR 0100mmmm00001110
- tmp = SHR_SR;
+ tmp = BITMASK2(SHR_SR, SHR_T);
break;
case 1: // LDC Rm,GBR 0100mmmm00011110
- tmp = SHR_GBR;
+ tmp = BITMASK1(SHR_GBR);
break;
case 2: // LDC Rm,VBR 0100mmmm00101110
- tmp = SHR_VBR;
+ tmp = BITMASK1(SHR_VBR);
break;
default:
goto undefined;
}
opd->op = OP_LDC;
opd->source = BITMASK1(GET_Rn());
- opd->dest = BITMASK1(tmp);
+ opd->dest = tmp;
break;
case 0x0f:
// MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111
break;
case 0x0300: // TRAPA #imm 11000011iiiiiiii
opd->op = OP_TRAPA;
- opd->source = BITMASK3(SHR_SP, SHR_PC, SHR_SR);
+ opd->source = BITMASK4(SHR_SP, SHR_PC, SHR_SR, SHR_T);
opd->dest = BITMASK2(SHR_SP, SHR_PC);
opd->imm = (op & 0xff);
opd->cycles = 8;
last_btarget = 0;
op = 0; // delay/poll insns counter
for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) {
- int null;
- if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null))
- break; // branch target already compiled
opd = &ops[i];
crc += FETCH_OP(pc);
if (active_sh2 != NULL)
m68k_cycles = sh2_cycles_done_m68k(active_sh2);
+	// find the top bit = highest irq number (even, 0 <= irl <= 14) by binary search
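+	// e.g. irqs = 0x14 (bits 4 and 2 set): 0x14 >= 0x10 -> mlvl = 8, irqs = 1,
+	// and the remaining tests fail, so mlvl = 8 = 2 * top bit, as before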
+
// msh2
irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[0];
- while ((irqs >>= 1))
- mlvl++;
- mlvl *= 2;
+ if (irqs >= 0x10) mlvl += 8, irqs >>= 4;
+ if (irqs >= 0x04) mlvl += 4, irqs >>= 2;
+ if (irqs >= 0x02) mlvl += 2, irqs >>= 1;
// ssh2
irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[1];
- while ((irqs >>= 1))
- slvl++;
- slvl *= 2;
+ if (irqs >= 0x10) slvl += 8, irqs >>= 4;
+ if (irqs >= 0x04) slvl += 4, irqs >>= 2;
+ if (irqs >= 0x02) slvl += 2, irqs >>= 1;
mrun = sh2_irl_irq(&msh2, mlvl, msh2.state & SH2_STATE_RUN);
if (mrun) {