switch (op) {
case A_OP_MOV:
+ case A_OP_MVN:
rn = 0;
// use MVN if the immediate has more 1 bits than 0 bits
if (count_bits(imm) > 16) {
return;
}
#else
- for (i = 2, u = v; i > 0; i--, u >>= 8)
+ for (i = 3, u = v; i > 0; i--, u >>= 8)
while (u > 0xff && !(u & 3))
u >>= 2;
if (u) { // 4 insns needed...
} while (0)
/*
+ * T = carry(Rn = (Rn << 1) | T)
* if Q
- * t = carry(Rn += Rm)
+ * T ^= !carry(Rn += Rm)
* else
- * t = carry(Rn -= Rm)
- * T ^= t
+ * T ^= !carry(Rn -= Rm)
*/
#define emith_sh2_div1_step(rn, rm, sr) do { \
void *jmp0, *jmp1; \
+ emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\
+ emith_adcf_r_r_r(rn, rn, rn); \
+ emith_tpush_carry(sr, 0); \
emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
JMP_POS(jmp0); /* beq do_sub */ \
- emith_addf_r_r(rn, rm); \
- emith_eor_r_imm_c(A_COND_CS, sr, T); \
+ emith_addf_r_r(rn, rm); /* Rn += Rm */ \
+ emith_eor_r_imm_c(A_COND_CC, sr, T); \
JMP_POS(jmp1); /* b done */ \
JMP_EMIT(A_COND_EQ, jmp0); /* do_sub: */ \
- emith_subf_r_r(rn, rm); \
- emith_eor_r_imm_c(A_COND_CC, sr, T); \
+ emith_subf_r_r(rn, rm); /* Rn -= Rm */ \
+ emith_eor_r_imm_c(A_COND_CS, sr, T); \
JMP_EMIT(A_COND_AL, jmp1); /* done: */ \
} while (0)
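+/* Reference (condensed, for illustration): the step the macro above performs,
+ * matching the DIV1 code in the sh2 compiler (Q = M ^ Q1 ^ Q2, T = !(Q1 ^ Q2)).
+ * The Q bit of sr holds old_Q ^ M on entry, precomputed by the caller:
+ *   Q1 = bit 31 shifted out of Rn;  Rn = (Rn << 1) | T;
+ *   if (Q ^ M) Q2 = carry(Rn += Rm); else Q2 = borrow(Rn -= Rm);
+ *   T bit on exit = !(Q1 ^ Q2)
+ */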
#define EMITH_HINT_COND(cond) /**/
-// "simple" jump (no more then a few insns)
+// "simple" jump (no more than a few insns)
// ARM32 will use conditional instructions here
#define EMITH_SJMP_START EMITH_JMP_START
#define EMITH_SJMP_END EMITH_JMP_END
} while (0)
/*
+ * T = carry(Rn = (Rn << 1) | T)
* if Q
- * t = carry(Rn += Rm)
+ * t = !carry(Rn += Rm)
* else
- * t = carry(Rn -= Rm)
+ * t = !carry(Rn -= Rm)
* T ^= t
*/
#define emith_sh2_div1_step(rn, rm, sr) do { \
int tmp_ = rcache_get_tmp(); \
- emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
+ emith_tpop_carry(sr, 0); \
+ emith_adcf_r_r_r(rn, rn, rn); \
+ emith_tpush_carry(sr, 0); \
+ emith_tst_r_imm(sr, Q); \
EMITH_SJMP3_START(DCOND_EQ); \
emith_addf_r_r(rn, rm); \
emith_adc_r_r_r(tmp_, Z0, Z0); \
+ emith_eor_r_imm(tmp_, 1); \
EMITH_SJMP3_MID(DCOND_EQ); \
emith_subf_r_r(rn, rm); \
emith_adc_r_r_r(tmp_, Z0, Z0); \
- emith_eor_r_imm(tmp_, 1); \
EMITH_SJMP3_END(); \
emith_eor_r_r(sr, tmp_); \
rcache_free_tmp(tmp_); \
*/
#define HOST_REGS 32
-// MIPS ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra),
+// MIPS32 ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra)
// saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp)
// r1,r15,r24,r25(at,t7-t9) are used internally by the code emitter
+// MIPSN32/MIPS64 ABI: params: r4-r11, no caller-reserved save area on stack
#define RET_REG 2 // v0
#define PARAM_REGS { 4, 5, 6, 7 } // a0-a3
#define PRESERVED_REGS { 16, 17, 18, 19, 20, 21, 22, 23 } // s0-s7
JMP_EMIT_NC(else_ptr); \
}
-// "simple" jump (no more then a few insns)
+// "simple" jump (no more than a few insns)
// ARM32 will use conditional instructions here
#define EMITH_SJMP_START EMITH_JMP_START
#define EMITH_SJMP_END EMITH_JMP_END
EMIT(MIPS_OR_IMM(r, r, imm & 0xffff));
} else
#endif
- if ((s16)imm == imm) {
+ if ((s16)imm == imm) {
EMIT(MIPS_ADD_IMM(r, Z0, imm));
} else if (!((u32)imm >> 16)) {
EMIT(MIPS_OR_IMM(r, Z0, imm));
} while (0)
/*
+ * T = !carry(Rn = (Rn << 1) | T)
* if Q
- * t = carry(Rn += Rm)
+ * C = carry(Rn += Rm)
* else
- * t = carry(Rn -= Rm)
- * T ^= t
+ * C = carry(Rn -= Rm)
+ * T ^= C
*/
#define emith_sh2_div1_step(rn, rm, sr) do { \
+ int t_ = rcache_get_tmp(); \
+ emith_and_r_r_imm(AT, sr, T); \
+ emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \
+ emith_lsl(t_, rn, 1); \
+ emith_or_r_r(t_, AT); \
+ emith_or_r_imm(sr, T); /* T = !carry */ \
+ emith_eor_r_r(sr, FC); \
emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
EMITH_JMP3_START(DCOND_EQ); \
- EMITH_HINT_COND(DCOND_CS); \
- emith_addf_r_r(rn, rm); \
+ emith_add_r_r_r(rn, t_, rm); \
+ EMIT(MIPS_SLTU_REG(FC, rn, t_)); \
EMITH_JMP3_MID(DCOND_EQ); \
- EMITH_HINT_COND(DCOND_CS); \
- emith_subf_r_r(rn, rm); \
+ emith_sub_r_r_r(rn, t_, rm); \
+ EMIT(MIPS_SLTU_REG(FC, t_, rn)); \
EMITH_JMP3_END(); \
- emith_eor_r_r(sr, FC); \
+ emith_eor_r_r(sr, FC); /* T ^= carry */ \
+ rcache_free_tmp(t_); \
} while (0)
/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */
*/
#define HOST_REGS 32
-// RISC-V ABI: params: x10-x17, return: r10-x11, temp: x1(ra),x5-x7,x28-x31
+// RISC-V ABI: params: x10-x17, return: x10-x11, temp: x1(ra),x5-x7,x28-x31
// saved: x8(fp),x9,x18-x27, reserved: x0(zero), x4(tp), x3(gp), x2(sp)
// x28-x31(t3-t6) are used internally by the code emitter
#define RET_REG 10 // a0
_CB(imm,8,12,0), rd, op)
// opcode
-enum { OP_LUI=0x37, OP_JAL=0x6f, OP_JALR=0x67, OP_BCOND=0x63, OP_LD=0x03,
- OP_ST=0x23, OP_IMM=0x13, OP_IMM32=0x1b, OP_REG=0x33, OP_REG32=0x3b };
+enum { OP_LUI=0x37, OP_AUIPC=0x17, OP_JAL=0x6f, // 20-bit immediate
+ OP_JALR=0x67, OP_BCOND=0x63, OP_LD=0x03, OP_ST=0x23, // 12-bit immediate
+ OP_IMM=0x13, OP_REG=0x33, OP_IMM32=0x1b, OP_REG32=0x3b };
// func3
-enum { F1_ADD, F1_SL, F1_SLT, F1_SLTU, F1_XOR, F1_SR, F1_OR, F1_AND };
-enum { F1_BEQ, F1_BNE, F1_BLT=4, F1_BGE, F1_BLTU, F1_BGEU };
-enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU };
+enum { F1_ADD, F1_SL, F1_SLT, F1_SLTU, F1_XOR, F1_SR, F1_OR, F1_AND };// IMM/REG
enum { F1_MUL, F1_MULH, F1_MULHSU, F1_MULHU, F1_DIV, F1_DIVU, F1_REM, F1_REMU };
+enum { F1_BEQ, F1_BNE, F1_BLT=4, F1_BGE, F1_BLTU, F1_BGEU }; // BCOND
+enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU }; // LD/ST
// func7
enum { F2_ALT=0x20, F2_MULDIV=0x01 };
R5_OR_IMM(rd, Z0, imm12)
#define R5_MOVT_IMM(rd, imm20) \
R5_U_INSN(OP_LUI, rd, imm20)
+#define R5_MOVA_IMM(rd, imm20) \
+ R5_U_INSN(OP_AUIPC, rd, imm20)
// rd = rs SHIFT imm5/imm6
#define R5_LSL_IMM(rd, rs, bits) \
#define PTR_SCALE 3
// NB: must split 64 bit result into 2 32 bit registers
-// NB: this expects 32 bit values in s1+s2, correctly sign extended to 64 bits
+// NB: expects 32 bit values in s1+s2, correctly sign extended to 64 bits
#define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \
+ /*EMIT(R5_ADDW_IMM(s1, s1, 0));*/ \
+ /*EMIT(R5_ADDW_IMM(s2, s2, 0));*/ \
EMIT(R5_MUL(dlo, s1, s2)); \
EMIT(R5_LSR_IMM(dhi, dlo, 32)); \
EMIT(R5_LSL_IMM(dlo, dlo, 32)); \
JMP_EMIT_NC(else_ptr); \
}
-// "simple" jump (no more then a few insns)
+// "simple" jump (no more than a few insns)
// ARM32 will use conditional instructions here
#define EMITH_SJMP_START EMITH_JMP_START
#define EMITH_SJMP_END EMITH_JMP_END
// move immediate
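+// literal pool for constants that don't fit into insn immediates (on RV64,
+// pointers not representable as sign-extended 32 bit values): entries are
+// stored after the code and loaded PC-relative via AUIPC+LD;
+// emith_pool_literal() returns the pool index used to form the LD offset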
+#define MAX_HOST_LITERALS 32 // pool must stay within the 2 KB LD offset range
+static uintptr_t literal_pool[MAX_HOST_LITERALS];
+static u32 *literal_insn[MAX_HOST_LITERALS];
+static int literal_pindex, literal_iindex;
+
+static inline int emith_pool_literal(uintptr_t imm)
+{
+ int idx = literal_pindex - 8; // max look behind in pool
+ // see if one of the last literals was the same
+ for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++)
+ if (imm == literal_pool[idx])
+ break;
+ if (idx == literal_pindex) // store new literal
+ literal_pool[literal_pindex++] = imm;
+ return idx;
+}
+
+static void emith_pool_commit(int jumpover)
+{
+ int i, sz = literal_pindex * sizeof(uintptr_t);
+ u8 *pool = (u8 *)tcache_ptr;
+
+ // nothing to commit if pool is empty
+ if (sz == 0)
+ return;
+ // align pool to pointer size
+ if (jumpover)
+ pool += sizeof(u32);
+ i = (uintptr_t)pool & (sizeof(void *)-1);
+ pool += (i ? sizeof(void *)-i : 0);
+ // need branch over pool if not at block end
+ if (jumpover)
+ EMIT(R5_B(sz + (pool-(u8 *)tcache_ptr)));
+ // safety check - pool must be after insns and reachable
+ if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0x7ff) {
+ elprintf(EL_STATUS|EL_SVP|EL_ANOMALY,
+ "pool offset out of range");
+ exit(1);
+ }
+ // copy pool and adjust addresses in insns accessing the pool
+ memcpy(pool, literal_pool, sz);
+ for (i = 0; i < literal_iindex; i++) {
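+ // the LD's I-type immediate sits in bits [31:20], hence the << 20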
+ *literal_insn[i] += ((u8 *)pool - (u8 *)literal_insn[i]) << 20;
+ }
+ // count pool constants as insns for statistics
+ for (i = 0; i < literal_pindex * sizeof(uintptr_t)/sizeof(u32); i++)
+ COUNT_OP;
+
+ tcache_ptr = (void *)((u8 *)pool + sz);
+ literal_pindex = literal_iindex = 0;
+}
+
+static void emith_pool_check(void)
+{
+ // check if pool must be committed
+ if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex &&
+ (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0x700))
+ // pool full, or displacement is approaching the limit
+ emith_pool_commit(1);
+}
+
static void emith_move_imm(int r, uintptr_t imm)
{
u32 lui = imm + _CB(imm,1,11,12);
EMIT(R5_ADD_IMM(r, Z0, imm));
}
+static void emith_move_ptr_imm(int r, uintptr_t imm)
+{
+#if __riscv_xlen == 64
+ if ((s32)imm != imm) {
+ int idx;
+ if (literal_iindex >= MAX_HOST_LITERALS)
+ emith_pool_commit(1);
+ idx = emith_pool_literal(imm);
+ EMIT(R5_MOVA_IMM(AT, 0)); // loads PC of MOVA insn... + 4 in LD
+ literal_insn[literal_iindex++] = (u32 *)tcache_ptr;
+ EMIT(R5_I_INSN(OP_LD, F1_P, r, AT, idx*sizeof(uintptr_t) + 4));
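+ // +4 since AT holds the address of the AUIPC, one insn before the LD recorded above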
+ } else
+#endif
+ emith_move_imm(r, imm);
+}
+
#define emith_move_r_ptr_imm(r, imm) \
- emith_move_imm(r, (uintptr_t)(imm))
+ emith_move_ptr_imm(r, (uintptr_t)(imm))
#define emith_move_r_imm(r, imm) \
emith_move_imm(r, (u32)(imm))
EMIT(R5_ADD_IMM(r, Z0, (s8)(imm)))
#define emith_move_r_imm_s8_patch(ptr, imm) do { \
u32 *ptr_ = (u32 *)ptr; \
- while ((*ptr_ & 0xff07f) != R5_ADD_IMM(Z0, Z0, 0)) ptr_++; \
EMIT_PTR(ptr_, (*ptr_ & 0x000fffff) | ((u16)(s8)(imm)<<20)); \
} while (0)
// NB: returns position of patch for cache maintenance
#define emith_jump_patch(ptr, target, pos) do { \
u32 *ptr_ = (u32 *)ptr; /* must skip condition check code */ \
- while ((*ptr_&0x77) != OP_JALR && (*ptr_&0x77) != OP_BCOND) ptr_ ++; \
if ((*ptr_&0x77) == OP_BCOND) { \
u32 *p_ = ptr_, disp_ = (u8 *)target - (u8 *)ptr_; \
u32 f1_ = _CB(*ptr_,3,12,0); \
// emitter ABI stuff
-#define emith_pool_check() /**/
-#define emith_pool_commit(j) /**/
#define emith_insn_ptr() ((u8 *)tcache_ptr)
#define emith_flush() /**/
#define host_instructions_updated(base, end) __builtin___clear_cache(base, end)
} while (0)
/*
+ * T = !carry(Rn = (Rn << 1) | T)
* if Q
- * t = carry(Rn += Rm)
+ * C = carry(Rn += Rm)
* else
- * t = carry(Rn -= Rm)
- * T ^= t
+ * C = carry(Rn -= Rm)
+ * T ^= C
*/
#define emith_sh2_div1_step(rn, rm, sr) do { \
+ int t_ = rcache_get_tmp(); \
+ emith_and_r_r_imm(AT, sr, T); \
+ emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \
+ emith_lsl(t_, rn, 1); \
+ emith_or_r_r(t_, AT); \
+ emith_or_r_imm(sr, T); /* T = !carry */ \
+ emith_eor_r_r(sr, FC); \
emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
EMITH_JMP3_START(DCOND_EQ); \
- EMITH_HINT_COND(DCOND_CS); \
- emith_addf_r_r(rn, rm); \
+ emith_add_r_r_r(rn, t_, rm); \
+ EMIT(R5_SLTU_REG(FC, rn, t_)); \
EMITH_JMP3_MID(DCOND_EQ); \
- EMITH_HINT_COND(DCOND_CS); \
- emith_subf_r_r(rn, rm); \
+ emith_sub_r_r_r(rn, t_, rm); \
+ EMIT(R5_SLTU_REG(FC, t_, rn)); \
EMITH_JMP3_END(); \
- emith_eor_r_r(sr, FC); \
+ emith_eor_r_r(sr, FC); /* T ^= carry */ \
+ rcache_free_tmp(t_); \
} while (0)
/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */
JMP8_EMIT_NC(else_ptr); \
}
-// "simple" jump (no more then a few insns)
+// "simple" jump (no more than a few insns)
// ARM will use conditional instructions here
#define EMITH_SJMP_START EMITH_JMP_START
#define EMITH_SJMP_END EMITH_JMP_END
emith_adc_r_r(sr, sr)
/*
+ * T = carry(Rn = (Rn << 1) | T)
* if Q
* t = carry(Rn += Rm)
* else
* t = carry(Rn -= Rm)
- * T ^= t
+ * T = !(T ^ t)
*/
#define emith_sh2_div1_step(rn, rm, sr) do { \
u8 *jmp0, *jmp1; \
int tmp_ = rcache_get_tmp(); \
+ emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\
+ emith_adcf_r_r_r(rn, rn, rn); \
+ emith_tpush_carry(sr, 0); /* T = C1 */ \
emith_eor_r_r(tmp_, tmp_); \
emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \
JMP8_POS(jmp0); /* je do_sub */ \
emith_sub_r_r(rn, rm); \
JMP8_EMIT_NC(jmp1); /* done: */ \
emith_adc_r_r(tmp_, tmp_); \
- emith_eor_r_r(sr, tmp_); \
+ emith_eor_r_r(sr, tmp_);/* T = !(C1^C2) */\
+ emith_eor_r_imm(sr, T); \
rcache_free_tmp(tmp_); \
} while (0)
struct block_link *bl;
int u, v, tmp;
+ emith_flush();
for (u = 0; u < link_count; u++) {
emith_pool_check();
// look up local branch targets
- v = find_in_sorted_linkage(targets, target_count, links[u].pc);
- if (v >= 0) {
- if (! targets[v].ptr) {
+ if (links[u].mask & 0x2) {
+ v = find_in_sorted_linkage(targets, target_count, links[u].pc);
+ if (v < 0 || ! targets[v].ptr) {
// forward branch not yet resolved, prepare external linking
emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id);
- if (bl) {
- emith_flush(); // flush to inhibit insn swapping
+ if (bl)
bl->type = BL_LDJMP;
- }
-
tmp = rcache_get_tmp_arg(0);
emith_move_r_imm(tmp, links[u].pc);
rcache_free_tmp(tmp);
}
} else {
// external or exit, emit blx area entry
- void *target = (links[u].pc & 1 ? sh2_drc_exit : sh2_drc_dispatcher);
+ void *target = (links[u].mask & 0x1 ? sh2_drc_exit : sh2_drc_dispatcher);
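+ // mask bit 0 selects the exit stub over the dispatcher; bit 1 (tested above) marks a local branch target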
if (links[u].bl)
links[u].bl->blx = tcache_ptr;
emith_jump_patch(links[u].ptr, tcache_ptr, NULL);
u8 op_flags[BLOCK_INSN_LIMIT];
+ enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 };
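+ // statically known state of SR flag bits (T, M), used to skip redundant flag ops (e.g. DIV1 skips Q ^= M when M is known to be 0)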
struct drcf {
int delay_reg:8;
u32 loop_type:8;
u32 test_irq:1;
u32 pending_branch_direct:1;
u32 pending_branch_indirect:1;
+ u32 Tflag:2, Mflag:2;
} drcf = { 0, };
#if LOOP_OPTIMIZER
if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) &&
pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) {
pinned_loops[pinned_loop_count++] =
- (struct linkage) { .mask = m3, .pc = base_pc + 2*v };
+ (struct linkage) { .pc = base_pc + 2*v, .mask = m3 };
} else
op_flags[v] &= ~OF_BASIC_LOOP;
}
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
FLUSH_CYCLES(sr);
emith_sync_t(sr);
+ drcf.Mflag = FLG_UNKNOWN;
rcache_flush();
emith_flush();
}
if (blx_target_count < ARRAY_SIZE(blx_targets)) {
// exit via stub in blx table (saves some 1-3 insns in the main flow)
blx_targets[blx_target_count++] =
- (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL };
+ (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 };
emith_jump_patchable(tcache_ptr);
} else {
// blx table full, must inline exit code
// exit via stub in blx table (saves some 1-3 insns in the main flow)
emith_cmp_r_imm(sr, 0);
blx_targets[blx_target_count++] =
- (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL };
+ (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 };
emith_jump_cond_patchable(DCOND_LE, tcache_ptr);
} else {
// blx table full, must inline exit code
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_invalidate_t();
emith_bic_r_imm(sr, M|Q|T);
+ drcf.Mflag = FLG_0;
break;
case 2: // MOVT Rn 0000nnnn00101001
sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
emith_eor_r_r_lsr(tmp, tmp2, 31);
emith_or_r_r(sr, tmp); // T = Q^M
rcache_free(tmp);
+ drcf.Mflag = FLG_UNKNOWN;
goto end_op;
case 0x08: // TST Rm,Rn 0010nnnnmmmm1000
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL);
+ tmp4 = rcache_get_tmp();
if (op & 1) {
emith_sext(tmp, tmp2, 16);
- } else
+ emith_sext(tmp4, tmp3, 16);
+ } else {
emith_clear_msb(tmp, tmp2, 16);
- tmp2 = rcache_get_tmp();
- if (op & 1) {
- emith_sext(tmp2, tmp3, 16);
- } else
- emith_clear_msb(tmp2, tmp3, 16);
- emith_mul(tmp, tmp, tmp2);
- rcache_free_tmp(tmp2);
+ emith_clear_msb(tmp4, tmp3, 16);
+ }
+ emith_mul(tmp, tmp, tmp4);
+ rcache_free_tmp(tmp4);
goto end_op;
}
goto default_;
// Q = M ^ Q1 ^ Q2
// T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2)
tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL);
- tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp4);
+ tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_sync_t(sr);
- EMITH_HINT_COND(DCOND_CS);
- emith_tpop_carry(sr, 0);
- emith_adcf_r_r_r(tmp2, tmp4, tmp4);
- emith_tpush_carry(sr, 0); // keep Q1 in T for now
- rcache_free(tmp4);
tmp = rcache_get_tmp();
- emith_and_r_r_imm(tmp, sr, M);
- emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M
+ if (drcf.Mflag != FLG_0) {
+ emith_and_r_r_imm(tmp, sr, M);
+ emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M
+ }
rcache_free_tmp(tmp);
- // add or sub, invert T if carry to get Q1 ^ Q2
- // in: (Q ^ M) passed in Q, Q1 in T
+ // shift Rn, add T, add or sub Rm, set T = !(Q1 ^ Q2)
+ // in: (Q ^ M) passed in Q
emith_sh2_div1_step(tmp2, tmp3, sr);
tmp = rcache_get_tmp();
- emith_bic_r_imm(sr, Q); // Q = M
- emith_and_r_r_imm(tmp, sr, M);
- emith_or_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT);
- emith_and_r_r_imm(tmp, sr, T); // Q = M ^ Q1 ^ Q2
+ emith_or_r_imm(sr, Q); // Q = !T
+ emith_and_r_r_imm(tmp, sr, T);
emith_eor_r_r_lsl(sr, tmp, Q_SHIFT);
- emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2)
+ if (drcf.Mflag != FLG_0) { // Q = M ^ !T = M ^ Q1 ^ Q2
+ emith_and_r_r_imm(tmp, sr, M);
+ emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT);
+ }
+ rcache_free_tmp(tmp);
goto end_op;
case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101
tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL);
// local forward jump
target = tcache_ptr;
blx_targets[blx_target_count++] =
- (struct linkage) { .pc = target_pc, .ptr = target, .bl = NULL };
+ (struct linkage) { .pc = target_pc, .ptr = target, .mask = 0x2 };
if (cond != -1)
emith_jump_cond_patchable(cond, target);
else {