From: kub Date: Tue, 19 Nov 2019 20:59:44 +0000 (+0100) Subject: sh2 drc, small improvements and bug fixes for code emitters X-Git-Tag: v2.00~812 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f2d19ddf2a4f2d8f3950d3d5dd90fdcd74cc7a82;p=picodrive.git sh2 drc, small improvements and bug fixes for code emitters --- diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 8f633fa3..8ea148eb 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -478,6 +478,7 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int switch (op) { case A_OP_MOV: + case A_OP_MVN: rn = 0; // use MVN if more bits 1 than 0 if (count_bits(imm) > 16) { @@ -501,7 +502,7 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int return; } #else - for (i = 2, u = v; i > 0; i--, u >>= 8) + for (i = 3, u = v; i > 0; i--, u >>= 8) while (u > 0xff && !(u & 3)) u >>= 2; if (u) { // 4 insns needed... @@ -1387,22 +1388,25 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) } while (0) /* + * T = carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * T ^= !carry(Rn += Rm) * else - * t = carry(Rn -= Rm) - * T ^= t + * T ^= !carry(Rn -= Rm) */ #define emith_sh2_div1_step(rn, rm, sr) do { \ void *jmp0, *jmp1; \ + emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ JMP_POS(jmp0); /* beq do_sub */ \ - emith_addf_r_r(rn, rm); \ - emith_eor_r_imm_c(A_COND_CS, sr, T); \ + emith_addf_r_r(rn, rm); /* Rn += Rm */ \ + emith_eor_r_imm_c(A_COND_CC, sr, T); \ JMP_POS(jmp1); /* b done */ \ JMP_EMIT(A_COND_EQ, jmp0); /* do_sub: */ \ - emith_subf_r_r(rn, rm); \ - emith_eor_r_imm_c(A_COND_CC, sr, T); \ + emith_subf_r_r(rn, rm); /* Rn -= Rm */ \ + emith_eor_r_imm_c(A_COND_CS, sr, T); \ JMP_EMIT(A_COND_AL, jmp1); /* done: */ \ } while (0) diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 3f40d4cd..8f4718ee 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -372,7 +372,7 @@ enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; #define EMITH_HINT_COND(cond) /**/ -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM32 will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -1240,22 +1240,26 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) } while (0) /* + * T = carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * t = !carry(Rn += Rm) * else - * t = carry(Rn -= Rm) + * t = !carry(Rn -= Rm) * T ^= t */ #define emith_sh2_div1_step(rn, rm, sr) do { \ int tmp_ = rcache_get_tmp(); \ - emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + emith_tpop_carry(sr, 0); \ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); \ + emith_tst_r_imm(sr, Q); \ EMITH_SJMP3_START(DCOND_EQ); \ emith_addf_r_r(rn, rm); \ emith_adc_r_r_r(tmp_, Z0, Z0); \ + emith_eor_r_imm(tmp_, 1); \ EMITH_SJMP3_MID(DCOND_EQ); \ emith_subf_r_r(rn, rm); \ emith_adc_r_r_r(tmp_, Z0, Z0); \ - emith_eor_r_imm(tmp_, 1); \ EMITH_SJMP3_END(); \ emith_eor_r_r(sr, tmp_); \ rcache_free_tmp(tmp_); \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 6f07e509..c9c006c8 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -7,9 +7,10 @@ */ #define HOST_REGS 32 -// MIPS ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra), +// MIPS32 ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra) // saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp) // r1,r15,r24,r25(at,t7-t9) are used internally by the code emitter +// MIPSN32/MIPS64 ABI: params: r4-r11, no caller-reserved save area on stack #define RET_REG 2 // v0 #define PARAM_REGS { 4, 5, 6, 7 } // a0-a3 #define PRESERVED_REGS { 16, 17, 18, 19, 20, 21, 22, 23 } // s0-s7 @@ -424,7 +425,7 @@ static void *emith_branch(u32 op) JMP_EMIT_NC(else_ptr); \ } -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM32 will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -761,7 +762,7 @@ static void emith_move_imm(int r, uintptr_t imm) EMIT(MIPS_OR_IMM(r, r, imm & 0xffff)); } else #endif - if ((s16)imm == imm) { + if ((s16)imm == imm) { EMIT(MIPS_ADD_IMM(r, Z0, imm)); } else if (!((u32)imm >> 16)) { EMIT(MIPS_OR_IMM(r, Z0, imm)); @@ -1576,22 +1577,31 @@ static int emith_cond_check(int cond, int *r) } while (0) /* + * T = !carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * C = carry(Rn += Rm) * else - * t = carry(Rn -= Rm) - * T ^= t + * C = carry(Rn -= Rm) + * T ^= C */ #define emith_sh2_div1_step(rn, rm, sr) do { \ + int t_ = rcache_get_tmp(); \ + emith_and_r_r_imm(AT, sr, T); \ + emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \ + emith_lsl(t_, rn, 1); \ + emith_or_r_r(t_, AT); \ + emith_or_r_imm(sr, T); /* T = !carry */ \ + emith_eor_r_r(sr, FC); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ EMITH_JMP3_START(DCOND_EQ); \ - EMITH_HINT_COND(DCOND_CS); \ - emith_addf_r_r(rn, rm); \ + emith_add_r_r_r(rn, t_, rm); \ + EMIT(MIPS_SLTU_REG(FC, rn, t_)); \ EMITH_JMP3_MID(DCOND_EQ); \ - EMITH_HINT_COND(DCOND_CS); \ - emith_subf_r_r(rn, rm); \ + emith_sub_r_r_r(rn, t_, rm); \ + EMIT(MIPS_SLTU_REG(FC, t_, rn)); \ EMITH_JMP3_END(); \ - emith_eor_r_r(sr, FC); \ + emith_eor_r_r(sr, FC); /* T ^= carry */ \ + rcache_free_tmp(t_); \ } while (0) /* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index 84c3ccb2..b66d6350 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -7,7 +7,7 @@ */ #define HOST_REGS 32 -// RISC-V ABI: params: x10-x17, return: r10-x11, temp: x1(ra),x5-x7,x28-x31 +// RISC-V ABI: params: x10-x17, return: x10-x11, temp: x1(ra),x5-x7,x28-x31 // saved: x8(fp),x9,x18-x27, reserved: x0(zero), x4(tp), x3(gp), x2(sp) // x28-x31(t3-t6) are used internally by the code emitter #define RET_REG 10 // a0 @@ -74,13 +74,14 @@ _CB(imm,8,12,0), rd, op) // opcode -enum { OP_LUI=0x37, OP_JAL=0x6f, OP_JALR=0x67, OP_BCOND=0x63, OP_LD=0x03, - OP_ST=0x23, OP_IMM=0x13, OP_IMM32=0x1b, OP_REG=0x33, OP_REG32=0x3b }; +enum { OP_LUI=0x37, OP_AUIPC=0x17, OP_JAL=0x6f, // 20-bit immediate + OP_JALR=0x67, OP_BCOND=0x63, OP_LD=0x03, OP_ST=0x23, // 12-bit immediate + OP_IMM=0x13, OP_REG=0x33, OP_IMM32=0x1b, OP_REG32=0x3b }; // func3 -enum { F1_ADD, F1_SL, F1_SLT, F1_SLTU, F1_XOR, F1_SR, F1_OR, F1_AND }; -enum { F1_BEQ, F1_BNE, F1_BLT=4, F1_BGE, F1_BLTU, F1_BGEU }; -enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU }; +enum { F1_ADD, F1_SL, F1_SLT, F1_SLTU, F1_XOR, F1_SR, F1_OR, F1_AND };// IMM/REG enum { F1_MUL, F1_MULH, F1_MULHSU, F1_MULHU, F1_DIV, F1_DIVU, F1_REM, F1_REMU }; +enum { F1_BEQ, F1_BNE, F1_BLT=4, F1_BGE, F1_BLTU, F1_BGEU }; // BCOND +enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU }; // LD/ST // func7 enum { F2_ALT=0x20, F2_MULDIV=0x01 }; @@ -141,6 +142,8 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; R5_OR_IMM(rd, Z0, imm12) #define R5_MOVT_IMM(rd, imm20) \ R5_U_INSN(OP_LUI, rd, imm20) +#define R5_MOVA_IMM(rd, imm20) \ + R5_U_INSN(OP_AUIPC, rd, imm20) // rd = rs SHIFT imm5/imm6 #define R5_LSL_IMM(rd, rs, bits) \ @@ -212,8 +215,10 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; #define PTR_SCALE 3 // NB: must split 64 bit result into 2 32 bit registers -// NB: this expects 32 bit values in s1+s2, correctly sign extended to 64 bits +// NB: expects 32 bit values in s1+s2, correctly sign extended to 64 bits #define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \ + /*EMIT(R5_ADDW_IMM(s1, s1, 0));*/ \ + /*EMIT(R5_ADDW_IMM(s2, s2, 0));*/ \ EMIT(R5_MUL(dlo, s1, s2)); \ EMIT(R5_LSR_IMM(dhi, dlo, 32)); \ EMIT(R5_LSL_IMM(dlo, dlo, 32)); \ @@ -307,7 +312,7 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; JMP_EMIT_NC(else_ptr); \ } -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM32 will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -620,6 +625,67 @@ static void emith_set_compare_flags(int rs, int rt, s32 imm) // move immediate +#define MAX_HOST_LITERALS 32 // pool must be smaller than 4 KB +static uintptr_t literal_pool[MAX_HOST_LITERALS]; +static u32 *literal_insn[MAX_HOST_LITERALS]; +static int literal_pindex, literal_iindex; + +static inline int emith_pool_literal(uintptr_t imm) +{ + int idx = literal_pindex - 8; // max look behind in pool + // see if one of the last literals was the same (or close enough) + for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++) + if (imm == literal_pool[idx]) + break; + if (idx == literal_pindex) // store new literal + literal_pool[literal_pindex++] = imm; + return idx; +} + +static void emith_pool_commit(int jumpover) +{ + int i, sz = literal_pindex * sizeof(uintptr_t); + u8 *pool = (u8 *)tcache_ptr; + + // nothing to commit if pool is empty + if (sz == 0) + return; + // align pool to pointer size + if (jumpover) + pool += sizeof(u32); + i = (uintptr_t)pool & (sizeof(void *)-1); + pool += (i ? sizeof(void *)-i : 0); + // need branch over pool if not at block end + if (jumpover) + EMIT(R5_B(sz + (pool-(u8 *)tcache_ptr))); + // safety check - pool must be after insns and reachable + if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0x7ff) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool offset out of range"); + exit(1); + } + // copy pool and adjust addresses in insns accessing the pool + memcpy(pool, literal_pool, sz); + for (i = 0; i < literal_iindex; i++) { + *literal_insn[i] += ((u8 *)pool - (u8 *)literal_insn[i]) << 20; + } + // count pool constants as insns for statistics + for (i = 0; i < literal_pindex * sizeof(uintptr_t)/sizeof(u32); i++) + COUNT_OP; + + tcache_ptr = (void *)((u8 *)pool + sz); + literal_pindex = literal_iindex = 0; +} + +static void emith_pool_check(void) +{ + // check if pool must be committed + if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex && + (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0x700)) + // pool full, or displacement is approaching the limit + emith_pool_commit(1); +} + static void emith_move_imm(int r, uintptr_t imm) { u32 lui = imm + _CB(imm,1,11,12); @@ -632,8 +698,24 @@ static void emith_move_imm(int r, uintptr_t imm) EMIT(R5_ADD_IMM(r, Z0, imm)); } +static void emith_move_ptr_imm(int r, uintptr_t imm) +{ +#if __riscv_xlen == 64 + if ((s32)imm != imm) { + int idx; + if (literal_iindex >= MAX_HOST_LITERALS) + emith_pool_commit(1); + idx = emith_pool_literal(imm); + EMIT(R5_MOVA_IMM(AT, 0)); // loads PC of MOVA insn... + 4 in LD + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EMIT(R5_I_INSN(OP_LD, F1_P, r, AT, idx*sizeof(uintptr_t) + 4)); + } else +#endif + emith_move_imm(r, imm); +} + #define emith_move_r_ptr_imm(r, imm) \ - emith_move_imm(r, (uintptr_t)(imm)) + emith_move_ptr_imm(r, (uintptr_t)(imm)) #define emith_move_r_imm(r, imm) \ emith_move_imm(r, (u32)(imm)) @@ -644,7 +726,6 @@ static void emith_move_imm(int r, uintptr_t imm) EMIT(R5_ADD_IMM(r, Z0, (s8)(imm))) #define emith_move_r_imm_s8_patch(ptr, imm) do { \ u32 *ptr_ = (u32 *)ptr; \ - while ((*ptr_ & 0xff07f) != R5_ADD_IMM(Z0, Z0, 0)) ptr_++; \ EMIT_PTR(ptr_, (*ptr_ & 0x000fffff) | ((u16)(s8)(imm)<<20)); \ } while (0) @@ -1235,7 +1316,6 @@ static int emith_cond_check(int cond, int *r, int *s) // NB: returns position of patch for cache maintenance #define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr; /* must skip condition check code */ \ - while ((*ptr_&0x77) != OP_JALR && (*ptr_&0x77) != OP_BCOND) ptr_ ++; \ if ((*ptr_&0x77) == OP_BCOND) { \ u32 *p_ = ptr_, disp_ = (u8 *)target - (u8 *)ptr_; \ u32 f1_ = _CB(*ptr_,3,12,0); \ @@ -1319,8 +1399,6 @@ static int emith_cond_check(int cond, int *r, int *s) // emitter ABI stuff -#define emith_pool_check() /**/ -#define emith_pool_commit(j) /**/ #define emith_insn_ptr() ((u8 *)tcache_ptr) #define emith_flush() /**/ #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) @@ -1404,22 +1482,31 @@ static int emith_cond_check(int cond, int *r, int *s) } while (0) /* + * T = !carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * C = carry(Rn += Rm) * else - * t = carry(Rn -= Rm) - * T ^= t + * C = carry(Rn -= Rm) + * T ^= C */ #define emith_sh2_div1_step(rn, rm, sr) do { \ + int t_ = rcache_get_tmp(); \ + emith_and_r_r_imm(AT, sr, T); \ + emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \ + emith_lsl(t_, rn, 1); \ + emith_or_r_r(t_, AT); \ + emith_or_r_imm(sr, T); /* T = !carry */ \ + emith_eor_r_r(sr, FC); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ EMITH_JMP3_START(DCOND_EQ); \ - EMITH_HINT_COND(DCOND_CS); \ - emith_addf_r_r(rn, rm); \ + emith_add_r_r_r(rn, t_, rm); \ + EMIT(R5_SLTU_REG(FC, rn, t_)); \ EMITH_JMP3_MID(DCOND_EQ); \ - EMITH_HINT_COND(DCOND_CS); \ - emith_subf_r_r(rn, rm); \ + emith_sub_r_r_r(rn, t_, rm); \ + EMIT(R5_SLTU_REG(FC, t_, rn)); \ EMITH_JMP3_END(); \ - emith_eor_r_r(sr, FC); \ + emith_eor_r_r(sr, FC); /* T ^= carry */ \ + rcache_free_tmp(t_); \ } while (0) /* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 9ed8b563..0b3f7697 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -974,7 +974,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common JMP8_EMIT_NC(else_ptr); \ } -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -1287,15 +1287,19 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_adc_r_r(sr, sr) /* + * T = carry(Rn = (Rn << 1) | T) * if Q * t = carry(Rn += Rm) * else * t = carry(Rn -= Rm) - * T ^= t + * T = !(T ^ t) */ #define emith_sh2_div1_step(rn, rm, sr) do { \ u8 *jmp0, *jmp1; \ int tmp_ = rcache_get_tmp(); \ + emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); /* T = C1 */ \ emith_eor_r_r(tmp_, tmp_); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ JMP8_POS(jmp0); /* je do_sub */ \ @@ -1305,7 +1309,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_sub_r_r(rn, rm); \ JMP8_EMIT_NC(jmp1); /* done: */ \ emith_adc_r_r(tmp_, tmp_); \ - emith_eor_r_r(sr, tmp_); \ + emith_eor_r_r(sr, tmp_);/* T = !(C1^C2) */\ + emith_eor_r_imm(sr, T); \ rcache_free_tmp(tmp_); \ } while (0) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 58ddd86f..a12dfe96 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2957,20 +2957,18 @@ static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tca struct block_link *bl; int u, v, tmp; + emith_flush(); for (u = 0; u < link_count; u++) { emith_pool_check(); // look up local branch targets - v = find_in_sorted_linkage(targets, target_count, links[u].pc); - if (v >= 0) { - if (! targets[v].ptr) { + if (links[u].mask & 0x2) { + v = find_in_sorted_linkage(targets, target_count, links[u].pc); + if (v < 0 || ! targets[v].ptr) { // forward branch not yet resolved, prepare external linking emith_jump_patch(links[u].ptr, tcache_ptr, NULL); bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id); - if (bl) { - emith_flush(); // flush to inhibit insn swapping + if (bl) bl->type = BL_LDJMP; - } - tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, links[u].pc); rcache_free_tmp(tmp); @@ -2985,7 +2983,7 @@ static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tca } } else { // external or exit, emit blx area entry - void *target = (links[u].pc & 1 ? sh2_drc_exit : sh2_drc_dispatcher); + void *target = (links[u].mask & 0x1 ? sh2_drc_exit : sh2_drc_dispatcher); if (links[u].bl) links[u].bl->blx = tcache_ptr; emith_jump_patch(links[u].ptr, tcache_ptr, NULL); @@ -3024,6 +3022,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u8 op_flags[BLOCK_INSN_LIMIT]; + enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 }; struct drcf { int delay_reg:8; u32 loop_type:8; @@ -3032,6 +3031,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; + u32 Tflag:2, Mflag:2; } drcf = { 0, }; #if LOOP_OPTIMIZER @@ -3169,7 +3169,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (m3 && count_bits(m3) < count_bits(rcache_vregs_reg) && pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) { pinned_loops[pinned_loop_count++] = - (struct linkage) { .mask = m3, .pc = base_pc + 2*v }; + (struct linkage) { .pc = base_pc + 2*v, .mask = m3 }; } else op_flags[v] &= ~OF_BASIC_LOOP; } @@ -3220,6 +3220,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); emith_sync_t(sr); + drcf.Mflag = FLG_UNKNOWN; rcache_flush(); emith_flush(); } @@ -3302,7 +3303,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) if (blx_target_count < ARRAY_SIZE(blx_targets)) { // exit via stub in blx table (saves some 1-3 insns in the main flow) blx_targets[blx_target_count++] = - (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; + (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 }; emith_jump_patchable(tcache_ptr); } else { // blx table full, must inline exit code @@ -3319,7 +3320,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // exit via stub in blx table (saves some 1-3 insns in the main flow) emith_cmp_r_imm(sr, 0); blx_targets[blx_target_count++] = - (struct linkage) { .ptr = tcache_ptr, .pc = pc|1, .bl = NULL }; + (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 }; emith_jump_cond_patchable(DCOND_LE, tcache_ptr); } else { // blx table full, must inline exit code @@ -3704,6 +3705,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); + drcf.Mflag = FLG_0; break; case 2: // MOVT Rn 0000nnnn00101001 sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); @@ -3781,6 +3783,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_eor_r_r_lsr(tmp, tmp2, 31); emith_or_r_r(sr, tmp); // T = Q^M rcache_free(tmp); + drcf.Mflag = FLG_UNKNOWN; goto end_op; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); @@ -3846,17 +3849,16 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); + tmp4 = rcache_get_tmp(); if (op & 1) { emith_sext(tmp, tmp2, 16); - } else + emith_sext(tmp4, tmp3, 16); + } else { emith_clear_msb(tmp, tmp2, 16); - tmp2 = rcache_get_tmp(); - if (op & 1) { - emith_sext(tmp2, tmp3, 16); - } else - emith_clear_msb(tmp2, tmp3, 16); - emith_mul(tmp, tmp, tmp2); - rcache_free_tmp(tmp2); + emith_clear_msb(tmp4, tmp3, 16); + } + emith_mul(tmp, tmp, tmp4); + rcache_free_tmp(tmp4); goto end_op; } goto default_; @@ -3904,28 +3906,27 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // Q = M ^ Q1 ^ Q2 // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2) tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp4); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL); sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_sync_t(sr); - EMITH_HINT_COND(DCOND_CS); - emith_tpop_carry(sr, 0); - emith_adcf_r_r_r(tmp2, tmp4, tmp4); - emith_tpush_carry(sr, 0); // keep Q1 in T for now - rcache_free(tmp4); tmp = rcache_get_tmp(); - emith_and_r_r_imm(tmp, sr, M); - emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M + if (drcf.Mflag != FLG_0) { + emith_and_r_r_imm(tmp, sr, M); + emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M + } rcache_free_tmp(tmp); - // add or sub, invert T if carry to get Q1 ^ Q2 - // in: (Q ^ M) passed in Q, Q1 in T + // shift Rn, add T, add or sub Rm, set T = !(Q1 ^ Q2) + // in: (Q ^ M) passed in Q emith_sh2_div1_step(tmp2, tmp3, sr); tmp = rcache_get_tmp(); - emith_bic_r_imm(sr, Q); // Q = M - emith_and_r_r_imm(tmp, sr, M); - emith_or_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); - emith_and_r_r_imm(tmp, sr, T); // Q = M ^ Q1 ^ Q2 + emith_or_r_imm(sr, Q); // Q = !T + emith_and_r_r_imm(tmp, sr, T); emith_eor_r_r_lsl(sr, tmp, Q_SHIFT); - emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2) + if (drcf.Mflag != FLG_0) { // Q = M ^ !T = M ^ Q1 ^ Q2 + emith_and_r_r_imm(tmp, sr, M); + emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); + } + rcache_free_tmp(tmp); goto end_op; case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); @@ -4627,7 +4628,7 @@ end_op: // local forward jump target = tcache_ptr; blx_targets[blx_target_count++] = - (struct linkage) { .pc = target_pc, .ptr = target, .bl = NULL }; + (struct linkage) { .pc = target_pc, .ptr = target, .mask = 0x2 }; if (cond != -1) emith_jump_cond_patchable(cond, target); else {