From fd58fa3240e46c4d6027c28f3882a4c584af7123 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 8 Apr 2022 18:41:22 +0100 Subject: [PATCH] git subrepo pull --force deps/lightrec subrepo: subdir: "deps/lightrec" merged: "ce40f838" upstream: origin: "https://github.com/pcercuei/lightrec.git" branch: "master" commit: "ce40f838" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" --- deps/lightrec/.gitrepo | 4 +- deps/lightrec/CMakeLists.txt | 2 +- deps/lightrec/emitter.c | 281 +++++++++++++++++++++++++++---- deps/lightrec/lightrec-private.h | 4 +- deps/lightrec/lightrec.c | 69 +++++--- deps/lightrec/optimizer.c | 39 +++-- 6 files changed, 331 insertions(+), 68 deletions(-) diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 38490c7f..4b96823a 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = 747da9c5d3e485f853b21bab3d158bd9b14d0500 - parent = e8633a2e14027e4552940ef3e1c27c40b94c4870 + commit = ce40f8388079945b60fd3f3dbef8ebaddf6f2685 + parent = 1f22b268b62cf9a3fad39b9b642ded0890902f58 method = merge cmdver = 0.4.3 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt index 6a139f4d..9ff58d62 100644 --- a/deps/lightrec/CMakeLists.txt +++ b/deps/lightrec/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.0) -project(lightrec LANGUAGES C VERSION 0.4) +project(lightrec LANGUAGES C VERSION 0.5) set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries") if (NOT BUILD_SHARED_LIBS) diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 99f6756d..578af874 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -827,7 +827,7 @@ static void rec_alu_div(struct lightrec_cstate *state, u8 reg_hi = get_mult_div_hi(c); jit_state_t *_jit = block->_jit; jit_node_t *branch, *to_end; - u8 lo, hi, rs, rt, rflags = 0; + u8 lo = 0, hi = 0, rs, rt, rflags = 0; jit_note(__FILE__, __LINE__); @@ -985,24 +985,20 @@ static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - u8 tmp, tmp2, tmp3; + u8 tmp, tmp3; if (with_arg) tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1); - tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R0); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); jit_ldxi(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrapper)); - jit_ldxi(tmp2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrappers[wrapper])); + offsetof(struct lightrec_state, wrappers_eps[wrapper])); if (with_arg) jit_movi(tmp3, arg); jit_callr(tmp); lightrec_free_reg(reg_cache, tmp); - lightrec_free_reg(reg_cache, tmp2); if (with_arg) lightrec_free_reg(reg_cache, tmp3); lightrec_regcache_mark_live(reg_cache, _jit); @@ -1416,21 +1412,7 @@ static void rec_special_BREAK(struct lightrec_cstate *state, rec_break_syscall(state, block, offset, true); } -static void rec_mfc(struct lightrec_cstate *state, - const struct block *block, u16 offset) -{ - struct regcache *reg_cache = state->reg_cache; - union code c = block->opcode_list[offset].c; - jit_state_t *_jit = block->_jit; - - jit_note(__FILE__, __LINE__); - lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true); - - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MFC); -} - -static void rec_mtc(struct lightrec_cstate *state, - const struct block *block, u16 offset) +static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -1483,7 +1465,7 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) struct regcache *reg_cache = state->reg_cache; const union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; - u8 rt, tmp, tmp2, status; + u8 rt, tmp = 0, tmp2, status; jit_note(__FILE__, __LINE__); @@ -1518,13 +1500,13 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) tmp = lightrec_alloc_reg_temp(reg_cache, _jit); jit_ldxi_i(tmp, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, regs.cp0[13])); + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); } if (c.r.rd == 12) { status = rt; } else if (c.r.rd == 13) { - tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); - /* Cause = (Cause & ~0x0300) | (value & 0x0300) */ jit_andi(tmp2, rt, 0x0300); jit_ori(tmp, tmp, 0x0300); @@ -1544,14 +1526,25 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) jit_andi(tmp, tmp, 0x0300); jit_nei(tmp, tmp, 0); jit_andr(tmp, tmp, status); + } + + if (c.r.rd == 12) { + /* Exit dynarec in case we unmask a hardware interrupt. + * exit_flags = !(~status & 0x401) */ + + jit_comr(tmp2, status); + jit_andi(tmp2, tmp2, 0x401); + jit_eqi(tmp2, tmp2, 0); + jit_orr(tmp, tmp, tmp2); + } + + if (c.r.rd == 12 || c.r.rd == 13) { jit_stxi_i(offsetof(struct lightrec_state, exit_flags), LIGHTREC_REG_STATE, tmp); lightrec_free_reg(reg_cache, tmp); - } - - if (c.r.rd == 13) lightrec_free_reg(reg_cache, tmp2); + } lightrec_free_reg(reg_cache, rt); @@ -1591,29 +1584,253 @@ static void rec_cp0_CTC0(struct lightrec_cstate *state, static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + const u32 zext_regs = 0x300f0080; + u8 rt, tmp, tmp2, tmp3, out, flags; + u8 reg = c.r.rd == 15 ? 14 : c.r.rd; + unsigned int i; + _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + + flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT; + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, flags); + + switch (reg) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + jit_ldxi_s(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[reg])); + break; + case 7: + case 16: + case 17: + case 18: + case 19: + jit_ldxi_us(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[reg])); + break; + case 28: + case 29: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); + + for (i = 0; i < 3; i++) { + out = i == 0 ? rt : tmp; + + jit_ldxi_s(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[9 + i])); + jit_movi(tmp2, 0x1f); + jit_rshi(out, tmp, 7); + + jit_ltr(tmp3, tmp2, out); + jit_movnr(out, tmp2, tmp3); + + jit_gei(tmp2, out, 0); + jit_movzr(out, tmp2, tmp2); + + if (i > 0) { + jit_lshi(tmp, tmp, 5 * i); + jit_orr(rt, rt, tmp); + } + } + + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + lightrec_free_reg(reg_cache, tmp3); + break; + default: + jit_ldxi_i(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[reg])); + break; + } + + lightrec_free_reg(reg_cache, rt); } static void rec_cp2_basic_CFC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt; + _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + + switch (c.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT); + jit_ldxi_s(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2c[c.r.rd])); + break; + default: + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT); + jit_ldxi_i(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2c[c.r.rd])); + break; + } + + lightrec_free_reg(reg_cache, rt); } static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + jit_node_t *loop, *to_loop; + u8 rt, tmp, tmp2, flags = 0; + _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + + if (c.r.rd == 31) + return; + + if (c.r.rd == 30) + flags |= REG_EXT; + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags); + + switch (c.r.rd) { + case 15: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[13])); + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[14])); + + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]), + LIGHTREC_REG_STATE, tmp); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]), + LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]), + LIGHTREC_REG_STATE, rt); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + case 28: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_lshi(tmp, rt, 7); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]), + LIGHTREC_REG_STATE, tmp); + + jit_lshi(tmp, rt, 2); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]), + LIGHTREC_REG_STATE, tmp); + + jit_rshi(tmp, rt, 3); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]), + LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); + break; + case 30: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + /* if (rt < 0) rt = ~rt; */ + jit_rshi(tmp, rt, 31); + jit_xorr(tmp, rt, tmp); + + /* We know the sign bit is 0. Left-shift by 1 to start the algorithm */ + jit_lshi(tmp, tmp, 1); + jit_movi(tmp2, 33); + + /* Decrement tmp2 and right-shift the value by 1 until it equals zero */ + loop = jit_label(); + jit_subi(tmp2, tmp2, 1); + jit_rshi_u(tmp, tmp, 1); + to_loop = jit_bnei(tmp, 0); + + jit_patch_at(to_loop, loop); + + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]), + LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]), + LIGHTREC_REG_STATE, rt); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + default: + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]), + LIGHTREC_REG_STATE, rt); + break; + } + + lightrec_free_reg(reg_cache, rt); } static void rec_cp2_basic_CTC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt, tmp, tmp2; + _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0); + + switch (c.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]), + LIGHTREC_REG_STATE, rt); + break; + case 31: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_andi(tmp, rt, 0x7f87e000); + jit_nei(tmp, tmp, 0); + jit_lshi(tmp, tmp, 31); + + jit_andi(tmp2, rt, 0x7ffff000); + jit_orr(tmp, tmp2, tmp); + + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]), + LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + + default: + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]), + LIGHTREC_REG_STATE, rt); + } + + lightrec_free_reg(reg_cache, rt); } static void rec_cp0_RFE(struct lightrec_cstate *state, diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index e9efcb5e..4b797a1d 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -98,7 +98,6 @@ struct lightrec_branch_target { enum c_wrappers { C_WRAPPER_RW, C_WRAPPER_RW_GENERIC, - C_WRAPPER_MFC, C_WRAPPER_MTC, C_WRAPPER_CP, C_WRAPPER_SYSCALL, @@ -128,7 +127,8 @@ struct lightrec_state { u32 exit_flags; u32 old_cycle_counter; struct block *dispatcher, *c_wrapper_block; - void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT]; + void *c_wrappers[C_WRAPPERS_COUNT]; + void *wrappers_eps[C_WRAPPERS_COUNT]; struct tinymm *tinymm; struct blockcache *block_cache; struct recompiler *rec; diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 3a6e1129..d4ab419f 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -406,17 +406,9 @@ u32 lightrec_mfc(struct lightrec_state *state, union code op) return state->regs.cp2c[op.r.rd]; } -static void lightrec_mfc_cb(struct lightrec_state *state, union code op) -{ - u32 rt = lightrec_mfc(state, op); - - if (op.r.rt) - state->regs.gpr[op.r.rt] = rt; -} - static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) { - u32 status, cause; + u32 status, oldstatus, cause; switch (reg) { case 1: @@ -426,12 +418,13 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) case 15: /* Those registers are read-only */ return; - default: /* fall-through */ + default: break; } if (reg == 12) { status = state->regs.cp0[12]; + oldstatus = status; if (status & ~data & BIT(16)) { state->ops.enable_ram(state, true); @@ -441,14 +434,24 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) } } - state->regs.cp0[reg] = data; + if (reg == 13) { + state->regs.cp0[13] &= ~0x300; + state->regs.cp0[13] |= data & 0x300; + } else { + state->regs.cp0[reg] = data; + } if (reg == 12 || reg == 13) { cause = state->regs.cp0[13]; status = state->regs.cp0[12]; + /* Handle software interrupts */ if (!!(status & cause & 0x300) & status) lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); + + /* Handle hardware interrupts */ + if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401)) + lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); } } @@ -684,6 +687,7 @@ static struct block * generate_wrapper(struct lightrec_state *state) int stack_ptr; jit_word_t code_size; jit_node_t *to_tramp, *to_fn_epilog; + jit_node_t *addr[C_WRAPPERS_COUNT - 1]; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -698,9 +702,22 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* Wrapper entry point */ jit_prolog(); + jit_tramp(256); + + /* Add entry points; separate them by opcodes that increment + * LIGHTREC_REG_STATE (since we cannot touch other registers). + * The difference will then tell us which C function to call. */ + for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { + jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8); + addr[i - 1] = jit_indirect(); + } + + jit_epilog(); + jit_prolog(); stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS); + /* Save all temporaries on stack */ for (i = 0; i < NUM_TEMPS; i++) jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); @@ -710,6 +727,7 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* The trampoline will jump back here */ to_fn_epilog = jit_label(); + /* Restore temporaries from stack */ for (i = 0; i < NUM_TEMPS; i++) jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t)); @@ -724,6 +742,13 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_tramp(256); jit_patch(to_tramp); + /* Retrieve the wrapper function */ + jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers)); + + /* Restore LIGHTREC_REG_STATE to its correct value */ + jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state); + jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(LIGHTREC_REG_CYCLE); @@ -741,6 +766,11 @@ static struct block * generate_wrapper(struct lightrec_state *state) block->flags = 0; block->nb_ops = 0; + state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function; + + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + state->wrappers_eps[i] = jit_address(addr[i]); + jit_get_code(&code_size); lightrec_register(MEM_FOR_CODE, code_size); @@ -943,7 +973,7 @@ err_no_mem: union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) { - void *host; + void *host = NULL; lightrec_get_map(state, &host, kunseg(pc)); @@ -1261,13 +1291,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, * finishes. */ if (ENABLE_THREADED_COMPILER) lightrec_recompiler_remove(state->rec, block2); + } - /* We know from now on that block2 isn't going to be - * compiled. We can override the LUT entry with our - * new block's entry point. */ - offset = lut_offset(block->pc) + target->offset; - state->code_lut[offset] = jit_address(target->label); + /* We know from now on that block2 (if present) isn't going to + * be compiled. We can override the LUT entry with our new + * block's entry point. */ + offset = lut_offset(block->pc) + target->offset; + state->code_lut[offset] = jit_address(target->label); + if (block2) { pr_debug("Reap block 0x%08x as it's covered by block " "0x%08x\n", block2->pc, block->pc); @@ -1487,11 +1519,8 @@ struct lightrec_state * lightrec_init(char *argv0, if (!state->c_wrapper_block) goto err_free_dispatcher; - state->c_wrapper = state->c_wrapper_block->function; - state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb; state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb; - state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb; state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; state->c_wrappers[C_WRAPPER_CP] = lightrec_cp; state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb; diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 98a26f60..f719192b 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -463,6 +463,10 @@ static u32 lightrec_propagate_consts(const struct opcode *op, u32 known, u32 *v) { union code c = op->c; + /* Register $zero is always, well, zero */ + known |= BIT(0); + v[0] = 0; + if (op->flags & LIGHTREC_SYNC) return 0; @@ -833,10 +837,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl if (!op->opcode) continue; - /* Register $zero is always, well, zero */ - known |= BIT(0); - values[0] = 0; - switch (op->i.op) { case OP_BEQ: if (op->i.rs == op->i.rt) { @@ -1238,10 +1238,6 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) for (i = 0; i < block->nb_ops; i++) { list = &block->opcode_list[i]; - /* Register $zero is always, well, zero */ - known |= BIT(0); - values[0] = 0; - switch (list->i.op) { case OP_SB: case OP_SH: @@ -1476,11 +1472,22 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset, } } +static bool lightrec_always_skip_div_check(void) +{ +#ifdef __mips__ + return true; +#else + return false; +#endif +} + static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block) { struct opcode *list; u8 reg_hi, reg_lo; unsigned int i; + u32 known = BIT(0); + u32 values[32] = { 0 }; for (i = 0; i < block->nb_ops - 1; i++) { list = &block->opcode_list[i]; @@ -1489,19 +1496,27 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * continue; switch (list->r.op) { - case OP_SPECIAL_MULT: - case OP_SPECIAL_MULTU: case OP_SPECIAL_DIV: case OP_SPECIAL_DIVU: + /* If we are dividing by a non-zero constant, don't + * emit the div-by-zero check. */ + if (lightrec_always_skip_div_check() || + (known & BIT(list->c.r.rt) && values[list->c.r.rt])) + list->flags |= LIGHTREC_NO_DIV_CHECK; + case OP_SPECIAL_MULT: /* fall-through */ + case OP_SPECIAL_MULTU: break; default: + known = lightrec_propagate_consts(list, known, values); continue; } /* Don't support opcodes in delay slots */ if ((i && has_delay_slot(block->opcode_list[i - 1].c)) || - (list->flags & LIGHTREC_NO_DS)) + (list->flags & LIGHTREC_NO_DS)) { + known = lightrec_propagate_consts(list, known, values); continue; + } reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false); if (reg_lo == 0) { @@ -1543,6 +1558,8 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * } else { list->r.imm = 0; } + + known = lightrec_propagate_consts(list, known, values); } return 0; -- 2.39.2