git subrepo pull --force deps/lightrec
author Paul Cercueil <paul@crapouillou.net>
Fri, 8 Apr 2022 17:41:22 +0000 (18:41 +0100)
committer Paul Cercueil <paul@crapouillou.net>
Fri, 8 Apr 2022 17:41:22 +0000 (18:41 +0100)
subrepo:
  subdir:   "deps/lightrec"
  merged:   "ce40f838"
upstream:
  origin:   "https://github.com/pcercuei/lightrec.git"
  branch:   "master"
  commit:   "ce40f838"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "2f68596"

deps/lightrec/.gitrepo
deps/lightrec/CMakeLists.txt
deps/lightrec/emitter.c
deps/lightrec/lightrec-private.h
deps/lightrec/lightrec.c
deps/lightrec/optimizer.c

index 38490c7..4b96823 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/lightrec.git
        branch = master
-       commit = 747da9c5d3e485f853b21bab3d158bd9b14d0500
-       parent = e8633a2e14027e4552940ef3e1c27c40b94c4870
+       commit = ce40f8388079945b60fd3f3dbef8ebaddf6f2685
+       parent = 1f22b268b62cf9a3fad39b9b642ded0890902f58
        method = merge
        cmdver = 0.4.3
index 6a139f4..9ff58d6 100644 (file)
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.0)
-project(lightrec LANGUAGES C VERSION 0.4)
+project(lightrec LANGUAGES C VERSION 0.5)
 
 set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries")
 if (NOT BUILD_SHARED_LIBS)
index 99f6756..578af87 100644 (file)
@@ -827,7 +827,7 @@ static void rec_alu_div(struct lightrec_cstate *state,
        u8 reg_hi = get_mult_div_hi(c);
        jit_state_t *_jit = block->_jit;
        jit_node_t *branch, *to_end;
-       u8 lo, hi, rs, rt, rflags = 0;
+       u8 lo = 0, hi = 0, rs, rt, rflags = 0;
 
        jit_note(__FILE__, __LINE__);
 
@@ -985,24 +985,20 @@ static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block
 {
        struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
-       u8 tmp, tmp2, tmp3;
+       u8 tmp, tmp3;
 
        if (with_arg)
                tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
-       tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
        tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
 
        jit_ldxi(tmp, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, c_wrapper));
-       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, c_wrappers[wrapper]));
+                offsetof(struct lightrec_state, wrappers_eps[wrapper]));
        if (with_arg)
                jit_movi(tmp3, arg);
 
        jit_callr(tmp);
 
        lightrec_free_reg(reg_cache, tmp);
-       lightrec_free_reg(reg_cache, tmp2);
        if (with_arg)
                lightrec_free_reg(reg_cache, tmp3);
        lightrec_regcache_mark_live(reg_cache, _jit);
@@ -1416,21 +1412,7 @@ static void rec_special_BREAK(struct lightrec_cstate *state,
        rec_break_syscall(state, block, offset, true);
 }
 
-static void rec_mfc(struct lightrec_cstate *state,
-                   const struct block *block, u16 offset)
-{
-       struct regcache *reg_cache = state->reg_cache;
-       union code c = block->opcode_list[offset].c;
-       jit_state_t *_jit = block->_jit;
-
-       jit_note(__FILE__, __LINE__);
-       lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
-
-       call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MFC);
-}
-
-static void rec_mtc(struct lightrec_cstate *state,
-                   const struct block *block, u16 offset)
+static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        struct regcache *reg_cache = state->reg_cache;
        union code c = block->opcode_list[offset].c;
@@ -1483,7 +1465,7 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
        struct regcache *reg_cache = state->reg_cache;
        const union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       u8 rt, tmp, tmp2, status;
+       u8 rt, tmp = 0, tmp2, status;
 
        jit_note(__FILE__, __LINE__);
 
@@ -1518,13 +1500,13 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
                tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
                jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
                           offsetof(struct lightrec_state, regs.cp0[13]));
+
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
        }
 
        if (c.r.rd == 12) {
                status = rt;
        } else if (c.r.rd == 13) {
-               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
-
                /* Cause = (Cause & ~0x0300) | (value & 0x0300) */
                jit_andi(tmp2, rt, 0x0300);
                jit_ori(tmp, tmp, 0x0300);
@@ -1544,14 +1526,25 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
                jit_andi(tmp, tmp, 0x0300);
                jit_nei(tmp, tmp, 0);
                jit_andr(tmp, tmp, status);
+       }
+
+       if (c.r.rd == 12) {
+               /* Exit dynarec in case we unmask a hardware interrupt.
+                * exit_flags = !(~status & 0x401) */
+
+               jit_comr(tmp2, status);
+               jit_andi(tmp2, tmp2, 0x401);
+               jit_eqi(tmp2, tmp2, 0);
+               jit_orr(tmp, tmp, tmp2);
+       }
+
+       if (c.r.rd == 12 || c.r.rd == 13) {
                jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
                           LIGHTREC_REG_STATE, tmp);
 
                lightrec_free_reg(reg_cache, tmp);
-       }
-
-       if (c.r.rd == 13)
                lightrec_free_reg(reg_cache, tmp2);
+       }
 
        lightrec_free_reg(reg_cache, rt);
 
@@ -1591,29 +1584,253 @@ static void rec_cp0_CTC0(struct lightrec_cstate *state,
 static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       const u32 zext_regs = 0x300f0080;
+       u8 rt, tmp, tmp2, tmp3, out, flags;
+       u8 reg = c.r.rd == 15 ? 14 : c.r.rd;
+       unsigned int i;
+
        _jit_name(block->_jit, __func__);
-       rec_mfc(state, block, offset);
+
+       flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT;
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, flags);
+
+       switch (reg) {
+       case 1:
+       case 3:
+       case 5:
+       case 8:
+       case 9:
+       case 10:
+       case 11:
+               jit_ldxi_s(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               break;
+       case 7:
+       case 16:
+       case 17:
+       case 18:
+       case 19:
+               jit_ldxi_us(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               break;
+       case 28:
+       case 29:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+               tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               for (i = 0; i < 3; i++) {
+                       out = i == 0 ? rt : tmp;
+
+                       jit_ldxi_s(tmp, LIGHTREC_REG_STATE,
+                                  offsetof(struct lightrec_state, regs.cp2d[9 + i]));
+                       jit_movi(tmp2, 0x1f);
+                       jit_rshi(out, tmp, 7);
+
+                       jit_ltr(tmp3, tmp2, out);
+                       jit_movnr(out, tmp2, tmp3);
+
+                       jit_gei(tmp2, out, 0);
+                       jit_movzr(out, tmp2, tmp2);
+
+                       if (i > 0) {
+                               jit_lshi(tmp, tmp, 5 * i);
+                               jit_orr(rt, rt, tmp);
+                       }
+               }
+
+
+               lightrec_free_reg(reg_cache, tmp);
+               lightrec_free_reg(reg_cache, tmp2);
+               lightrec_free_reg(reg_cache, tmp3);
+               break;
+       default:
+               jit_ldxi_i(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               break;
+       }
+
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 rt;
+
        _jit_name(block->_jit, __func__);
-       rec_mfc(state, block, offset);
+
+       switch (c.r.rd) {
+       case 4:
+       case 12:
+       case 20:
+       case 26:
+       case 27:
+       case 29:
+       case 30:
+               rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT);
+               jit_ldxi_s(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
+               break;
+       default:
+               rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT);
+               jit_ldxi_i(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
+               break;
+       }
+
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       jit_node_t *loop, *to_loop;
+       u8 rt, tmp, tmp2, flags = 0;
+
        _jit_name(block->_jit, __func__);
-       rec_mtc(state, block, offset);
+
+       if (c.r.rd == 31)
+               return;
+
+       if (c.r.rd == 30)
+               flags |= REG_EXT;
+
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
+
+       switch (c.r.rd) {
+       case 15:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[13]));
+
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+               jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[14]));
+
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]),
+                          LIGHTREC_REG_STATE, tmp);
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]),
+                          LIGHTREC_REG_STATE, tmp2);
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]),
+                          LIGHTREC_REG_STATE, rt);
+
+               lightrec_free_reg(reg_cache, tmp);
+               lightrec_free_reg(reg_cache, tmp2);
+               break;
+       case 28:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               jit_lshi(tmp, rt, 7);
+               jit_andi(tmp, tmp, 0xf80);
+               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]),
+                          LIGHTREC_REG_STATE, tmp);
+
+               jit_lshi(tmp, rt, 2);
+               jit_andi(tmp, tmp, 0xf80);
+               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]),
+                          LIGHTREC_REG_STATE, tmp);
+
+               jit_rshi(tmp, rt, 3);
+               jit_andi(tmp, tmp, 0xf80);
+               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]),
+                          LIGHTREC_REG_STATE, tmp);
+
+               lightrec_free_reg(reg_cache, tmp);
+               break;
+       case 30:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               /* if (rt < 0) rt = ~rt; */
+               jit_rshi(tmp, rt, 31);
+               jit_xorr(tmp, rt, tmp);
+
+               /* We know the sign bit is 0. Left-shift by 1 to start the algorithm */
+               jit_lshi(tmp, tmp, 1);
+               jit_movi(tmp2, 33);
+
+               /* Decrement tmp2 and right-shift the value by 1 until it equals zero */
+               loop = jit_label();
+               jit_subi(tmp2, tmp2, 1);
+               jit_rshi_u(tmp, tmp, 1);
+               to_loop = jit_bnei(tmp, 0);
+
+               jit_patch_at(to_loop, loop);
+
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]),
+                          LIGHTREC_REG_STATE, tmp2);
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]),
+                          LIGHTREC_REG_STATE, rt);
+
+               lightrec_free_reg(reg_cache, tmp);
+               lightrec_free_reg(reg_cache, tmp2);
+               break;
+       default:
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]),
+                          LIGHTREC_REG_STATE, rt);
+               break;
+       }
+
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 rt, tmp, tmp2;
+
        _jit_name(block->_jit, __func__);
-       rec_mtc(state, block, offset);
+
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
+
+       switch (c.r.rd) {
+       case 4:
+       case 12:
+       case 20:
+       case 26:
+       case 27:
+       case 29:
+       case 30:
+               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
+                          LIGHTREC_REG_STATE, rt);
+               break;
+       case 31:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               jit_andi(tmp, rt, 0x7f87e000);
+               jit_nei(tmp, tmp, 0);
+               jit_lshi(tmp, tmp, 31);
+
+               jit_andi(tmp2, rt, 0x7ffff000);
+               jit_orr(tmp, tmp2, tmp);
+
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]),
+                          LIGHTREC_REG_STATE, tmp);
+
+               lightrec_free_reg(reg_cache, tmp);
+               lightrec_free_reg(reg_cache, tmp2);
+               break;
+
+       default:
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
+                          LIGHTREC_REG_STATE, rt);
+       }
+
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_cp0_RFE(struct lightrec_cstate *state,
index e9efcb5..4b797a1 100644 (file)
@@ -98,7 +98,6 @@ struct lightrec_branch_target {
 enum c_wrappers {
        C_WRAPPER_RW,
        C_WRAPPER_RW_GENERIC,
-       C_WRAPPER_MFC,
        C_WRAPPER_MTC,
        C_WRAPPER_CP,
        C_WRAPPER_SYSCALL,
@@ -128,7 +127,8 @@ struct lightrec_state {
        u32 exit_flags;
        u32 old_cycle_counter;
        struct block *dispatcher, *c_wrapper_block;
-       void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT];
+       void *c_wrappers[C_WRAPPERS_COUNT];
+       void *wrappers_eps[C_WRAPPERS_COUNT];
        struct tinymm *tinymm;
        struct blockcache *block_cache;
        struct recompiler *rec;
index 3a6e112..d4ab419 100644 (file)
@@ -406,17 +406,9 @@ u32 lightrec_mfc(struct lightrec_state *state, union code op)
                return state->regs.cp2c[op.r.rd];
 }
 
-static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
-{
-       u32 rt = lightrec_mfc(state, op);
-
-       if (op.r.rt)
-               state->regs.gpr[op.r.rt] = rt;
-}
-
 static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
 {
-       u32 status, cause;
+       u32 status, oldstatus, cause;
 
        switch (reg) {
        case 1:
@@ -426,12 +418,13 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
        case 15:
                /* Those registers are read-only */
                return;
-       default: /* fall-through */
+       default:
                break;
        }
 
        if (reg == 12) {
                status = state->regs.cp0[12];
+               oldstatus = status;
 
                if (status & ~data & BIT(16)) {
                        state->ops.enable_ram(state, true);
@@ -441,14 +434,24 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
                }
        }
 
-       state->regs.cp0[reg] = data;
+       if (reg == 13) {
+               state->regs.cp0[13] &= ~0x300;
+               state->regs.cp0[13] |= data & 0x300;
+       } else {
+               state->regs.cp0[reg] = data;
+       }
 
        if (reg == 12 || reg == 13) {
                cause = state->regs.cp0[13];
                status = state->regs.cp0[12];
 
+               /* Handle software interrupts */
                if (!!(status & cause & 0x300) & status)
                        lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
+
+               /* Handle hardware interrupts */
+               if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401))
+                       lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
        }
 }
 
@@ -684,6 +687,7 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        int stack_ptr;
        jit_word_t code_size;
        jit_node_t *to_tramp, *to_fn_epilog;
+       jit_node_t *addr[C_WRAPPERS_COUNT - 1];
 
        block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
        if (!block)
@@ -698,9 +702,22 @@ static struct block * generate_wrapper(struct lightrec_state *state)
 
        /* Wrapper entry point */
        jit_prolog();
+       jit_tramp(256);
+
+       /* Add entry points; separate them by opcodes that increment
+        * LIGHTREC_REG_STATE (since we cannot touch other registers).
+        * The difference will then tell us which C function to call. */
+       for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) {
+               jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8);
+               addr[i - 1] = jit_indirect();
+       }
+
+       jit_epilog();
+       jit_prolog();
 
        stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);
 
+       /* Save all temporaries on stack */
        for (i = 0; i < NUM_TEMPS; i++)
                jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
 
@@ -710,6 +727,7 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        /* The trampoline will jump back here */
        to_fn_epilog = jit_label();
 
+       /* Restore temporaries from stack */
        for (i = 0; i < NUM_TEMPS; i++)
                jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));
 
@@ -724,6 +742,13 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        jit_tramp(256);
        jit_patch(to_tramp);
 
+       /* Retrieve the wrapper function */
+       jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, c_wrappers));
+
+       /* Restore LIGHTREC_REG_STATE to its correct value */
+       jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state);
+
        jit_prepare();
        jit_pushargr(LIGHTREC_REG_STATE);
        jit_pushargr(LIGHTREC_REG_CYCLE);
@@ -741,6 +766,11 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        block->flags = 0;
        block->nb_ops = 0;
 
+       state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function;
+
+       for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
+               state->wrappers_eps[i] = jit_address(addr[i]);
+
        jit_get_code(&code_size);
        lightrec_register(MEM_FOR_CODE, code_size);
 
@@ -943,7 +973,7 @@ err_no_mem:
 
 union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
 {
-       void *host;
+       void *host = NULL;
 
        lightrec_get_map(state, &host, kunseg(pc));
 
@@ -1261,13 +1291,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                         * finishes. */
                        if (ENABLE_THREADED_COMPILER)
                                lightrec_recompiler_remove(state->rec, block2);
+               }
 
-                       /* We know from now on that block2 isn't going to be
-                        * compiled. We can override the LUT entry with our
-                        * new block's entry point. */
-                       offset = lut_offset(block->pc) + target->offset;
-                       state->code_lut[offset] = jit_address(target->label);
+               /* We know from now on that block2 (if present) isn't going to
+                * be compiled. We can override the LUT entry with our new
+                * block's entry point. */
+               offset = lut_offset(block->pc) + target->offset;
+               state->code_lut[offset] = jit_address(target->label);
 
+               if (block2) {
                        pr_debug("Reap block 0x%08x as it's covered by block "
                                 "0x%08x\n", block2->pc, block->pc);
 
@@ -1487,11 +1519,8 @@ struct lightrec_state * lightrec_init(char *argv0,
        if (!state->c_wrapper_block)
                goto err_free_dispatcher;
 
-       state->c_wrapper = state->c_wrapper_block->function;
-
        state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb;
        state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
-       state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb;
        state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
        state->c_wrappers[C_WRAPPER_CP] = lightrec_cp;
        state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
index 98a26f6..f719192 100644 (file)
@@ -463,6 +463,10 @@ static u32 lightrec_propagate_consts(const struct opcode *op, u32 known, u32 *v)
 {
        union code c = op->c;
 
+       /* Register $zero is always, well, zero */
+       known |= BIT(0);
+       v[0] = 0;
+
        if (op->flags & LIGHTREC_SYNC)
                return 0;
 
@@ -833,10 +837,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                if (!op->opcode)
                        continue;
 
-               /* Register $zero is always, well, zero */
-               known |= BIT(0);
-               values[0] = 0;
-
                switch (op->i.op) {
                case OP_BEQ:
                        if (op->i.rs == op->i.rt) {
@@ -1238,10 +1238,6 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
        for (i = 0; i < block->nb_ops; i++) {
                list = &block->opcode_list[i];
 
-               /* Register $zero is always, well, zero */
-               known |= BIT(0);
-               values[0] = 0;
-
                switch (list->i.op) {
                case OP_SB:
                case OP_SH:
@@ -1476,11 +1472,22 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset,
        }
 }
 
+static bool lightrec_always_skip_div_check(void)
+{
+#ifdef __mips__
+       return true;
+#else
+       return false;
+#endif
+}
+
 static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
 {
        struct opcode *list;
        u8 reg_hi, reg_lo;
        unsigned int i;
+       u32 known = BIT(0);
+       u32 values[32] = { 0 };
 
        for (i = 0; i < block->nb_ops - 1; i++) {
                list = &block->opcode_list[i];
@@ -1489,19 +1496,27 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *
                        continue;
 
                switch (list->r.op) {
-               case OP_SPECIAL_MULT:
-               case OP_SPECIAL_MULTU:
                case OP_SPECIAL_DIV:
                case OP_SPECIAL_DIVU:
+                       /* If we are dividing by a non-zero constant, don't
+                        * emit the div-by-zero check. */
+                       if (lightrec_always_skip_div_check() ||
+                           (known & BIT(list->c.r.rt) && values[list->c.r.rt]))
+                               list->flags |= LIGHTREC_NO_DIV_CHECK;
+               case OP_SPECIAL_MULT: /* fall-through */
+               case OP_SPECIAL_MULTU:
                        break;
                default:
+                       known = lightrec_propagate_consts(list, known, values);
                        continue;
                }
 
                /* Don't support opcodes in delay slots */
                if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
-                   (list->flags & LIGHTREC_NO_DS))
+                   (list->flags & LIGHTREC_NO_DS)) {
+                       known = lightrec_propagate_consts(list, known, values);
                        continue;
+               }
 
                reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
                if (reg_lo == 0) {
@@ -1543,6 +1558,8 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *
                } else {
                        list->r.imm = 0;
                }
+
+               known = lightrec_propagate_consts(list, known, values);
        }
 
        return 0;