Update lightrec 20220910 (#686)
[pcsx_rearmed.git] / deps / lightrec / interpreter.c
index f586685..43bea83 100644 (file)
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
- * Copyright (C) 2019-2020 Paul Cercueil <paul@crapouillou.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
 #include "disassembler.h"
@@ -39,18 +30,45 @@ struct interpreter {
        struct opcode *op;
        u32 cycles;
        bool delay_slot;
+       u16 offset;
 };
 
+static u32 int_get_branch_pc(const struct interpreter *inter) /* Wrapper: calls get_branch_pc() for the interpreter's current block and opcode offset. */
+{
+       return get_branch_pc(inter->block, inter->offset, 0); /* third argument is fixed at 0; its meaning is defined by get_branch_pc(), which is not visible here */
+}
+
+static inline u32 int_get_ds_pc(const struct interpreter *inter, s16 imm) /* Wrapper: calls get_ds_pc() for the interpreter's current block and opcode offset. */
+{
+       return get_ds_pc(inter->block, inter->offset, imm); /* imm is forwarded unchanged; callers in this file pass 0, 1 or 2 */
+}
+
+static inline struct opcode *next_op(const struct interpreter *inter) /* Returns the opcode_list entry that follows the interpreter's current offset. */
+{
+       return &inter->block->opcode_list[inter->offset + 1]; /* no bounds check here; NOTE(review): presumably callers guarantee offset + 1 is inside the block - confirm */
+}
+
 static inline u32 execute(lightrec_int_func_t func, struct interpreter *inter)
 {
        return (*func)(inter);
 }
 
+static inline u32 lightrec_int_op(struct interpreter *inter) /* Runs the handler for the interpreter's current opcode and returns that handler's result. */
+{
+       return execute(int_standard[inter->op->i.op], inter); /* the handler is looked up in int_standard by the opcode's i.op field */
+}
+
 static inline u32 jump_skip(struct interpreter *inter)
 {
-       inter->op = inter->op->next;
+       inter->op = next_op(inter); /* step to the following opcode; must come before the offset++ below because next_op() reads inter->offset */
+       inter->offset++; /* keep the offset in step with inter->op */
 
-       return execute(int_standard[inter->op->i.op], inter);
+       if (op_flag_sync(inter->op->flags)) { /* the opcode just stepped onto has the sync flag set */
+               inter->state->current_cycle += inter->cycles; /* flush the locally counted cycles into the state's cycle counter */
+               inter->cycles = 0; /* restart the local count after the flush */
+       }
+
+       return lightrec_int_op(inter); /* execute the new current opcode and return its result */
 }
 
 static inline u32 jump_next(struct interpreter *inter)
@@ -70,7 +88,8 @@ static inline u32 jump_after_branch(struct interpreter *inter)
        if (unlikely(inter->delay_slot))
                return 0;
 
-       inter->op = inter->op->next;
+       inter->op = next_op(inter);
+       inter->offset++;
 
        return jump_skip(inter);
 }
@@ -82,9 +101,9 @@ static void update_cycles_before_branch(struct interpreter *inter)
        if (!inter->delay_slot) {
                cycles = lightrec_cycles_of_opcode(inter->op->c);
 
-               if (has_delay_slot(inter->op->c) &&
-                   !(inter->op->flags & LIGHTREC_NO_DS))
-                       cycles += lightrec_cycles_of_opcode(inter->op->next->c);
+               if (!op_flag_no_ds(inter->op->flags) &&
+                   has_delay_slot(inter->op->c))
+                       cycles += lightrec_cycles_of_opcode(next_op(inter)->c);
 
                inter->cycles += cycles;
                inter->state->current_cycle += inter->cycles;
@@ -101,10 +120,8 @@ static bool is_branch_taken(const u32 *reg_cache, union code op)
        case OP_JAL:
                return true;
        case OP_BEQ:
-       case OP_META_BEQZ:
                return reg_cache[op.r.rs] == reg_cache[op.r.rt];
        case OP_BNE:
-       case OP_META_BNEZ:
                return reg_cache[op.r.rs] != reg_cache[op.r.rt];
        case OP_REGIMM:
                switch (op.r.rt) {
@@ -125,8 +142,8 @@ static bool is_branch_taken(const u32 *reg_cache, union code op)
 static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
 {
        struct lightrec_state *state = inter->state;
-       u32 *reg_cache = state->native_reg_cache;
-       struct opcode new_op, *op = inter->op->next;
+       u32 *reg_cache = state->regs.gpr;
+       struct opcode new_op, *op = next_op(inter);
        union code op_next;
        struct interpreter inter2 = {
                .state = state,
@@ -150,8 +167,8 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                 * but on branch boundaries, we need to adjust the return
                 * address so that the GTE opcode is effectively executed.
                 */
-               cause = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 13);
-               epc = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 14);
+               cause = state->regs.cp0[13];
+               epc = state->regs.cp0[14];
 
                if (!(cause & 0x7c) && epc == pc - 4)
                        pc -= 4;
@@ -223,12 +240,10 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                } else {
                        new_op.c = op_next;
                        new_op.flags = 0;
-                       new_op.offset = 0;
-                       new_op.next = NULL;
                        inter2.op = &new_op;
 
                        /* Execute the first opcode of the next block */
-                       (*int_standard[inter2.op->i.op])(&inter2);
+                       lightrec_int_op(&inter2);
 
                        if (save_rs) {
                                new_rs = reg_cache[op->r.rs];
@@ -238,8 +253,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                        inter->cycles += lightrec_cycles_of_opcode(op_next);
                }
        } else {
-               next_pc = inter->block->pc
-                       + (inter->op->offset + 2) * sizeof(u32);
+               next_pc = int_get_ds_pc(inter, 2);
        }
 
        inter2.block = inter->block;
@@ -250,7 +264,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                new_rt = reg_cache[op->r.rt];
 
        /* Execute delay slot opcode */
-       ds_next_pc = (*int_standard[inter2.op->i.op])(&inter2);
+       ds_next_pc = lightrec_int_op(&inter2);
 
        if (branch_at_addr) {
                if (op_next.i.op == OP_SPECIAL)
@@ -286,8 +300,6 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
 
                new_op.c = op_next;
                new_op.flags = 0;
-               new_op.offset = sizeof(u32);
-               new_op.next = NULL;
                inter2.op = &new_op;
                inter2.block = NULL;
 
@@ -295,7 +307,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
 
                pr_debug("Running delay slot of branch at target of impossible "
                         "branch\n");
-               (*int_standard[inter2.op->i.op])(&inter2);
+               lightrec_int_op(&inter2);
        }
 
        return next_pc;
@@ -311,13 +323,13 @@ static u32 int_unimplemented(struct interpreter *inter)
 static u32 int_jump(struct interpreter *inter, bool link)
 {
        struct lightrec_state *state = inter->state;
-       u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+       u32 old_pc = int_get_branch_pc(inter);
        u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2);
 
        if (link)
-               state->native_reg_cache[31] = old_pc + 8;
+               state->regs.gpr[31] = old_pc + 8;
 
-       if (inter->op->flags & LIGHTREC_NO_DS)
+       if (op_flag_no_ds(inter->op->flags))
                return pc;
 
        return int_delay_slot(inter, pc, true);
@@ -336,14 +348,18 @@ static u32 int_JAL(struct interpreter *inter)
 static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
 {
        struct lightrec_state *state = inter->state;
-       u32 old_pc, next_pc = state->native_reg_cache[inter->op->r.rs];
+       u32 old_pc = int_get_branch_pc(inter);
+       u32 next_pc = state->regs.gpr[inter->op->r.rs];
 
-       if (link_reg) {
-               old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
-               state->native_reg_cache[link_reg] = old_pc + 8;
+       if (op_flag_emulate_branch(inter->op->flags) && inter->offset) {
+               inter->cycles -= lightrec_cycles_of_opcode(inter->op->c);
+               return old_pc;
        }
 
-       if (inter->op->flags & LIGHTREC_NO_DS)
+       if (link_reg)
+               state->regs.gpr[link_reg] = old_pc + 8;
+
+       if (op_flag_no_ds(inter->op->flags))
                return next_pc;
 
        return int_delay_slot(inter, next_pc, true);
@@ -361,11 +377,10 @@ static u32 int_special_JALR(struct interpreter *inter)
 
 static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
 {
-       if (!inter->delay_slot &&
-           (inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
+       if (!inter->delay_slot && op_flag_local_branch(inter->op->flags) &&
            (s16)inter->op->c.i.imm >= 0) {
                next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
-               next_pc = lightrec_emulate_block(inter->block, next_pc);
+               next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc);
        }
 
        return next_pc;
@@ -376,9 +391,14 @@ static u32 int_branch(struct interpreter *inter, u32 pc,
 {
        u32 next_pc = pc + 4 + ((s16)code.i.imm << 2);
 
+       if (op_flag_emulate_branch(inter->op->flags) && inter->offset) {
+               inter->cycles -= lightrec_cycles_of_opcode(inter->op->c);
+               return pc;
+       }
+
        update_cycles_before_branch(inter);
 
-       if (inter->op->flags & LIGHTREC_NO_DS) {
+       if (op_flag_no_ds(inter->op->flags)) {
                if (branch)
                        return int_do_branch(inter, pc, next_pc);
                else
@@ -391,7 +411,7 @@ static u32 int_branch(struct interpreter *inter, u32 pc,
        if (branch)
                return int_do_branch(inter, pc, next_pc);
 
-       if (inter->op->flags & LIGHTREC_EMULATE_BRANCH)
+       if (op_flag_emulate_branch(inter->op->flags))
                return pc + 8;
        else
                return jump_after_branch(inter);
@@ -399,10 +419,10 @@ static u32 int_branch(struct interpreter *inter, u32 pc,
 
 static u32 int_beq(struct interpreter *inter, bool bne)
 {
-       u32 rs, rt, old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+       u32 rs, rt, old_pc = int_get_branch_pc(inter); /* branch PC now comes from the helper instead of block->pc + op->offset * sizeof(u32) */
 
-       rs = inter->state->native_reg_cache[inter->op->i.rs];
-       rt = inter->state->native_reg_cache[inter->op->i.rt];
+       rs = inter->state->regs.gpr[inter->op->i.rs]; /* both operands are read from state->regs.gpr */
+       rt = inter->state->regs.gpr[inter->op->i.rt]; /* the condition passed on below is (rs == rt) XOR bne, so bne=true inverts the equality test */
 
        return int_branch(inter, old_pc, inter->op->c, (rs == rt) ^ bne);
 }
@@ -419,13 +439,13 @@ static u32 int_BNE(struct interpreter *inter)
 
 static u32 int_bgez(struct interpreter *inter, bool link, bool lt, bool regimm)
 {
-       u32 old_pc = inter->block->pc + inter->op->offset * sizeof(u32);
+       u32 old_pc = int_get_branch_pc(inter);
        s32 rs;
 
        if (link)
-               inter->state->native_reg_cache[31] = old_pc + 8;
+               inter->state->regs.gpr[31] = old_pc + 8;
 
-       rs = (s32)inter->state->native_reg_cache[inter->op->i.rs];
+       rs = (s32)inter->state->regs.gpr[inter->op->i.rs];
 
        return int_branch(inter, old_pc, inter->op->c,
                          ((regimm && !rs) || rs > 0) ^ lt);
@@ -470,7 +490,7 @@ static u32 int_cfc(struct interpreter *inter)
        val = lightrec_mfc(state, op->c);
 
        if (likely(op->r.rt))
-               state->native_reg_cache[op->r.rt] = val;
+               state->regs.gpr[op->r.rt] = val;
 
        return jump_next(inter);
 }
@@ -480,54 +500,35 @@ static u32 int_ctc(struct interpreter *inter)
        struct lightrec_state *state = inter->state;
        const struct opcode *op = inter->op;
 
-       lightrec_mtc(state, op->c, state->native_reg_cache[op->r.rt]);
+       lightrec_mtc(state, op->c, state->regs.gpr[op->r.rt]);
 
        /* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
         * return early so that the emulator will be able to check software
         * interrupt status. */
-       if (!(inter->op->flags & LIGHTREC_NO_DS) &&
+       if (!op_flag_no_ds(inter->op->flags) &&
            op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
-               return inter->block->pc + (op->offset + 1) * sizeof(u32);
+               return int_get_ds_pc(inter, 1);
        else
                return jump_next(inter);
 }
 
 static u32 int_cp0_RFE(struct interpreter *inter)
 {
-       struct lightrec_state *state = inter->state;
-       u32 status;
-
-       /* Read CP0 Status register (r12) */
-       status = state->ops.cop0_ops.mfc(state, inter->op->c.opcode, 12);
-
-       /* Switch the bits */
-       status = ((status & 0x3c) >> 2) | (status & ~0xf);
-
-       /* Write it back */
-       state->ops.cop0_ops.ctc(state, inter->op->c.opcode, 12, status);
+       lightrec_rfe(inter->state); /* replaces the removed inline read/shift/write-back of CP0 Status; NOTE(review): presumably lightrec_rfe() performs the equivalent update - confirm */
 
        return jump_next(inter);
 }
 
 static u32 int_CP(struct interpreter *inter)
 {
-       struct lightrec_state *state = inter->state;
-       const struct lightrec_cop_ops *ops;
-       const struct opcode *op = inter->op;
-
-       if ((op->j.imm >> 25) & 1)
-               ops = &state->ops.cop2_ops;
-       else
-               ops = &state->ops.cop0_ops;
-
-       (*ops->op)(state, (op->j.imm) & ~(1 << 25));
+       lightrec_cp(inter->state, inter->op->c); /* hands the whole opcode to lightrec_cp(); the removed lines picked cop0/cop2 ops from bit 25 of j.imm inline */
 
        return jump_next(inter);
 }
 
 static u32 int_ADDI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -538,7 +539,7 @@ static u32 int_ADDI(struct interpreter *inter)
 
 static u32 int_SLTI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -549,7 +550,7 @@ static u32 int_SLTI(struct interpreter *inter)
 
 static u32 int_SLTIU(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -560,7 +561,7 @@ static u32 int_SLTIU(struct interpreter *inter)
 
 static u32 int_ANDI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -571,7 +572,7 @@ static u32 int_ANDI(struct interpreter *inter)
 
 static u32 int_ORI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -582,7 +583,7 @@ static u32 int_ORI(struct interpreter *inter)
 
 static u32 int_XORI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_i *op = &inter->op->i;
 
        if (likely(op->rt))
@@ -595,7 +596,7 @@ static u32 int_LUI(struct interpreter *inter)
 {
        struct opcode_i *op = &inter->op->i;
 
-       inter->state->native_reg_cache[op->rt] = op->imm << 16;
+       inter->state->regs.gpr[op->rt] = op->imm << 16;
 
        return jump_next(inter);
 }
@@ -603,12 +604,12 @@ static u32 int_LUI(struct interpreter *inter)
 static u32 int_io(struct interpreter *inter, bool is_load)
 {
        struct opcode_i *op = &inter->op->i;
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        u32 val;
 
        val = lightrec_rw(inter->state, inter->op->c,
                          reg_cache[op->rs], reg_cache[op->rt],
-                         &inter->op->flags);
+                         &inter->op->flags, inter->block);
 
        if (is_load && op->rt)
                reg_cache[op->rt] = val;
@@ -625,15 +626,15 @@ static u32 int_store(struct interpreter *inter)
 {
        u32 next_pc;
 
-       if (likely(!(inter->op->flags & LIGHTREC_SMC)))
+       if (likely(!op_flag_smc(inter->op->flags)))
                return int_io(inter, false);
 
        lightrec_rw(inter->state, inter->op->c,
-                   inter->state->native_reg_cache[inter->op->i.rs],
-                   inter->state->native_reg_cache[inter->op->i.rt],
-                   &inter->op->flags);
+                   inter->state->regs.gpr[inter->op->i.rs],
+                   inter->state->regs.gpr[inter->op->i.rt],
+                   &inter->op->flags, inter->block);
 
-       next_pc = inter->block->pc + (inter->op->offset + 1) * 4;
+       next_pc = int_get_ds_pc(inter, 1);
 
        /* Invalidate next PC, to force the rest of the block to be rebuilt */
        lightrec_invalidate(inter->state, next_pc, 4);
@@ -652,8 +653,8 @@ static u32 int_special_SLL(struct interpreter *inter)
        u32 rt;
 
        if (op->opcode) { /* Handle NOPs */
-               rt = inter->state->native_reg_cache[op->r.rt];
-               inter->state->native_reg_cache[op->r.rd] = rt << op->r.imm;
+               rt = inter->state->regs.gpr[op->r.rt];
+               inter->state->regs.gpr[op->r.rd] = rt << op->r.imm;
        }
 
        return jump_next(inter);
@@ -662,9 +663,9 @@ static u32 int_special_SLL(struct interpreter *inter)
 static u32 int_special_SRL(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       u32 rt = inter->state->native_reg_cache[op->r.rt];
+       u32 rt = inter->state->regs.gpr[op->r.rt]; /* source value, held as u32 for the shift below */
 
-       inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm;
+       inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm; /* rd = rt >> imm; NOTE(review): no rd != 0 guard here, unlike the likely(op->rt) checks in the immediate handlers - confirm writes to register 0 are filtered elsewhere */
 
        return jump_next(inter);
 }
@@ -672,9 +673,9 @@ static u32 int_special_SRL(struct interpreter *inter)
 static u32 int_special_SRA(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       s32 rt = inter->state->native_reg_cache[op->r.rt];
+       s32 rt = inter->state->regs.gpr[op->r.rt]; /* held as s32 so the >> below acts on a signed value; right-shifting a negative signed value is implementation-defined in C (arithmetic shift presumably intended - confirm for target compilers) */
 
-       inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm;
+       inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm; /* rd = rt >> imm, stored back into the u32 register file */
 
        return jump_next(inter);
 }
@@ -682,10 +683,10 @@ static u32 int_special_SRA(struct interpreter *inter)
 static u32 int_special_SLLV(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       u32 rs = inter->state->native_reg_cache[op->r.rs];
-       u32 rt = inter->state->native_reg_cache[op->r.rt];
+       u32 rs = inter->state->regs.gpr[op->r.rs]; /* rs supplies the shift count */
+       u32 rt = inter->state->regs.gpr[op->r.rt]; /* rt supplies the value to shift */
 
-       inter->state->native_reg_cache[op->r.rd] = rt << (rs & 0x1f);
+       inter->state->regs.gpr[op->r.rd] = rt << (rs & 0x1f); /* rd = rt << (rs & 0x1f); the mask keeps the shift count in 0..31 */
 
        return jump_next(inter);
 }
@@ -693,10 +694,10 @@ static u32 int_special_SLLV(struct interpreter *inter)
 static u32 int_special_SRLV(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       u32 rs = inter->state->native_reg_cache[op->r.rs];
-       u32 rt = inter->state->native_reg_cache[op->r.rt];
+       u32 rs = inter->state->regs.gpr[op->r.rs]; /* rs supplies the shift count */
+       u32 rt = inter->state->regs.gpr[op->r.rt]; /* rt supplies the value to shift, held as u32 */
 
-       inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f);
+       inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f); /* rd = rt >> (rs & 0x1f); the mask keeps the shift count in 0..31 */
 
        return jump_next(inter);
 }
@@ -704,10 +705,10 @@ static u32 int_special_SRLV(struct interpreter *inter)
 static u32 int_special_SRAV(struct interpreter *inter)
 {
        struct opcode *op = inter->op;
-       u32 rs = inter->state->native_reg_cache[op->r.rs];
-       s32 rt = inter->state->native_reg_cache[op->r.rt];
+       u32 rs = inter->state->regs.gpr[op->r.rs]; /* rs supplies the shift count */
+       s32 rt = inter->state->regs.gpr[op->r.rt]; /* held as s32 so the >> below acts on a signed value (implementation-defined for negative values in C; arithmetic shift presumably intended - confirm) */
 
-       inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f);
+       inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f); /* rd = rt >> (rs & 0x1f); the mask keeps the shift count in 0..31 */
 
        return jump_next(inter);
 }
@@ -720,12 +721,12 @@ static u32 int_syscall_break(struct interpreter *inter)
        else
                inter->state->exit_flags |= LIGHTREC_EXIT_SYSCALL;
 
-       return inter->block->pc + inter->op->offset * sizeof(u32);
+       return int_get_ds_pc(inter, 0);
 }
 
 static u32 int_special_MFHI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
 
        if (likely(op->rd))
@@ -736,7 +737,7 @@ static u32 int_special_MFHI(struct interpreter *inter)
 
 static u32 int_special_MTHI(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
 
        reg_cache[REG_HI] = reg_cache[inter->op->r.rs];
 
@@ -745,7 +746,7 @@ static u32 int_special_MTHI(struct interpreter *inter)
 
 static u32 int_special_MFLO(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
 
        if (likely(op->rd))
@@ -756,7 +757,7 @@ static u32 int_special_MFLO(struct interpreter *inter)
 
 static u32 int_special_MTLO(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
 
        reg_cache[REG_LO] = reg_cache[inter->op->r.rs];
 
@@ -765,37 +766,45 @@ static u32 int_special_MTLO(struct interpreter *inter)
 
 static u32 int_special_MULT(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr; /* register file now lives in state->regs.gpr */
        s32 rs = reg_cache[inter->op->r.rs];
        s32 rt = reg_cache[inter->op->r.rt];
+       u8 reg_lo = get_mult_div_lo(inter->op->c); /* index of the register receiving the low word, derived from the opcode instead of the fixed REG_LO */
+       u8 reg_hi = get_mult_div_hi(inter->op->c); /* index of the register receiving the high word, derived from the opcode instead of the fixed REG_HI */
        u64 res = (s64)rs * (s64)rt;
 
-       if (!(inter->op->flags & LIGHTREC_MULT32))
-               reg_cache[REG_HI] = res >> 32;
-       reg_cache[REG_LO] = res;
+       if (!op_flag_no_hi(inter->op->flags)) /* the high-word write is skipped when the no-hi flag is set */
+               reg_cache[reg_hi] = res >> 32; /* upper 32 bits of the 64-bit product */
+       if (!op_flag_no_lo(inter->op->flags)) /* the low-word write is now conditional too (it was unconditional before) */
+               reg_cache[reg_lo] = res; /* lower 32 bits: the u64 is truncated by the store into a u32 slot */
 
        return jump_next(inter);
 }
 
 static u32 int_special_MULTU(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr; /* register file now lives in state->regs.gpr */
        u32 rs = reg_cache[inter->op->r.rs];
        u32 rt = reg_cache[inter->op->r.rt];
+       u8 reg_lo = get_mult_div_lo(inter->op->c); /* index of the register receiving the low word, derived from the opcode instead of the fixed REG_LO */
+       u8 reg_hi = get_mult_div_hi(inter->op->c); /* index of the register receiving the high word, derived from the opcode instead of the fixed REG_HI */
        u64 res = (u64)rs * (u64)rt;
 
-       if (!(inter->op->flags & LIGHTREC_MULT32))
-               reg_cache[REG_HI] = res >> 32;
-       reg_cache[REG_LO] = res;
+       if (!op_flag_no_hi(inter->op->flags)) /* the high-word write is skipped when the no-hi flag is set */
+               reg_cache[reg_hi] = res >> 32; /* upper 32 bits of the unsigned 64-bit product */
+       if (!op_flag_no_lo(inter->op->flags)) /* the low-word write is now conditional too (it was unconditional before) */
+               reg_cache[reg_lo] = res; /* lower 32 bits: the u64 is truncated by the store into a u32 slot */
 
        return jump_next(inter);
 }
 
 static u32 int_special_DIV(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        s32 rs = reg_cache[inter->op->r.rs];
        s32 rt = reg_cache[inter->op->r.rt];
+       u8 reg_lo = get_mult_div_lo(inter->op->c);
+       u8 reg_hi = get_mult_div_hi(inter->op->c);
        u32 lo, hi;
 
        if (rt == 0) {
@@ -806,17 +815,21 @@ static u32 int_special_DIV(struct interpreter *inter)
                hi = rs % rt;
        }
 
-       reg_cache[REG_HI] = hi;
-       reg_cache[REG_LO] = lo;
+       if (!op_flag_no_hi(inter->op->flags))
+               reg_cache[reg_hi] = hi;
+       if (!op_flag_no_lo(inter->op->flags))
+               reg_cache[reg_lo] = lo;
 
        return jump_next(inter);
 }
 
 static u32 int_special_DIVU(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        u32 rs = reg_cache[inter->op->r.rs];
        u32 rt = reg_cache[inter->op->r.rt];
+       u8 reg_lo = get_mult_div_lo(inter->op->c);
+       u8 reg_hi = get_mult_div_hi(inter->op->c);
        u32 lo, hi;
 
        if (rt == 0) {
@@ -827,15 +840,17 @@ static u32 int_special_DIVU(struct interpreter *inter)
                hi = rs % rt;
        }
 
-       reg_cache[REG_HI] = hi;
-       reg_cache[REG_LO] = lo;
+       if (!op_flag_no_hi(inter->op->flags))
+               reg_cache[reg_hi] = hi;
+       if (!op_flag_no_lo(inter->op->flags))
+               reg_cache[reg_lo] = lo;
 
        return jump_next(inter);
 }
 
 static u32 int_special_ADD(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        s32 rs = reg_cache[op->rs];
        s32 rt = reg_cache[op->rt];
@@ -848,7 +863,7 @@ static u32 int_special_ADD(struct interpreter *inter)
 
 static u32 int_special_SUB(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -861,7 +876,7 @@ static u32 int_special_SUB(struct interpreter *inter)
 
 static u32 int_special_AND(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -874,7 +889,7 @@ static u32 int_special_AND(struct interpreter *inter)
 
 static u32 int_special_OR(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -887,7 +902,7 @@ static u32 int_special_OR(struct interpreter *inter)
 
 static u32 int_special_XOR(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -900,7 +915,7 @@ static u32 int_special_XOR(struct interpreter *inter)
 
 static u32 int_special_NOR(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -913,7 +928,7 @@ static u32 int_special_NOR(struct interpreter *inter)
 
 static u32 int_special_SLT(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        s32 rs = reg_cache[op->rs];
        s32 rt = reg_cache[op->rt];
@@ -926,7 +941,7 @@ static u32 int_special_SLT(struct interpreter *inter)
 
 static u32 int_special_SLTU(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
        u32 rs = reg_cache[op->rs];
        u32 rt = reg_cache[op->rt];
@@ -937,14 +952,9 @@ static u32 int_special_SLTU(struct interpreter *inter)
        return jump_next(inter);
 }
 
-static u32 int_META_SKIP(struct interpreter *inter)
-{
-       return jump_skip(inter);
-}
-
 static u32 int_META_MOV(struct interpreter *inter)
 {
-       u32 *reg_cache = inter->state->native_reg_cache;
+       u32 *reg_cache = inter->state->regs.gpr;
        struct opcode_r *op = &inter->op->r;
 
        if (likely(op->rd))
@@ -953,15 +963,57 @@ static u32 int_META_MOV(struct interpreter *inter)
        return jump_next(inter);
 }
 
-static u32 int_META_SYNC(struct interpreter *inter)
+static u32 int_META_EXTC(struct interpreter *inter) /* Handler registered for OP_META_EXTC: writes the sign-extended low byte of rs into rt. */
 {
-       inter->state->current_cycle += inter->cycles;
-       inter->cycles = 0;
+       u32 *reg_cache = inter->state->regs.gpr; /* general-purpose register file */
+       struct opcode_i *op = &inter->op->i; /* view the opcode through its i (rs/rt) fields */
 
-       return jump_skip(inter);
+       if (likely(op->rt)) /* nothing is written when the destination index is 0 */
+               reg_cache[op->rt] = (u32)(s32)(s8)reg_cache[op->rs]; /* truncate to 8 bits, then widen as signed back to 32 bits (assuming s8/s32 are the usual signed typedefs) */
+
+       return jump_next(inter); /* continue with the following opcode via jump_next() */
+}
+
+static u32 int_META_EXTS(struct interpreter *inter) /* Handler registered for OP_META_EXTS: writes the sign-extended low 16 bits of rs into rt. */
+{
+       u32 *reg_cache = inter->state->regs.gpr; /* general-purpose register file */
+       struct opcode_i *op = &inter->op->i; /* view the opcode through its i (rs/rt) fields */
+
+       if (likely(op->rt)) /* nothing is written when the destination index is 0 */
+               reg_cache[op->rt] = (u32)(s32)(s16)reg_cache[op->rs]; /* truncate to 16 bits, then widen as signed back to 32 bits (assuming s16/s32 are the usual signed typedefs) */
+
+       return jump_next(inter); /* continue with the following opcode via jump_next() */
+}
+
+static u32 int_META_MULT2(struct interpreter *inter) /* Handler shared by OP_META_MULT2 and OP_META_MULTU2: stores rs shifted left by c.r.op as a low/high register pair. */
+{
+       u32 *reg_cache = inter->state->regs.gpr; /* general-purpose register file */
+       union code c = inter->op->c; /* local copy of the opcode word */
+       u32 rs = reg_cache[c.r.rs]; /* value to be shifted */
+       u8 reg_lo = get_mult_div_lo(c); /* index of the register receiving the low word */
+       u8 reg_hi = get_mult_div_hi(c); /* index of the register receiving the high word */
+
+       if (!op_flag_no_lo(inter->op->flags)) { /* the low word is skipped when the no-lo flag is set */
+               if (c.r.op < 32) /* c.r.op is used as the shift amount */
+                       reg_cache[reg_lo] = rs << c.r.op; /* low 32 bits of rs shifted left by c.r.op */
+               else
+                       reg_cache[reg_lo] = 0; /* a shift of 32 or more leaves no bits in the low word */
+       }
+
+       if (!op_flag_no_hi(inter->op->flags)) { /* the high word is skipped when the no-hi flag is set */
+               if (c.r.op >= 32) /* all of rs lands in the high word */
+                       reg_cache[reg_hi] = rs << (c.r.op - 32); /* remaining shift applied inside the high word */
+               else if (c.i.op == OP_META_MULT2) /* signed variant */
+                       reg_cache[reg_hi] = (s32) rs >> (32 - c.r.op); /* bits shifted out of the low word, via a signed shift; NOTE(review): if c.r.op can be 0 this shifts a 32-bit value by 32, which is undefined in C - confirm 0 is never emitted */
+               else /* OP_META_MULTU2, the only other opcode mapped to this handler in int_standard */
+                       reg_cache[reg_hi] = rs >> (32 - c.r.op); /* bits shifted out of the low word, zero-filled; same NOTE(review) about c.r.op == 0 applies */
+       }
+
+       return jump_next(inter); /* continue with the following opcode via jump_next() */
+}
 
 static const lightrec_int_func_t int_standard[64] = {
+       SET_DEFAULT_ELM(int_standard, int_unimplemented),
        [OP_SPECIAL]            = int_SPECIAL,
        [OP_REGIMM]             = int_REGIMM,
        [OP_J]                  = int_J,
@@ -995,14 +1047,15 @@ static const lightrec_int_func_t int_standard[64] = {
        [OP_LWC2]               = int_LWC2,
        [OP_SWC2]               = int_store,
 
-       [OP_META_REG_UNLOAD]    = int_META_SKIP,
-       [OP_META_BEQZ]          = int_BEQ,
-       [OP_META_BNEZ]          = int_BNE,
        [OP_META_MOV]           = int_META_MOV,
-       [OP_META_SYNC]          = int_META_SYNC,
+       [OP_META_EXTC]          = int_META_EXTC,
+       [OP_META_EXTS]          = int_META_EXTS,
+       [OP_META_MULT2]         = int_META_MULT2,
+       [OP_META_MULTU2]        = int_META_MULT2,
 };
 
 static const lightrec_int_func_t int_special[64] = {
+       SET_DEFAULT_ELM(int_special, int_unimplemented),
        [OP_SPECIAL_SLL]        = int_special_SLL,
        [OP_SPECIAL_SRL]        = int_special_SRL,
        [OP_SPECIAL_SRA]        = int_special_SRA,
@@ -1034,6 +1087,7 @@ static const lightrec_int_func_t int_special[64] = {
 };
 
 static const lightrec_int_func_t int_regimm[64] = {
+       SET_DEFAULT_ELM(int_regimm, int_unimplemented),
        [OP_REGIMM_BLTZ]        = int_regimm_BLTZ,
        [OP_REGIMM_BGEZ]        = int_regimm_BGEZ,
        [OP_REGIMM_BLTZAL]      = int_regimm_BLTZAL,
@@ -1041,6 +1095,7 @@ static const lightrec_int_func_t int_regimm[64] = {
 };
 
 static const lightrec_int_func_t int_cp0[64] = {
+       SET_DEFAULT_ELM(int_cp0, int_CP),
        [OP_CP0_MFC0]           = int_cfc,
        [OP_CP0_CFC0]           = int_cfc,
        [OP_CP0_MTC0]           = int_ctc,
@@ -1049,6 +1104,7 @@ static const lightrec_int_func_t int_cp0[64] = {
 };
 
 static const lightrec_int_func_t int_cp2_basic[64] = {
+       SET_DEFAULT_ELM(int_cp2_basic, int_CP),
        [OP_CP2_BASIC_MFC2]     = int_cfc,
        [OP_CP2_BASIC_CFC2]     = int_cfc,
        [OP_CP2_BASIC_MTC2]     = int_ctc,
@@ -1058,54 +1114,54 @@ static const lightrec_int_func_t int_cp2_basic[64] = {
 static u32 int_SPECIAL(struct interpreter *inter)
 {
        lightrec_int_func_t f = int_special[inter->op->r.op];
-       if (likely(f))
-               return execute(f, inter);
-       else
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f)) /* the NULL check only applies when HAS_DEFAULT_ELM is false; NOTE(review): presumably SET_DEFAULT_ELM otherwise pre-fills the table with int_unimplemented - confirm against the macro */
                return int_unimplemented(inter);
+
+       return execute(f, inter); /* run the handler selected by the opcode's r.op field */
 }
 
 static u32 int_REGIMM(struct interpreter *inter)
 {
        lightrec_int_func_t f = int_regimm[inter->op->r.rt];
-       if (likely(f))
-               return execute(f, inter);
-       else
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f)) /* the NULL check only applies when HAS_DEFAULT_ELM is false; NOTE(review): presumably SET_DEFAULT_ELM otherwise pre-fills the table with int_unimplemented - confirm against the macro */
                return int_unimplemented(inter);
+
+       return execute(f, inter); /* run the handler selected by the opcode's r.rt field */
 }
 
 static u32 int_CP0(struct interpreter *inter)
 {
        lightrec_int_func_t f = int_cp0[inter->op->r.rs];
-       if (likely(f))
-               return execute(f, inter);
-       else
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f)) /* the NULL check only applies when HAS_DEFAULT_ELM is false; NOTE(review): presumably SET_DEFAULT_ELM otherwise pre-fills the table with int_CP - confirm against the macro */
                return int_CP(inter);
+
+       return execute(f, inter); /* run the handler selected by the opcode's r.rs field */
 }
 
 static u32 int_CP2(struct interpreter *inter)
 {
        if (inter->op->r.op == OP_CP2_BASIC) {
                lightrec_int_func_t f = int_cp2_basic[inter->op->r.rs];
-               if (likely(f))
+               if (HAS_DEFAULT_ELM || likely(f)) /* with HAS_DEFAULT_ELM true the table entry is called unconditionally; otherwise only a non-NULL entry is called and NULL falls through to int_CP() below */
                        return execute(f, inter);
        }
 
        return int_CP(inter);
 }
 
-static u32 lightrec_int_op(struct interpreter *inter)
-{
-       return execute(int_standard[inter->op->i.op], inter);
-}
-
-static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
+static u32 lightrec_emulate_block_list(struct lightrec_state *state,
+                                      struct block *block, u32 offset)
 {
        struct interpreter inter;
        u32 pc;
 
        inter.block = block;
-       inter.state = block->state;
-       inter.op = op;
+       inter.state = state;
+       inter.offset = offset;
+       inter.op = &block->opcode_list[offset];
        inter.cycles = 0;
        inter.delay_slot = false;
 
@@ -1114,22 +1170,21 @@ static u32 lightrec_emulate_block_list(struct block *block, struct opcode *op)
        /* Add the cycles of the last branch */
        inter.cycles += lightrec_cycles_of_opcode(inter.op->c);
 
-       block->state->current_cycle += inter.cycles;
+       state->current_cycle += inter.cycles;
 
        return pc;
 }
 
-u32 lightrec_emulate_block(struct block *block, u32 pc)
+u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc) /* Interprets `block` starting at address `pc`; returns the helper's result, or 0 after flagging a segfault when pc maps outside the block. */
 {
        u32 offset = (kunseg(pc) - kunseg(block->pc)) >> 2;
-       struct opcode *op;
 
-       for (op = block->opcode_list;
-            op && (op->offset < offset); op = op->next);
-       if (op)
-               return lightrec_emulate_block_list(block, op);
+       if (offset < block->nb_ops) /* offset is the pc delta divided by 4; a direct bounds test replaces the removed linked-list walk */
+               return lightrec_emulate_block_list(state, block, offset); /* the helper receives the state and the opcode index rather than an opcode pointer */
 
        pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);
 
+       lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); /* in addition to the log message, record the failure in the state's exit flags */
+
        return 0;
 }