git subrepo pull --force deps/lightrec
[pcsx_rearmed.git] / deps / lightrec / interpreter.c
index 922f081..4267bec 100644 (file)
@@ -16,6 +16,7 @@ struct interpreter;
 static u32 int_CP0(struct interpreter *inter);
 static u32 int_CP2(struct interpreter *inter);
 static u32 int_SPECIAL(struct interpreter *inter);
+static u32 int_META(struct interpreter *inter);
 static u32 int_REGIMM(struct interpreter *inter);
 static u32 int_branch(struct interpreter *inter, u32 pc,
                      union code code, bool branch);
@@ -30,6 +31,7 @@ struct interpreter {
        struct opcode *op;
        u32 cycles;
        bool delay_slot;
+       bool load_delay;
        u16 offset;
 };
 
@@ -45,7 +47,7 @@ static inline u32 int_get_ds_pc(const struct interpreter *inter, s16 imm)
 
 static inline struct opcode *next_op(const struct interpreter *inter)
 {
-       return &inter->block->opcode_list[inter->offset + 1];
+       return &inter->op[1]; /* the opcode stored right after the current one */
 }
 
 static inline u32 execute(lightrec_int_func_t func, struct interpreter *inter)
@@ -63,7 +65,7 @@ static inline u32 jump_skip(struct interpreter *inter)
        inter->op = next_op(inter);
        inter->offset++;
 
-       if (inter->op->flags & LIGHTREC_SYNC) {
+       if (op_flag_sync(inter->op->flags)) {
                inter->state->current_cycle += inter->cycles;
                inter->cycles = 0;
        }
@@ -73,7 +75,7 @@ static inline u32 jump_skip(struct interpreter *inter)
 
 static inline u32 jump_next(struct interpreter *inter)
 {
-       inter->cycles += lightrec_cycles_of_opcode(inter->op->c);
+       inter->cycles += lightrec_cycles_of_opcode(inter->state, inter->op->c);
 
        if (unlikely(inter->delay_slot))
                return 0;
@@ -83,7 +85,7 @@ static inline u32 jump_next(struct interpreter *inter)
 
 static inline u32 jump_after_branch(struct interpreter *inter)
 {
-       inter->cycles += lightrec_cycles_of_opcode(inter->op->c);
+       inter->cycles += lightrec_cycles_of_opcode(inter->state, inter->op->c);
 
        if (unlikely(inter->delay_slot))
                return 0;
@@ -99,11 +101,11 @@ static void update_cycles_before_branch(struct interpreter *inter)
        u32 cycles;
 
        if (!inter->delay_slot) {
-               cycles = lightrec_cycles_of_opcode(inter->op->c);
+               cycles = lightrec_cycles_of_opcode(inter->state, inter->op->c);
 
-               if (has_delay_slot(inter->op->c) &&
-                   !(inter->op->flags & LIGHTREC_NO_DS))
-                       cycles += lightrec_cycles_of_opcode(next_op(inter)->c);
+               if (!op_flag_no_ds(inter->op->flags) &&
+                   has_delay_slot(inter->op->c))
+                       cycles += lightrec_cycles_of_opcode(inter->state, next_op(inter)->c);
 
                inter->cycles += cycles;
                inter->state->current_cycle += inter->cycles;
@@ -149,14 +151,13 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                .state = state,
                .cycles = inter->cycles,
                .delay_slot = true,
-               .block = NULL,
+               .load_delay = true,
        };
        bool run_first_op = false, dummy_ld = false, save_rs = false,
             load_in_ds, branch_in_ds = false, branch_at_addr = false,
             branch_taken;
-       u32 old_rs, new_rs, new_rt;
-       u32 next_pc, ds_next_pc;
-       u32 cause, epc;
+       u32 new_rt, old_rs = 0, new_rs = 0;
+       u32 next_pc, ds_next_pc, epc;
 
        if (op->i.op == OP_CP0 && op->r.rs == OP_CP0_RFE) {
                /* When an IRQ happens, the PSX exception handlers (when done)
@@ -167,11 +168,13 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                 * but on branch boundaries, we need to adjust the return
                 * address so that the GTE opcode is effectively executed.
                 */
-               cause = state->regs.cp0[13];
                epc = state->regs.cp0[14];
 
-               if (!(cause & 0x7c) && epc == pc - 4)
-                       pc -= 4;
+               if (epc == pc - 4) {
+                       op_next = lightrec_read_opcode(state, epc);
+                       if (op_next.i.op == OP_CP2)
+                               pc -= 4;
+               }
        }
 
        if (inter->delay_slot) {
@@ -186,7 +189,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
         * interpreter in that case.
         * Same goes for when we have a branch in a delay slot of another
         * branch. */
-       load_in_ds = load_in_delay_slot(op->c);
+       load_in_ds = opcode_has_load_delay(op->c);
        branch_in_ds = has_delay_slot(op->c);
 
        if (branch) {
@@ -235,12 +238,13 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                        branch_taken = is_branch_taken(reg_cache, op_next);
                        pr_debug("Target of impossible branch is a branch, "
                                 "%staken.\n", branch_taken ? "" : "not ");
-                       inter->cycles += lightrec_cycles_of_opcode(op_next);
+                       inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next);
                        old_rs = reg_cache[op_next.r.rs];
                } else {
                        new_op.c = op_next;
                        new_op.flags = 0;
                        inter2.op = &new_op;
+                       inter2.offset = 0;
 
                        /* Execute the first opcode of the next block */
                        lightrec_int_op(&inter2);
@@ -250,7 +254,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                                reg_cache[op->r.rs] = old_rs;
                        }
 
-                       inter->cycles += lightrec_cycles_of_opcode(op_next);
+                       inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next);
                }
        } else {
                next_pc = int_get_ds_pc(inter, 2);
@@ -259,6 +263,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
        inter2.block = inter->block;
        inter2.op = op;
        inter2.cycles = inter->cycles;
+       inter2.offset = inter->offset + 1;
 
        if (dummy_ld)
                new_rt = reg_cache[op->r.rt];
@@ -290,7 +295,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
        if (dummy_ld)
                reg_cache[op->r.rt] = new_rt;
 
-       inter->cycles += lightrec_cycles_of_opcode(op->c);
+       inter->cycles += lightrec_cycles_of_opcode(inter->state, op->c);
 
        if (branch_at_addr && branch_taken) {
                /* If the branch at the target of the branch opcode is taken,
@@ -303,7 +308,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
                inter2.op = &new_op;
                inter2.block = NULL;
 
-               inter->cycles += lightrec_cycles_of_opcode(op_next);
+               inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next);
 
                pr_debug("Running delay slot of branch at target of impossible "
                         "branch\n");
@@ -315,9 +320,9 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch)
 
 static u32 int_unimplemented(struct interpreter *inter)
 {
-       pr_warn("Unimplemented opcode 0x%08x\n", inter->op->opcode);
+       lightrec_set_exit_flags(inter->state, LIGHTREC_EXIT_UNKNOWN_OP); /* report the unknown opcode through the exit flags */
 
-       return jump_next(inter);
+       return inter->block->pc + (inter->offset << 2); /* block start PC + offset * 4; NOTE(review): int_delay_slot sets inter2.block = NULL on one path - confirm this is never reached with a NULL block */
 }
 
 static u32 int_jump(struct interpreter *inter, bool link)
@@ -329,7 +334,7 @@ static u32 int_jump(struct interpreter *inter, bool link)
        if (link)
                state->regs.gpr[31] = old_pc + 8;
 
-       if (inter->op->flags & LIGHTREC_NO_DS)
+       if (op_flag_no_ds(inter->op->flags))
                return pc;
 
        return int_delay_slot(inter, pc, true);
@@ -348,14 +353,13 @@ static u32 int_JAL(struct interpreter *inter)
 static u32 int_jumpr(struct interpreter *inter, u8 link_reg)
 {
        struct lightrec_state *state = inter->state;
-       u32 old_pc, next_pc = state->regs.gpr[inter->op->r.rs];
+       u32 old_pc = int_get_branch_pc(inter);
+       u32 next_pc = state->regs.gpr[inter->op->r.rs];
 
-       if (link_reg) {
-               old_pc = int_get_branch_pc(inter);
+       if (link_reg)
                state->regs.gpr[link_reg] = old_pc + 8;
-       }
 
-       if (inter->op->flags & LIGHTREC_NO_DS)
+       if (op_flag_no_ds(inter->op->flags))
                return next_pc;
 
        return int_delay_slot(inter, next_pc, true);
@@ -373,8 +377,7 @@ static u32 int_special_JALR(struct interpreter *inter)
 
 static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc)
 {
-       if (!inter->delay_slot &&
-           (inter->op->flags & LIGHTREC_LOCAL_BRANCH) &&
+       if (!inter->delay_slot && op_flag_local_branch(inter->op->flags) &&
            (s16)inter->op->c.i.imm >= 0) {
                next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2);
                next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc);
@@ -390,7 +393,7 @@ static u32 int_branch(struct interpreter *inter, u32 pc,
 
        update_cycles_before_branch(inter);
 
-       if (inter->op->flags & LIGHTREC_NO_DS) {
+       if (op_flag_no_ds(inter->op->flags)) {
                if (branch)
                        return int_do_branch(inter, pc, next_pc);
                else
@@ -403,7 +406,7 @@ static u32 int_branch(struct interpreter *inter, u32 pc,
        if (branch)
                return int_do_branch(inter, pc, next_pc);
 
-       if (inter->op->flags & LIGHTREC_EMULATE_BRANCH)
+       if (op_flag_emulate_branch(inter->op->flags))
                return pc + 8;
        else
                return jump_after_branch(inter);
@@ -492,12 +495,12 @@ static u32 int_ctc(struct interpreter *inter)
        struct lightrec_state *state = inter->state;
        const struct opcode *op = inter->op;
 
-       lightrec_mtc(state, op->c, state->regs.gpr[op->r.rt]);
+       lightrec_mtc(state, op->c, op->r.rd, state->regs.gpr[op->r.rt]);
 
        /* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause),
         * return early so that the emulator will be able to check software
         * interrupt status. */
-       if (!(inter->op->flags & LIGHTREC_NO_DS) &&
+       if (!op_flag_no_ds(inter->op->flags) &&
            op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13))
                return int_get_ds_pc(inter, 1);
        else
@@ -597,11 +600,14 @@ static u32 int_io(struct interpreter *inter, bool is_load)
 {
        struct opcode_i *op = &inter->op->i;
        u32 *reg_cache = inter->state->regs.gpr;
-       u32 val;
+       u32 val, *flags = NULL;
+
+       if (!inter->load_delay && inter->block)
+               flags = &inter->op->flags;
 
        val = lightrec_rw(inter->state, inter->op->c,
                          reg_cache[op->rs], reg_cache[op->rt],
-                         &inter->op->flags, inter->block);
+                         flags, inter->block, inter->offset);
 
        if (is_load && op->rt)
                reg_cache[op->rt] = val;
@@ -618,13 +624,13 @@ static u32 int_store(struct interpreter *inter)
 {
        u32 next_pc;
 
-       if (likely(!(inter->op->flags & LIGHTREC_SMC)))
+       if (likely(!op_flag_smc(inter->op->flags)))
                return int_io(inter, false);
 
        lightrec_rw(inter->state, inter->op->c,
                    inter->state->regs.gpr[inter->op->i.rs],
                    inter->state->regs.gpr[inter->op->i.rt],
-                   &inter->op->flags, inter->block);
+                   &inter->op->flags, inter->block, inter->offset);
 
        next_pc = int_get_ds_pc(inter, 1);
 
@@ -709,9 +715,9 @@ static u32 int_syscall_break(struct interpreter *inter)
 {
 
        if (inter->op->r.op == OP_SPECIAL_BREAK)
-               inter->state->exit_flags |= LIGHTREC_EXIT_BREAK;
+               lightrec_set_exit_flags(inter->state, LIGHTREC_EXIT_BREAK);
        else
-               inter->state->exit_flags |= LIGHTREC_EXIT_SYSCALL;
+               lightrec_set_exit_flags(inter->state, LIGHTREC_EXIT_SYSCALL);
 
        return int_get_ds_pc(inter, 0);
 }
@@ -765,9 +771,9 @@ static u32 int_special_MULT(struct interpreter *inter)
        u8 reg_hi = get_mult_div_hi(inter->op->c);
        u64 res = (s64)rs * (s64)rt;
 
-       if (!(inter->op->flags & LIGHTREC_NO_HI))
+       if (!op_flag_no_hi(inter->op->flags))
                reg_cache[reg_hi] = res >> 32;
-       if (!(inter->op->flags & LIGHTREC_NO_LO))
+       if (!op_flag_no_lo(inter->op->flags))
                reg_cache[reg_lo] = res;
 
        return jump_next(inter);
@@ -782,9 +788,9 @@ static u32 int_special_MULTU(struct interpreter *inter)
        u8 reg_hi = get_mult_div_hi(inter->op->c);
        u64 res = (u64)rs * (u64)rt;
 
-       if (!(inter->op->flags & LIGHTREC_NO_HI))
+       if (!op_flag_no_hi(inter->op->flags))
                reg_cache[reg_hi] = res >> 32;
-       if (!(inter->op->flags & LIGHTREC_NO_LO))
+       if (!op_flag_no_lo(inter->op->flags))
                reg_cache[reg_lo] = res;
 
        return jump_next(inter);
@@ -807,9 +813,9 @@ static u32 int_special_DIV(struct interpreter *inter)
                hi = rs % rt;
        }
 
-       if (!(inter->op->flags & LIGHTREC_NO_HI))
+       if (!op_flag_no_hi(inter->op->flags))
                reg_cache[reg_hi] = hi;
-       if (!(inter->op->flags & LIGHTREC_NO_LO))
+       if (!op_flag_no_lo(inter->op->flags))
                reg_cache[reg_lo] = lo;
 
        return jump_next(inter);
@@ -832,9 +838,9 @@ static u32 int_special_DIVU(struct interpreter *inter)
                hi = rs % rt;
        }
 
-       if (!(inter->op->flags & LIGHTREC_NO_HI))
+       if (!op_flag_no_hi(inter->op->flags))
                reg_cache[reg_hi] = hi;
-       if (!(inter->op->flags & LIGHTREC_NO_LO))
+       if (!op_flag_no_lo(inter->op->flags))
                reg_cache[reg_lo] = lo;
 
        return jump_next(inter);
@@ -947,7 +953,7 @@ static u32 int_special_SLTU(struct interpreter *inter)
 static u32 int_META_MOV(struct interpreter *inter)
 {
        u32 *reg_cache = inter->state->regs.gpr;
-       struct opcode_r *op = &inter->op->r;
+       struct opcode_m *op = &inter->op->m;
 
        if (likely(op->rd))
                reg_cache[op->rd] = reg_cache[op->rs];
@@ -958,10 +964,10 @@ static u32 int_META_MOV(struct interpreter *inter)
 static u32 int_META_EXTC(struct interpreter *inter)
 {
         u32 *reg_cache = inter->state->regs.gpr;
-       struct opcode_i *op = &inter->op->i;
+       struct opcode_m *op = &inter->op->m; /* decode through the opcode_m view of the opcode */
 
-       if (likely(op->rt))
-               reg_cache[op->rt] = (u32)(s32)(s8)reg_cache[op->rs];
+       if (likely(op->rd)) /* nothing is written when rd is register 0 */
+               reg_cache[op->rd] = (u32)(s32)(s8)reg_cache[op->rs]; /* sign-extend the low 8 bits of rs */
 
         return jump_next(inter);
 }
@@ -969,10 +975,56 @@ static u32 int_META_EXTC(struct interpreter *inter)
 static u32 int_META_EXTS(struct interpreter *inter)
 {
         u32 *reg_cache = inter->state->regs.gpr;
-       struct opcode_i *op = &inter->op->i;
+       struct opcode_m *op = &inter->op->m; /* decode through the opcode_m view of the opcode */
 
-       if (likely(op->rt))
-               reg_cache[op->rt] = (u32)(s32)(s16)reg_cache[op->rs];
+       if (likely(op->rd)) /* nothing is written when rd is register 0 */
+               reg_cache[op->rd] = (u32)(s32)(s16)reg_cache[op->rs]; /* sign-extend the low 16 bits of rs */
+
+       return jump_next(inter);
+}
+/* MULT2/MULTU2 meta opcodes: 64-bit result of rs shifted left by c.r.op, split across the LO and HI registers. */
+static u32 int_META_MULT2(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->regs.gpr;
+       union code c = inter->op->c;
+       u32 rs = reg_cache[c.r.rs];
+       u8 reg_lo = get_mult_div_lo(c);
+       u8 reg_hi = get_mult_div_hi(c);
+
+       if (!op_flag_no_lo(inter->op->flags)) { /* low word; skipped when op_flag_no_lo() is set */
+               if (c.r.op < 32)
+                       reg_cache[reg_lo] = rs << c.r.op;
+               else
+                       reg_cache[reg_lo] = 0; /* every bit of rs is shifted out of the low word */
+       }
+
+       if (!op_flag_no_hi(inter->op->flags)) { /* high word; skipped when op_flag_no_hi() is set */
+               if (c.r.op >= 32) {
+                       reg_cache[reg_hi] = rs << (c.r.op - 32);
+               }
+               else if (c.i.op == OP_META_MULT2) { /* signed variant: rs is shifted as an s32 */
+                       if (c.r.op)
+                               reg_cache[reg_hi] = (s32) rs >> (32 - c.r.op);
+                       else
+                               reg_cache[reg_hi] = (s32) rs >> 31; /* shift amount 0: handled apart, 32 - 0 would be a shift by the full width */
+               } else { /* any other opcode routed here (int_standard also maps OP_META_MULTU2): rs is shifted as a u32 */
+                       if (c.r.op)
+                               reg_cache[reg_hi] = rs >> (32 - c.r.op);
+                       else
+                               reg_cache[reg_hi] = 0; /* shift amount 0: nothing spills into the high word */
+               }
+       }
+
+       return jump_next(inter);
+}
+/* META_COM handler: writes the bitwise complement of register rs to register rd. */
+static u32 int_META_COM(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->regs.gpr;
+       union code c = inter->op->c;
+
+       if (likely(c.m.rd)) /* nothing is written when rd is register 0 */
+               reg_cache[c.m.rd] = ~reg_cache[c.m.rs];
 
         return jump_next(inter);
 }
@@ -1012,9 +1064,11 @@ static const lightrec_int_func_t int_standard[64] = {
        [OP_LWC2]               = int_LWC2,
        [OP_SWC2]               = int_store,
 
-       [OP_META_MOV]           = int_META_MOV,
-       [OP_META_EXTC]          = int_META_EXTC,
-       [OP_META_EXTS]          = int_META_EXTS,
+       [OP_META]               = int_META,
+       [OP_META_MULT2]         = int_META_MULT2,
+       [OP_META_MULTU2]        = int_META_MULT2,
+       [OP_META_LWU]           = int_load,
+       [OP_META_SWU]           = int_store,
 };
 
 static const lightrec_int_func_t int_special[64] = {
@@ -1074,6 +1128,14 @@ static const lightrec_int_func_t int_cp2_basic[64] = {
        [OP_CP2_BASIC_CTC2]     = int_ctc,
 };
 
+static const lightrec_int_func_t int_meta[64] = { /* OP_META handlers; int_META indexes this table with the m.op field */
+       SET_DEFAULT_ELM(int_meta, int_unimplemented), /* presumably the fallback for unlisted slots - macro is defined elsewhere, confirm */
+       [OP_META_MOV]           = int_META_MOV,
+       [OP_META_EXTC]          = int_META_EXTC,
+       [OP_META_EXTS]          = int_META_EXTS,
+       [OP_META_COM]           = int_META_COM,
+};
+
 static u32 int_SPECIAL(struct interpreter *inter)
 {
        lightrec_int_func_t f = int_special[inter->op->r.op];
@@ -1115,23 +1177,31 @@ static u32 int_CP2(struct interpreter *inter)
        return int_CP(inter);
 }
 
+static u32 int_META(struct interpreter *inter)
+{
+       lightrec_int_func_t f = int_meta[inter->op->m.op];
+
+       if (!HAS_DEFAULT_ELM && unlikely(!f))
+               return int_unimplemented(inter);
+
+       return execute(f, inter);
+}
+
 static u32 lightrec_emulate_block_list(struct lightrec_state *state,
                                       struct block *block, u32 offset)
 {
-       struct interpreter inter;
+       struct interpreter inter = {
+               .block = block,
+               .state = state,
+               .offset = offset,
+               .op = &block->opcode_list[offset],
+       };
        u32 pc;
 
-       inter.block = block;
-       inter.state = state;
-       inter.offset = offset;
-       inter.op = &block->opcode_list[offset];
-       inter.cycles = 0;
-       inter.delay_slot = false;
-
        pc = lightrec_int_op(&inter);
 
        /* Add the cycles of the last branch */
-       inter.cycles += lightrec_cycles_of_opcode(inter.op->c);
+       inter.cycles += lightrec_cycles_of_opcode(inter.state, inter.op->c);
 
        state->current_cycle += inter.cycles;
 
@@ -1145,7 +1215,80 @@ u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u3
        if (offset < block->nb_ops)
                return lightrec_emulate_block_list(state, block, offset);
 
-       pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);
+       pr_err(PC_FMT" is outside block at "PC_FMT"\n", pc, block->pc);
+
+       lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
 
        return 0;
 }
+/* Returns the target address of the jump/branch opcode c located at pc; the visible caller only passes opcodes with a delay slot. */
+static u32 branch_get_next_pc(struct lightrec_state *state, union code c, u32 pc)
+{
+       switch (c.i.op) {
+       case OP_SPECIAL:
+               /* JR / JALR */
+               return state->regs.gpr[c.r.rs]; /* target is the current value of register rs */
+       case OP_J:
+       case OP_JAL:
+               return (pc & 0xf0000000) | (c.j.imm << 2); /* top 4 bits of pc, remaining bits from imm * 4 */
+       default:
+               /* Branch opcodes */
+               return pc + 4 + ((s16)c.i.imm << 2); /* signed offset, scaled by 4, from the address after the branch; NOTE(review): left-shifts a possibly negative value */
+       }
+}
+/* Interprets the opcode at pc and commits state->temp_reg to GPR reg (unless that opcode writes reg itself); returns the PC to continue from. */
+u32 lightrec_handle_load_delay(struct lightrec_state *state,
+                              struct block *block, u32 pc, u32 reg)
+{
+       union code c = lightrec_read_opcode(state, pc);
+       struct opcode op[2] = { /* op[0]: the opcode at pc; op[1]: filled in below with the opcode at pc + 4 for branches */
+               {
+                       .c = c,
+                       .flags = 0,
+               },
+               {
+                       .flags = 0,
+               },
+       };
+       struct interpreter inter = {
+               .block = block,
+               .state = state,
+               .op = op,
+               .load_delay = true, /* with this set, int_io passes NULL instead of the opcode flags to lightrec_rw */
+       };
+       bool branch_taken;
+       u32 reg_mask, next_pc;
+
+       if (has_delay_slot(c)) {
+               op[1].c = lightrec_read_opcode(state, pc + 4); /* next_op() will find it right after op[0] */
+
+               branch_taken = is_branch_taken(state->regs.gpr, c); /* evaluated before reg receives the pending value */
+               next_pc = branch_get_next_pc(state, c, pc);
+
+               /* Branch was evaluated, we can write the load opcode's target
+                * register now. */
+               state->regs.gpr[reg] = state->temp_reg;
+
+               /* Handle JALR / regimm opcodes setting $ra (or any other
+                * register in the case of JALR) */
+               reg_mask = (u32)opcode_write_mask(c);
+               if (reg_mask)
+                       state->regs.gpr[ctz32(reg_mask)] = pc + 8; /* link value is the address following the pc + 4 opcode; ctz32 presumably yields the lowest set bit index */
+
+               /* Handle delay slot of the branch opcode */
+               pc = int_delay_slot(&inter, next_pc, branch_taken);
+       } else {
+               /* Make sure we only run one instruction */
+               inter.delay_slot = true;
+
+               lightrec_int_op(&inter); /* return value is ignored; the next PC is computed below */
+               pc += 4;
+
+               if (!opcode_writes_register(c, reg)) /* a write to reg by the opcode itself is kept instead of temp_reg */
+                       state->regs.gpr[reg] = state->temp_reg;
+       }
+
+       state->current_cycle += inter.cycles; /* add the cycles accumulated by the interpreter run above */
+
+       return pc;
+}