From: Paul Cercueil Date: Sat, 16 Jul 2022 20:05:41 +0000 (+0200) Subject: Update lightrec 20220716 (#672) X-Git-Tag: r24l~445 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=03535202b4b624c534340322646fb7f4062e3f53;p=pcsx_rearmed.git Update lightrec 20220716 (#672) * lightrec: Simply invalidate code and reset registers for plugin reset Instead of destroying the Lightrec instance and creating a new one, simply invalidate the code and reset all registers to their default values. Signed-off-by: Paul Cercueil * git subrepo pull --force deps/lightrec subrepo: subdir: "deps/lightrec" merged: "7545b5a7" upstream: origin: "https://github.com/pcercuei/lightrec.git" branch: "master" commit: "7545b5a7" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" --- diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 125d138a..9e55aa9f 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = 30bad28d7a2b2903cd7f3d8024ae7a34a0c8b482 - parent = 0141267e1c5e17c27548f6ad57c7acc22e589990 + commit = 7545b5a7995be9e7b70e786a6b534004ea26c999 + parent = 2fba93f2853c57240f031adb4712acbd2a066d34 method = merge cmdver = 0.4.3 diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c index 9b2dbd53..5c94324f 100644 --- a/deps/lightrec/disassembler.c +++ b/deps/lightrec/disassembler.c @@ -97,9 +97,6 @@ static const char *cp2_opcodes[] = { static const char *opcode_flags[] = { "switched branch/DS", - "unload Rs", - "unload Rt", - "unload Rd", "sync point", }; @@ -128,7 +125,26 @@ static const char *opcode_multdiv_flags[] = { "No div check", }; -static int print_flags(char *buf, size_t len, u16 flags, +static size_t do_snprintf(char *buf, size_t len, bool *first, + const char *arg1, const char *arg2) +{ + size_t bytes; + + if (*first) + bytes = snprintf(buf, len, "(%s%s", arg1, arg2); + else + bytes = snprintf(buf, len, ", %s%s", arg1, arg2); + + *first = false; + + return bytes; +} + +static const char * const reg_op_token[3] = { + "-", "*", "~", +}; + +static int print_flags(char *buf, size_t len, const struct opcode *op, const char **array, size_t array_size, bool is_io) { @@ -136,6 +152,8 @@ static int print_flags(char *buf, size_t len, u16 flags, unsigned int i, io_mode; size_t count = 0, bytes; bool first = true; + u32 flags = op->flags; + unsigned int reg_op; for (i = 0; i < array_size + ARRAY_SIZE(opcode_flags); i++) { if (!(flags & BIT(i))) @@ -146,12 +164,7 @@ static int print_flags(char *buf, size_t len, u16 flags, else flag_name = array[i - ARRAY_SIZE(opcode_flags)]; - if (first) - bytes = snprintf(buf, len, "(%s", flag_name); - else - bytes = snprintf(buf, len, ", %s", flag_name); - - first = false; + bytes = do_snprintf(buf, len, &first, "", flag_name); buf += bytes; len -= bytes; count += bytes; @@ -162,12 +175,39 @@ static int print_flags(char *buf, size_t len, u16 flags, if (io_mode > 0) { io_mode_name = opcode_io_modes[io_mode - 1]; - if (first) - bytes = snprintf(buf, len, "(%s", io_mode_name); - else - bytes = snprintf(buf, len, ", %s", io_mode_name); + bytes = do_snprintf(buf, len, &first, "", io_mode_name); + buf += bytes; + len -= bytes; + count += bytes; + } + } + + if (OPT_EARLY_UNLOAD) { + reg_op = LIGHTREC_FLAGS_GET_RS(flags); + if (reg_op) { + bytes = do_snprintf(buf, len, &first, + reg_op_token[reg_op - 1], + lightrec_reg_name(op->i.rs)); + buf += bytes; + len -= bytes; + count += bytes; + } + + reg_op = LIGHTREC_FLAGS_GET_RT(flags); + if (reg_op) { + bytes = do_snprintf(buf, len, &first, + reg_op_token[reg_op - 1], + lightrec_reg_name(op->i.rt)); + buf += bytes; + len -= bytes; + count += bytes; + } - first = false; + reg_op = LIGHTREC_FLAGS_GET_RD(flags); + if (reg_op) { + bytes = do_snprintf(buf, len, &first, + reg_op_token[reg_op - 1], + lightrec_reg_name(op->r.rd)); buf += bytes; len -= bytes; count += bytes; @@ -309,6 +349,13 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, std_opcodes[c.i.op], (pc & 0xf0000000) | (c.j.imm << 2)); case OP_BEQ: + if (c.i.rs == c.i.rt) { + *flags_ptr = opcode_branch_flags; + *nb_flags = ARRAY_SIZE(opcode_branch_flags); + return snprintf(buf, len, "b 0x%x", + pc + 4 + ((s16)c.i.imm << 2)); + } + fallthrough; case OP_BNE: case OP_BLEZ: case OP_BGTZ: @@ -417,8 +464,7 @@ void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr) count2 = 0; } - print_flags(buf3, sizeof(buf3), op->flags, flags_ptr, nb_flags, - is_io); + print_flags(buf3, sizeof(buf3), op, flags_ptr, nb_flags, is_io); printf("0x%08x (0x%x)\t%s%*c%s%*c%s\n", pc, i << 2, buf, 30 - (int)count, ' ', buf2, 30 - (int)count2, ' ', buf3); diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h index 9abc28ef..a4fc9f50 100644 --- a/deps/lightrec/disassembler.h +++ b/deps/lightrec/disassembler.h @@ -8,6 +8,7 @@ #include "debug.h" #include "lightrec.h" +#include "lightrec-config.h" #ifndef __packed #define __packed __attribute__((packed)) @@ -17,18 +18,15 @@ /* Flags for all opcodes */ #define LIGHTREC_NO_DS BIT(0) -#define LIGHTREC_UNLOAD_RS BIT(1) -#define LIGHTREC_UNLOAD_RT BIT(2) -#define LIGHTREC_UNLOAD_RD BIT(3) -#define LIGHTREC_SYNC BIT(4) +#define LIGHTREC_SYNC BIT(1) /* Flags for load/store opcodes */ -#define LIGHTREC_SMC BIT(5) -#define LIGHTREC_NO_INVALIDATE BIT(6) -#define LIGHTREC_NO_MASK BIT(7) +#define LIGHTREC_SMC BIT(2) +#define LIGHTREC_NO_INVALIDATE BIT(3) +#define LIGHTREC_NO_MASK BIT(4) /* I/O mode for load/store opcodes */ -#define LIGHTREC_IO_MODE_LSB 8 +#define LIGHTREC_IO_MODE_LSB 5 #define LIGHTREC_IO_MODE(x) ((x) << LIGHTREC_IO_MODE_LSB) #define LIGHTREC_IO_UNKNOWN 0x0 #define LIGHTREC_IO_DIRECT 0x1 @@ -41,13 +39,36 @@ (((x) & LIGHTREC_IO_MASK) >> LIGHTREC_IO_MODE_LSB) /* Flags for branches */ -#define LIGHTREC_EMULATE_BRANCH BIT(5) -#define LIGHTREC_LOCAL_BRANCH BIT(6) +#define LIGHTREC_EMULATE_BRANCH BIT(2) +#define LIGHTREC_LOCAL_BRANCH BIT(3) /* Flags for div/mult opcodes */ -#define LIGHTREC_NO_LO BIT(5) -#define LIGHTREC_NO_HI BIT(6) -#define LIGHTREC_NO_DIV_CHECK BIT(7) +#define LIGHTREC_NO_LO BIT(2) +#define LIGHTREC_NO_HI BIT(3) +#define LIGHTREC_NO_DIV_CHECK BIT(4) + +#define LIGHTREC_REG_RS_LSB 26 +#define LIGHTREC_REG_RS(x) ((x) << LIGHTREC_REG_RS_LSB) +#define LIGHTREC_REG_RS_MASK LIGHTREC_REG_RS(0x3) +#define LIGHTREC_FLAGS_GET_RS(x) \ + (((x) & LIGHTREC_REG_RS_MASK) >> LIGHTREC_REG_RS_LSB) + +#define LIGHTREC_REG_RT_LSB 28 +#define LIGHTREC_REG_RT(x) ((x) << LIGHTREC_REG_RT_LSB) +#define LIGHTREC_REG_RT_MASK LIGHTREC_REG_RT(0x3) +#define LIGHTREC_FLAGS_GET_RT(x) \ + (((x) & LIGHTREC_REG_RT_MASK) >> LIGHTREC_REG_RT_LSB) + +#define LIGHTREC_REG_RD_LSB 30 +#define LIGHTREC_REG_RD(x) ((x) << LIGHTREC_REG_RD_LSB) +#define LIGHTREC_REG_RD_MASK LIGHTREC_REG_RD(0x3) +#define LIGHTREC_FLAGS_GET_RD(x) \ + (((x) & LIGHTREC_REG_RD_MASK) >> LIGHTREC_REG_RD_LSB) + +#define LIGHTREC_REG_NOOP 0x0 +#define LIGHTREC_REG_UNLOAD 0x1 +#define LIGHTREC_REG_DISCARD 0x2 +#define LIGHTREC_REG_CLEAN 0x3 struct block; @@ -209,9 +230,61 @@ struct opcode { struct opcode_i i; struct opcode_j j; }; - u16 flags; + u32 flags; }; void lightrec_print_disassembly(const struct block *block, const u32 *code); +static inline _Bool op_flag_no_ds(u32 flags) +{ + return OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS); +} + +static inline _Bool op_flag_sync(u32 flags) +{ + return OPT_LOCAL_BRANCHES && (flags & LIGHTREC_SYNC); +} + +static inline _Bool op_flag_smc(u32 flags) +{ + return OPT_FLAG_STORES && (flags & LIGHTREC_SMC); +} + +static inline _Bool op_flag_no_invalidate(u32 flags) +{ + return (OPT_FLAG_IO || OPT_FLAG_STORES) && + (flags & LIGHTREC_NO_INVALIDATE); +} + +static inline _Bool op_flag_no_mask(u32 flags) +{ + return OPT_FLAG_IO && (flags & LIGHTREC_NO_MASK); +} + +static inline _Bool op_flag_emulate_branch(u32 flags) +{ + return OPT_DETECT_IMPOSSIBLE_BRANCHES && + (flags & LIGHTREC_EMULATE_BRANCH); +} + +static inline _Bool op_flag_local_branch(u32 flags) +{ + return OPT_LOCAL_BRANCHES && (flags & LIGHTREC_LOCAL_BRANCH); +} + +static inline _Bool op_flag_no_lo(u32 flags) +{ + return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_LO); +} + +static inline _Bool op_flag_no_hi(u32 flags) +{ + return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_HI); +} + +static inline _Bool op_flag_no_div_check(u32 flags) +{ + return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_DIV_CHECK); +} + #endif /* __DISASSEMBLER_H__ */ diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 15e1c6ef..3af04326 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -35,10 +35,11 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, u32 link, bool update_cycles) { struct regcache *reg_cache = state->reg_cache; - u32 cycles = state->cycles; jit_state_t *_jit = block->_jit; const struct opcode *op = &block->opcode_list[offset], *next = &block->opcode_list[offset + 1]; + u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c); + u16 offset_after_eob; jit_note(__FILE__, __LINE__); @@ -57,7 +58,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, } if (has_delay_slot(op->c) && - !(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) { + !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) { cycles += lightrec_cycles_of_opcode(next->c); /* Recompile the delay slot */ @@ -65,8 +66,8 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, lightrec_rec_opcode(state, block, offset + 1); } - /* Store back remaining registers */ - lightrec_storeback_regs(reg_cache, _jit); + /* Clean the remaining registers */ + lightrec_clean_regs(reg_cache, _jit); jit_movr(JIT_V0, reg_new_pc); @@ -75,7 +76,10 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, pr_debug("EOB: %u cycles\n", cycles); } - if (offset - !!(op->flags & LIGHTREC_NO_DS) < block->nb_ops - 1) + offset_after_eob = offset + 1 + + (has_delay_slot(op->c) && !op_flag_no_ds(op->flags)); + + if (offset_after_eob < block->nb_ops) state->branches[state->nb_branches++] = jit_b(); } @@ -87,10 +91,10 @@ void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block, union code c = block->opcode_list[offset].c; u32 cycles = state->cycles; - if (!after_op) - cycles -= lightrec_cycles_of_opcode(c); + if (after_op) + cycles += lightrec_cycles_of_opcode(c); - lightrec_storeback_regs(reg_cache, _jit); + lightrec_clean_regs(reg_cache, _jit); jit_movi(JIT_V0, block->pc + (offset << 2)); jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles); @@ -149,6 +153,43 @@ static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u1 31, get_branch_pc(block, offset, 2), true); } +static void lightrec_do_early_unload(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + const struct opcode *op = &block->opcode_list[offset]; + jit_state_t *_jit = block->_jit; + unsigned int i; + u8 reg; + struct { + u8 reg, op; + } reg_ops[3] = { + { op->r.rd, LIGHTREC_FLAGS_GET_RD(op->flags), }, + { op->i.rt, LIGHTREC_FLAGS_GET_RT(op->flags), }, + { op->i.rs, LIGHTREC_FLAGS_GET_RS(op->flags), }, + }; + + for (i = 0; i < ARRAY_SIZE(reg_ops); i++) { + reg = reg_ops[i].reg; + + switch (reg_ops[i].op) { + case LIGHTREC_REG_UNLOAD: + lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, true); + break; + + case LIGHTREC_REG_DISCARD: + lightrec_discard_reg_if_loaded(reg_cache, reg); + break; + + case LIGHTREC_REG_CLEAN: + lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, false); + break; + default: + break; + }; + } +} + static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code, u32 link, bool unconditional, bool bz) { @@ -159,26 +200,33 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 const struct opcode *op = &block->opcode_list[offset], *next = &block->opcode_list[offset + 1]; jit_node_t *addr; - u8 link_reg; - u32 target_offset, cycles = state->cycles; + u8 link_reg, rs, rt; bool is_forward = (s16)op->i.imm >= -1; + int op_cycles = lightrec_cycles_of_opcode(op->c); + u32 target_offset, cycles = state->cycles + op_cycles; u32 next_pc; jit_note(__FILE__, __LINE__); - if (!(op->flags & LIGHTREC_NO_DS)) + if (!op_flag_no_ds(op->flags)) cycles += lightrec_cycles_of_opcode(next->c); - state->cycles = 0; + state->cycles = -op_cycles; + + if (!unconditional) { + rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT); + rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache, + _jit, op->i.rt, REG_EXT); + + /* Unload dead registers before evaluating the branch */ + if (OPT_EARLY_UNLOAD) + lightrec_do_early_unload(state, block, offset); + } if (cycles) jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles); if (!unconditional) { - u8 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT), - rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache, - _jit, op->i.rt, REG_EXT); - /* Generate the branch opcode */ addr = jit_new_node_pww(code, NULL, rs, rt); @@ -186,12 +234,10 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 regs_backup = lightrec_regcache_enter_branch(reg_cache); } - if (op->flags & LIGHTREC_LOCAL_BRANCH) { - if (next && !(op->flags & LIGHTREC_NO_DS)) { - /* Recompile the delay slot */ - if (next->opcode) - lightrec_rec_opcode(state, block, offset + 1); - } + if (op_flag_local_branch(op->flags)) { + /* Recompile the delay slot */ + if (next && next->opcode && !op_flag_no_ds(op->flags)) + lightrec_rec_opcode(state, block, offset + 1); if (link) { /* Update the $ra register */ @@ -200,11 +246,11 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 lightrec_free_reg(reg_cache, link_reg); } - /* Store back remaining registers */ - lightrec_storeback_regs(reg_cache, _jit); + /* Clean remaining registers */ + lightrec_clean_regs(reg_cache, _jit); target_offset = offset + 1 + (s16)op->i.imm - - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); + - !!op_flag_no_ds(op->flags); pr_debug("Adding local branch to offset 0x%x\n", target_offset << 2); branch = &state->local_branches[ @@ -217,7 +263,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0); } - if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) { + if (!op_flag_local_branch(op->flags) || !is_forward) { next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm); lightrec_emit_end_of_block(state, block, offset, -1, next_pc, 31, link, false); @@ -235,7 +281,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 lightrec_free_reg(reg_cache, link_reg); } - if (!(op->flags & LIGHTREC_NO_DS) && next->opcode) + if (!op_flag_no_ds(op->flags) && next->opcode) lightrec_rec_opcode(state, block, offset + 1); } } @@ -769,7 +815,7 @@ static void rec_alu_mult(struct lightrec_cstate *state, { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; - u16 flags = block->opcode_list[offset].flags; + u32 flags = block->opcode_list[offset].flags; u8 reg_lo = get_mult_div_lo(c); u8 reg_hi = get_mult_div_hi(c); jit_state_t *_jit = block->_jit; @@ -785,18 +831,18 @@ static void rec_alu_mult(struct lightrec_cstate *state, rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags); - if (!(flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(flags)) lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0); else if (__WORDSIZE == 32) lo = lightrec_alloc_reg_temp(reg_cache, _jit); - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT); if (__WORDSIZE == 32) { /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit * operation if the MULT was detected a 32-bit only. */ - if (!(flags & LIGHTREC_NO_HI)) { + if (!op_flag_no_hi(flags)) { if (is_signed) jit_qmulr(lo, hi, rs, rt); else @@ -806,23 +852,23 @@ static void rec_alu_mult(struct lightrec_cstate *state, } } else { /* On 64-bit systems, do a 64*64->64 bit operation. */ - if (flags & LIGHTREC_NO_LO) { + if (op_flag_no_lo(flags)) { jit_mulr(hi, rs, rt); jit_rshi(hi, hi, 32); } else { jit_mulr(lo, rs, rt); /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */ - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) jit_rshi(hi, lo, 32); } } lightrec_free_reg(reg_cache, rs); lightrec_free_reg(reg_cache, rt); - if (!(flags & LIGHTREC_NO_LO) || __WORDSIZE == 32) + if (!op_flag_no_lo(flags) || __WORDSIZE == 32) lightrec_free_reg(reg_cache, lo); - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) lightrec_free_reg(reg_cache, hi); } @@ -831,8 +877,8 @@ static void rec_alu_div(struct lightrec_cstate *state, { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; - u16 flags = block->opcode_list[offset].flags; - bool no_check = flags & LIGHTREC_NO_DIV_CHECK; + u32 flags = block->opcode_list[offset].flags; + bool no_check = op_flag_no_div_check(flags); u8 reg_lo = get_mult_div_lo(c); u8 reg_hi = get_mult_div_hi(c); jit_state_t *_jit = block->_jit; @@ -849,22 +895,22 @@ static void rec_alu_div(struct lightrec_cstate *state, rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags); - if (!(flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(flags)) lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0); - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0); /* Jump to special handler if dividing by zero */ if (!no_check) branch = jit_beqi(rt, 0); - if (flags & LIGHTREC_NO_LO) { + if (op_flag_no_lo(flags)) { if (is_signed) jit_remr(hi, rs, rt); else jit_remr_u(hi, rs, rt); - } else if (flags & LIGHTREC_NO_HI) { + } else if (op_flag_no_hi(flags)) { if (is_signed) jit_divr(lo, rs, rt); else @@ -882,7 +928,7 @@ static void rec_alu_div(struct lightrec_cstate *state, jit_patch(branch); - if (!(flags & LIGHTREC_NO_LO)) { + if (!op_flag_no_lo(flags)) { if (is_signed) { jit_lti(lo, rs, 0); jit_lshi(lo, lo, 1); @@ -892,7 +938,7 @@ static void rec_alu_div(struct lightrec_cstate *state, } } - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) jit_movr(hi, rs); jit_patch(to_end); @@ -901,10 +947,10 @@ static void rec_alu_div(struct lightrec_cstate *state, lightrec_free_reg(reg_cache, rs); lightrec_free_reg(reg_cache, rt); - if (!(flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(flags)) lightrec_free_reg(reg_cache, lo); - if (!(flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(flags)) lightrec_free_reg(reg_cache, hi); } @@ -993,20 +1039,15 @@ static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - u8 tmp, tmp2; + u8 tmp; tmp = lightrec_alloc_reg_temp(reg_cache, _jit); jit_ldxi(tmp, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, wrappers_eps[wrapper])); if (with_arg) { - tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); - jit_movi(tmp2, arg); - - jit_stxi_i(offsetof(struct lightrec_state, c_wrapper_arg), - LIGHTREC_REG_STATE, tmp2); - - lightrec_free_reg(reg_cache, tmp2); + jit_prepare(); + jit_pushargi(arg); } lightrec_regcache_mark_live(reg_cache, _jit); @@ -1023,7 +1064,7 @@ static void rec_io(struct lightrec_cstate *state, struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; union code c = block->opcode_list[offset].c; - u16 flags = block->opcode_list[offset].flags; + u32 flags = block->opcode_list[offset].flags; bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags); u32 lut_entry; @@ -1066,7 +1107,7 @@ static void rec_store_memory(struct lightrec_cstate *cstate, s16 imm = (s16)c.i.imm; s32 simm = (s32)imm << (1 - lut_is_32bit(state)); s32 lut_offt = offsetof(struct lightrec_state, code_lut); - bool no_mask = op->flags & LIGHTREC_NO_MASK; + bool no_mask = op_flag_no_mask(op->flags); bool add_imm = c.i.imm && ((!state->mirrors_mapped && !no_mask) || (invalidate && ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); @@ -1191,6 +1232,7 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, jit_note(__FILE__, __LINE__); rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); if (state->offset_ram || state->offset_scratch) @@ -1230,8 +1272,6 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, lightrec_free_reg(reg_cache, tmp2); } - rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - if (is_big_endian() && swap_code && c.i.rt) { tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); @@ -1330,8 +1370,8 @@ static void rec_store(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code, jit_code_t swap_code) { - u16 flags = block->opcode_list[offset].flags; - bool no_invalidate = (flags & LIGHTREC_NO_INVALIDATE) || + u32 flags = block->opcode_list[offset].flags; + bool no_invalidate = op_flag_no_invalidate(flags) || state->state->invalidate_from_dma_only; switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { @@ -1410,7 +1450,7 @@ static void rec_load_memory(struct lightrec_cstate *cstate, struct opcode *op = &block->opcode_list[offset]; jit_state_t *_jit = block->_jit; u8 rs, rt, addr_reg, flags = REG_EXT; - bool no_mask = op->flags & LIGHTREC_NO_MASK; + bool no_mask = op_flag_no_mask(op->flags); union code c = op->c; s16 imm; @@ -1600,7 +1640,7 @@ static void rec_load(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code, jit_code_t swap_code, bool is_unsigned) { - u16 flags = block->opcode_list[offset].flags; + u32 flags = block->opcode_list[offset].flags; switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { case LIGHTREC_IO_RAM: @@ -1712,7 +1752,7 @@ static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u1 call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC); if (c.i.op == OP_CP0 && - !(block->opcode_list[offset].flags & LIGHTREC_NO_DS) && + !op_flag_no_ds(block->opcode_list[offset].flags) && (c.r.rd == 12 || c.r.rd == 13)) lightrec_emit_end_of_block(state, block, offset, -1, get_ds_pc(block, offset, 1), @@ -1835,7 +1875,7 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) lightrec_free_reg(reg_cache, rt); - if (!(block->opcode_list[offset].flags & LIGHTREC_NO_DS) && + if (!op_flag_no_ds(block->opcode_list[offset].flags) && (c.r.rd == 12 || c.r.rd == 13)) lightrec_emit_eob(state, block, offset + 1, true); } @@ -2378,9 +2418,11 @@ void lightrec_rec_opcode(struct lightrec_cstate *state, const struct opcode *op = &block->opcode_list[offset]; jit_state_t *_jit = block->_jit; lightrec_rec_func_t f; + u16 unload_offset; - if (op->flags & LIGHTREC_SYNC) { - jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); + if (op_flag_sync(op->flags)) { + if (state->cycles) + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); state->cycles = 0; lightrec_storeback_regs(reg_cache, _jit); @@ -2401,16 +2443,10 @@ void lightrec_rec_opcode(struct lightrec_cstate *state, (*f)(state, block, offset); } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RD)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->r.rd, true); - pr_debug("Cleaning RD reg %s\n", lightrec_reg_name(op->r.rd)); - } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RS)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true); - pr_debug("Cleaning RS reg %s\n", lightrec_reg_name(op->i.rt)); - } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RT)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true); - pr_debug("Cleaning RT reg %s\n", lightrec_reg_name(op->i.rt)); + if (OPT_EARLY_UNLOAD) { + unload_offset = offset + + (has_delay_slot(op->c) && !op_flag_no_ds(op->flags)); + + lightrec_do_early_unload(state, block, unload_offset); } } diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c index 922f0810..57986d81 100644 --- a/deps/lightrec/interpreter.c +++ b/deps/lightrec/interpreter.c @@ -63,7 +63,7 @@ static inline u32 jump_skip(struct interpreter *inter) inter->op = next_op(inter); inter->offset++; - if (inter->op->flags & LIGHTREC_SYNC) { + if (op_flag_sync(inter->op->flags)) { inter->state->current_cycle += inter->cycles; inter->cycles = 0; } @@ -101,8 +101,8 @@ static void update_cycles_before_branch(struct interpreter *inter) if (!inter->delay_slot) { cycles = lightrec_cycles_of_opcode(inter->op->c); - if (has_delay_slot(inter->op->c) && - !(inter->op->flags & LIGHTREC_NO_DS)) + if (!op_flag_no_ds(inter->op->flags) && + has_delay_slot(inter->op->c)) cycles += lightrec_cycles_of_opcode(next_op(inter)->c); inter->cycles += cycles; @@ -329,7 +329,7 @@ static u32 int_jump(struct interpreter *inter, bool link) if (link) state->regs.gpr[31] = old_pc + 8; - if (inter->op->flags & LIGHTREC_NO_DS) + if (op_flag_no_ds(inter->op->flags)) return pc; return int_delay_slot(inter, pc, true); @@ -348,14 +348,18 @@ static u32 int_JAL(struct interpreter *inter) static u32 int_jumpr(struct interpreter *inter, u8 link_reg) { struct lightrec_state *state = inter->state; - u32 old_pc, next_pc = state->regs.gpr[inter->op->r.rs]; + u32 old_pc = int_get_branch_pc(inter); + u32 next_pc = state->regs.gpr[inter->op->r.rs]; - if (link_reg) { - old_pc = int_get_branch_pc(inter); - state->regs.gpr[link_reg] = old_pc + 8; + if (op_flag_emulate_branch(inter->op->flags) && inter->offset) { + inter->cycles -= lightrec_cycles_of_opcode(inter->op->c); + return old_pc; } - if (inter->op->flags & LIGHTREC_NO_DS) + if (link_reg) + state->regs.gpr[link_reg] = old_pc + 8; + + if (op_flag_no_ds(inter->op->flags)) return next_pc; return int_delay_slot(inter, next_pc, true); @@ -373,8 +377,7 @@ static u32 int_special_JALR(struct interpreter *inter) static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc) { - if (!inter->delay_slot && - (inter->op->flags & LIGHTREC_LOCAL_BRANCH) && + if (!inter->delay_slot && op_flag_local_branch(inter->op->flags) && (s16)inter->op->c.i.imm >= 0) { next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2); next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc); @@ -388,9 +391,14 @@ static u32 int_branch(struct interpreter *inter, u32 pc, { u32 next_pc = pc + 4 + ((s16)code.i.imm << 2); + if (op_flag_emulate_branch(inter->op->flags) && inter->offset) { + inter->cycles -= lightrec_cycles_of_opcode(inter->op->c); + return pc; + } + update_cycles_before_branch(inter); - if (inter->op->flags & LIGHTREC_NO_DS) { + if (op_flag_no_ds(inter->op->flags)) { if (branch) return int_do_branch(inter, pc, next_pc); else @@ -403,7 +411,7 @@ static u32 int_branch(struct interpreter *inter, u32 pc, if (branch) return int_do_branch(inter, pc, next_pc); - if (inter->op->flags & LIGHTREC_EMULATE_BRANCH) + if (op_flag_emulate_branch(inter->op->flags)) return pc + 8; else return jump_after_branch(inter); @@ -497,7 +505,7 @@ static u32 int_ctc(struct interpreter *inter) /* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause), * return early so that the emulator will be able to check software * interrupt status. */ - if (!(inter->op->flags & LIGHTREC_NO_DS) && + if (!op_flag_no_ds(inter->op->flags) && op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13)) return int_get_ds_pc(inter, 1); else @@ -618,7 +626,7 @@ static u32 int_store(struct interpreter *inter) { u32 next_pc; - if (likely(!(inter->op->flags & LIGHTREC_SMC))) + if (likely(!op_flag_smc(inter->op->flags))) return int_io(inter, false); lightrec_rw(inter->state, inter->op->c, @@ -765,9 +773,9 @@ static u32 int_special_MULT(struct interpreter *inter) u8 reg_hi = get_mult_div_hi(inter->op->c); u64 res = (s64)rs * (s64)rt; - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = res >> 32; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = res; return jump_next(inter); @@ -782,9 +790,9 @@ static u32 int_special_MULTU(struct interpreter *inter) u8 reg_hi = get_mult_div_hi(inter->op->c); u64 res = (u64)rs * (u64)rt; - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = res >> 32; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = res; return jump_next(inter); @@ -807,9 +815,9 @@ static u32 int_special_DIV(struct interpreter *inter) hi = rs % rt; } - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = hi; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = lo; return jump_next(inter); @@ -832,9 +840,9 @@ static u32 int_special_DIVU(struct interpreter *inter) hi = rs % rt; } - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = hi; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = lo; return jump_next(inter); diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index 3c043d5e..4eedef27 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -137,7 +137,6 @@ struct lightrec_state { u32 target_cycle; u32 exit_flags; u32 old_cycle_counter; - u32 c_wrapper_arg; struct block *dispatcher, *c_wrapper_block; void *c_wrappers[C_WRAPPERS_COUNT]; void *wrappers_eps[C_WRAPPERS_COUNT]; @@ -161,7 +160,7 @@ struct lightrec_state { }; u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u16 *flags, + u32 addr, u32 data, u32 *flags, struct block *block); void lightrec_free_block(struct lightrec_state *state, struct block *block); @@ -238,7 +237,7 @@ static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm) { u16 flags = block->opcode_list[offset].flags; - offset += !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS)); + offset += op_flag_no_ds(flags); return block->pc + (offset + imm << 2); } @@ -247,7 +246,7 @@ static inline u32 get_branch_pc(const struct block *block, u16 offset, s16 imm) { u16 flags = block->opcode_list[offset].flags; - offset -= !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS)); + offset -= op_flag_no_ds(flags); return block->pc + (offset + imm << 2); } @@ -262,7 +261,6 @@ void lightrec_free_cstate(struct lightrec_cstate *cstate); union code lightrec_read_opcode(struct lightrec_state *state, u32 pc); -struct block * lightrec_get_block(struct lightrec_state *state, u32 pc); int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block); void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block); @@ -278,4 +276,9 @@ static inline u8 get_mult_div_hi(union code c) return (OPT_FLAG_MULT_DIV && c.r.imm) ? c.r.imm : REG_HI; } +static inline s16 s16_max(s16 a, s16 b) +{ + return a > b ? a : b; +} + #endif /* __LIGHTREC_PRIVATE_H__ */ diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 3b3fd090..ffa40f09 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -241,7 +241,7 @@ lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) } u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u16 *flags, struct block *block) + u32 addr, u32 data, u32 *flags, struct block *block) { const struct lightrec_mem_map *map; const struct lightrec_mem_map_ops *ops; @@ -309,7 +309,7 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, } static void lightrec_rw_helper(struct lightrec_state *state, - union code op, u16 *flags, + union code op, u32 *flags, struct block *block) { u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs], @@ -331,17 +331,16 @@ static void lightrec_rw_helper(struct lightrec_state *state, } } -static void lightrec_rw_cb(struct lightrec_state *state) +static void lightrec_rw_cb(struct lightrec_state *state, u32 arg) { - lightrec_rw_helper(state, (union code)state->c_wrapper_arg, NULL, NULL); + lightrec_rw_helper(state, (union code) arg, NULL, NULL); } -static void lightrec_rw_generic_cb(struct lightrec_state *state) +static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) { struct block *block; struct opcode *op; bool was_tagged; - u32 arg = state->c_wrapper_arg; u16 offset = (u16)arg; block = lightrec_find_block_from_lut(state->block_cache, @@ -362,6 +361,7 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state) "for recompilation\n", block->pc); block->flags |= BLOCK_SHOULD_RECOMPILE; + lut_write(state, lut_offset(block->pc), NULL); } } @@ -550,9 +550,9 @@ void lightrec_mtc(struct lightrec_state *state, union code op, u32 data) lightrec_mtc2(state, op.r.rd, data); } -static void lightrec_mtc_cb(struct lightrec_state *state) +static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg) { - union code op = (union code) state->c_wrapper_arg; + union code op = (union code) arg; lightrec_mtc(state, op, state->regs.gpr[op.r.rt]); } @@ -581,9 +581,9 @@ void lightrec_cp(struct lightrec_state *state, union code op) (*state->ops.cop2_op)(state, op.opcode); } -static void lightrec_cp_cb(struct lightrec_state *state) +static void lightrec_cp_cb(struct lightrec_state *state, u32 arg) { - lightrec_cp(state, (union code) state->c_wrapper_arg); + lightrec_cp(state, (union code) arg); } static void lightrec_syscall_cb(struct lightrec_state *state) @@ -596,7 +596,7 @@ static void lightrec_break_cb(struct lightrec_state *state) lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK); } -struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) +static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) { struct block *block = lightrec_find_block(state->block_cache, pc); @@ -707,11 +707,11 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) } static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, - void (*f)(struct lightrec_state *)) + void (*f)(struct lightrec_state *, u32), u32 arg) { state->current_cycle = state->target_cycle - cycles_delta; - (*f)(state); + (*f)(state, arg); return state->target_cycle - state->current_cycle; } @@ -857,6 +857,8 @@ static struct block * generate_wrapper(struct lightrec_state *state) for (i = 0; i < NUM_TEMPS; i++) jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); + jit_getarg(JIT_R1, jit_arg()); + /* Jump to the trampoline */ to_tramp = jit_jmpi(); @@ -889,6 +891,7 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(LIGHTREC_REG_CYCLE); jit_pushargr(JIT_R0); + jit_pushargr(JIT_R1); jit_finishi(c_function_wrapper); jit_retval_i(LIGHTREC_REG_CYCLE); @@ -1353,18 +1356,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, continue; } - cstate->cycles += lightrec_cycles_of_opcode(elm->c); - if (should_emulate(elm)) { pr_debug("Branch at offset 0x%x will be emulated\n", i << 2); lightrec_emit_eob(cstate, block, i, false); - skip_next = !(elm->flags & LIGHTREC_NO_DS); + skip_next = !op_flag_no_ds(elm->flags); } else { lightrec_rec_opcode(cstate, block, i); - skip_next = has_delay_slot(elm->c) && - !(elm->flags & LIGHTREC_NO_DS); + skip_next = !op_flag_no_ds(elm->flags) && has_delay_slot(elm->c); #if _WIN32 /* FIXME: GNU Lightning on Windows seems to use our * mapped registers as temporaries. Until the actual bug @@ -1373,6 +1373,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, lightrec_regcache_mark_live(cstate->reg_cache, _jit); #endif } + + cstate->cycles += lightrec_cycles_of_opcode(elm->c); } for (i = 0; i < cstate->nb_branches; i++) @@ -1401,11 +1403,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_err("Unable to find branch target\n"); } - jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, eob_wrapper_func)); - - jit_jmpr(JIT_R0); - + jit_patch_abs(jit_jmpi(), state->eob_wrapper_func); jit_ret(); jit_epilog(); @@ -1767,6 +1765,10 @@ void lightrec_destroy(struct lightrec_state *state) state->current_cycle = ~state->current_cycle; lightrec_print_info(state); + lightrec_free_block_cache(state->block_cache); + lightrec_free_block(state, state->dispatcher); + lightrec_free_block(state, state->c_wrapper_block); + if (ENABLE_THREADED_COMPILER) { lightrec_free_recompiler(state->rec); lightrec_reaper_destroy(state->reaper); @@ -1774,9 +1776,6 @@ void lightrec_destroy(struct lightrec_state *state) lightrec_free_cstate(state->cstate); } - lightrec_free_block_cache(state->block_cache); - lightrec_free_block(state, state->dispatcher); - lightrec_free_block(state, state->c_wrapper_block); finish_jit(); if (ENABLE_CODE_BUFFER && state->tlsf) tlsf_destroy(state->tlsf); diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 8ee66ad0..8da84eee 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -69,6 +69,9 @@ static u64 opcode_read_mask(union code op) case OP_SPECIAL_MFLO: return BIT(REG_LO); case OP_SPECIAL_SLL: + if (!op.r.imm) + return 0; + fallthrough; case OP_SPECIAL_SRL: case OP_SPECIAL_SRA: return BIT(op.r.rt); @@ -99,6 +102,9 @@ static u64 opcode_read_mask(union code op) case OP_LUI: return 0; case OP_BEQ: + if (op.i.rs == op.i.rt) + return 0; + fallthrough; case OP_BNE: case OP_LWL: case OP_LWR: @@ -144,6 +150,10 @@ static u64 opcode_write_mask(union code op) return BIT(REG_HI); case OP_SPECIAL_MTLO: return BIT(REG_LO); + case OP_SPECIAL_SLL: + if (!op.r.imm) + return 0; + fallthrough; default: return BIT(op.r.rd); } @@ -214,7 +224,7 @@ static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 r union code c; unsigned int i; - if (list[offset].flags & LIGHTREC_SYNC) + if (op_flag_sync(list[offset].flags)) return -1; for (i = offset; i > 0; i--) { @@ -227,7 +237,7 @@ static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 r return i - 1; } - if ((list[i - 1].flags & LIGHTREC_SYNC) || + if (op_flag_sync(list[i - 1].flags) || has_delay_slot(c) || opcode_reads_register(c, reg)) break; @@ -241,7 +251,7 @@ static int find_next_reader(const struct opcode *list, unsigned int offset, u8 r unsigned int i; union code c; - if (list[offset].flags & LIGHTREC_SYNC) + if (op_flag_sync(list[offset].flags)) return -1; for (i = offset; ; i++) { @@ -254,7 +264,7 @@ static int find_next_reader(const struct opcode *list, unsigned int offset, u8 r return i; } - if ((list[i].flags & LIGHTREC_SYNC) || + if (op_flag_sync(list[i].flags) || has_delay_slot(c) || opcode_writes_register(c, reg)) break; } @@ -266,7 +276,7 @@ static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg) { unsigned int i; - if (list[offset].flags & LIGHTREC_SYNC) + if (op_flag_sync(list[offset].flags)) return false; for (i = offset + 1; ; i++) { @@ -277,7 +287,7 @@ static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg) return true; if (has_delay_slot(list[i].c)) { - if (list[i].flags & LIGHTREC_NO_DS || + if (op_flag_no_ds(list[i].flags) || opcode_reads_register(list[i + 1].c, reg)) return false; @@ -470,7 +480,7 @@ static u32 lightrec_propagate_consts(const struct opcode *op, known |= BIT(0); v[0] = 0; - if (op->flags & LIGHTREC_SYNC) + if (op_flag_sync(op->flags)) return BIT(0); switch (c.i.op) { @@ -824,7 +834,7 @@ static void lightrec_remove_useless_lui(struct block *block, unsigned int offset *op = &block->opcode_list[offset]; int reader; - if (!(op->flags & LIGHTREC_SYNC) && (known & BIT(op->i.rt)) && + if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) && values[op->i.rt] == op->i.imm << 16) { pr_debug("Converting duplicated LUI to NOP\n"); op->opcode = 0x0; @@ -877,6 +887,38 @@ static void lightrec_modify_lui(struct block *block, unsigned int offset) } } +static int lightrec_transform_branches(struct lightrec_state *state, + struct block *block) +{ + struct opcode *op; + unsigned int i; + s32 offset; + + for (i = 0; i < block->nb_ops; i++) { + op = &block->opcode_list[i]; + + switch (op->i.op) { + case OP_J: + /* Transform J opcode into BEQ $zero, $zero if possible. */ + offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm) + - (s32)(block->pc >> 2) - (s32)i - 1; + + if (offset == (s16)offset) { + pr_debug("Transform J into BEQ $zero, $zero\n"); + op->i.op = OP_BEQ; + op->i.rs = 0; + op->i.rt = 0; + op->i.imm = offset; + + } + default: /* fall-through */ + break; + } + } + + return 0; +} + static int lightrec_transform_ops(struct lightrec_state *state, struct block *block) { struct opcode *list = block->opcode_list; @@ -991,7 +1033,7 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc struct opcode *list, *next = &block->opcode_list[0]; unsigned int i; union code op, next_op; - u8 flags; + u32 flags; for (i = 0; i < block->nb_ops - 1; i++) { list = next; @@ -999,17 +1041,16 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc next_op = next->c; op = list->c; - if (!has_delay_slot(op) || - list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) || + if (!has_delay_slot(op) || op_flag_no_ds(list->flags) || + op_flag_emulate_branch(list->flags) || op.opcode == 0 || next_op.opcode == 0) continue; if (i && has_delay_slot(block->opcode_list[i - 1].c) && - !(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS)) + !op_flag_no_ds(block->opcode_list[i - 1].flags)) continue; - if ((list->flags & LIGHTREC_SYNC) || - (next->flags & LIGHTREC_SYNC)) + if (op_flag_sync(list->flags) || op_flag_sync(next->flags)) continue; switch (list->i.op) { @@ -1113,13 +1154,14 @@ static int shrink_opcode_list(struct lightrec_state *state, struct block *block, static int lightrec_detect_impossible_branches(struct lightrec_state *state, struct block *block) { - struct opcode *op, *next = &block->opcode_list[0]; + struct opcode *op, *list = block->opcode_list, *next = &list[0]; unsigned int i; int ret = 0; + s16 offset; for (i = 0; i < block->nb_ops - 1; i++) { op = next; - next = &block->opcode_list[i + 1]; + next = &list[i + 1]; if (!has_delay_slot(op->c) || (!load_in_delay_slot(next->c) && @@ -1134,9 +1176,23 @@ static int lightrec_detect_impossible_branches(struct lightrec_state *state, continue; } + offset = i + 1 + (s16)op->i.imm; + if (load_in_delay_slot(next->c) && + (offset >= 0 && offset < block->nb_ops) && + !opcode_reads_register(list[offset].c, next->c.i.rt)) { + /* The 'impossible' branch is a local branch - we can + * verify here that the first opcode of the target does + * not use the target register of the delay slot */ + + pr_debug("Branch at offset 0x%x has load delay slot, " + "but is local and dest opcode does not read " + "dest register\n", i << 2); + continue; + } + op->flags |= LIGHTREC_EMULATE_BRANCH; - if (op == block->opcode_list) { + if (op == list) { pr_debug("First opcode of block PC 0x%08x is an impossible branch\n", block->pc); @@ -1225,57 +1281,163 @@ bool has_delay_slot(union code op) bool should_emulate(const struct opcode *list) { - return has_delay_slot(list->c) && - (list->flags & LIGHTREC_EMULATE_BRANCH); + return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c); +} + +static bool op_writes_rd(union code c) +{ + switch (c.i.op) { + case OP_SPECIAL: + case OP_META_MOV: + return true; + default: + return false; + } +} + +static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op) +{ + if (op_writes_rd(op->c) && reg == op->r.rd) + op->flags |= LIGHTREC_REG_RD(reg_op); + else if (op->i.rs == reg) + op->flags |= LIGHTREC_REG_RS(reg_op); + else if (op->i.rt == reg) + op->flags |= LIGHTREC_REG_RT(reg_op); + else + pr_debug("Cannot add unload/clean/discard flag: " + "opcode does not touch register %s!\n", + lightrec_reg_name(reg)); } static void lightrec_add_unload(struct opcode *op, u8 reg) { - if (op->i.op == OP_SPECIAL && reg == op->r.rd) - op->flags |= LIGHTREC_UNLOAD_RD; + lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD); +} - if (op->i.rs == reg) - op->flags |= LIGHTREC_UNLOAD_RS; - if (op->i.rt == reg) - op->flags |= LIGHTREC_UNLOAD_RT; +static void lightrec_add_discard(struct opcode *op, u8 reg) +{ + lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD); +} + +static void lightrec_add_clean(struct opcode *op, u8 reg) +{ + lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN); +} + +static void +lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w) +{ + unsigned int reg; + s16 offset; + + for (reg = 0; reg < 34; reg++) { + offset = s16_max(last_w[reg], last_r[reg]); + + if (offset >= 0) + lightrec_add_unload(&list[offset], reg); + } + + memset(last_r, 0xff, sizeof(*last_r) * 34); + memset(last_w, 0xff, sizeof(*last_w) * 34); } static int lightrec_early_unload(struct lightrec_state *state, struct block *block) { - unsigned int i, offset; + u16 i, offset; struct opcode *op; + s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0; + u64 mask_r, mask_w, dirty = 0, loaded = 0; u8 reg; - for (reg = 1; reg < 34; reg++) { - int last_r_id = -1, last_w_id = -1; + memset(last_r, 0xff, sizeof(last_r)); + memset(last_w, 0xff, sizeof(last_w)); - for (i = 0; i < block->nb_ops; i++) { - union code c = block->opcode_list[i].c; + /* + * Clean if: + * - the register is dirty, and is read again after a branch opcode + * + * Unload if: + * - the register is dirty or loaded, and is not read again + * - the register is dirty or loaded, and is written again after a branch opcode + * - the next opcode has the SYNC flag set + * + * Discard if: + * - the register is dirty or loaded, and is written again + */ - if (opcode_reads_register(c, reg)) - last_r_id = i; - if (opcode_writes_register(c, reg)) - last_w_id = i; + for (i = 0; i < block->nb_ops; i++) { + op = &block->opcode_list[i]; + + if (op_flag_sync(op->flags) || should_emulate(op)) { + /* The next opcode has the SYNC flag set, or is a branch + * that should be emulated: unload all registers. */ + lightrec_early_unload_sync(block->opcode_list, last_r, last_w); + dirty = 0; + loaded = 0; } - if (last_w_id > last_r_id) - offset = (unsigned int)last_w_id; - else if (last_r_id >= 0) - offset = (unsigned int)last_r_id; - else - continue; + if (next_sync == i) { + last_sync = i; + pr_debug("Last sync: 0x%x\n", last_sync << 2); + } - op = &block->opcode_list[offset]; + if (has_delay_slot(op->c)) { + next_sync = i + 1 + !op_flag_no_ds(op->flags); + pr_debug("Next sync: 0x%x\n", next_sync << 2); + } - if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS)) - offset++; + mask_r = opcode_read_mask(op->c); + mask_w = opcode_write_mask(op->c); - if (offset == block->nb_ops) - continue; + for (reg = 0; reg < 34; reg++) { + if (mask_r & BIT(reg)) { + if (dirty & BIT(reg) && last_w[reg] < last_sync) { + /* The register is dirty, and is read + * again after a branch: clean it */ + + lightrec_add_clean(&block->opcode_list[last_w[reg]], reg); + dirty &= ~BIT(reg); + loaded |= BIT(reg); + } + + last_r[reg] = i; + } + + if (mask_w & BIT(reg)) { + if ((dirty & BIT(reg) && last_w[reg] < last_sync) || + (loaded & BIT(reg) && last_r[reg] < last_sync)) { + /* The register is dirty or loaded, and + * is written again after a branch: + * unload it */ + + offset = s16_max(last_w[reg], last_r[reg]); + lightrec_add_unload(&block->opcode_list[offset], reg); + dirty &= ~BIT(reg); + loaded &= ~BIT(reg); + } else if (!(mask_r & BIT(reg)) && + ((dirty & BIT(reg) && last_w[reg] > last_sync) || + (loaded & BIT(reg) && last_r[reg] > last_sync))) { + /* The register is dirty or loaded, and + * is written again: discard it */ + + offset = s16_max(last_w[reg], last_r[reg]); + lightrec_add_discard(&block->opcode_list[offset], reg); + dirty &= ~BIT(reg); + loaded &= ~BIT(reg); + } + + last_w[reg] = i; + } - lightrec_add_unload(&block->opcode_list[offset], reg); + } + + dirty |= mask_w; + loaded |= mask_r; } + /* Unload all registers that are dirty or loaded at the end of block. */ + lightrec_early_unload_sync(block->opcode_list, last_r, last_w); + return 0; } @@ -1310,6 +1472,7 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) "requiring invalidation\n", list->opcode); list->flags |= LIGHTREC_NO_INVALIDATE; + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); } /* Detect writes whose destination address is inside the @@ -1341,6 +1504,8 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) kunseg_val = kunseg(val); psx_map = lightrec_get_map_idx(state, kunseg_val); + list->flags &= ~LIGHTREC_IO_MASK; + switch (psx_map) { case PSX_MAP_KERNEL_USER_RAM: if (val == kunseg_val) @@ -1400,7 +1565,7 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, mask |= opcode_read_mask(op->c); mask |= opcode_write_mask(op->c); - if (op->flags & LIGHTREC_SYNC) + if (op_flag_sync(op->flags)) sync = true; switch (op->i.op) { @@ -1410,11 +1575,10 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, case OP_BGTZ: case OP_REGIMM: /* TODO: handle backwards branches too */ - if (!last && - (op->flags & LIGHTREC_LOCAL_BRANCH) && + if (!last && op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) { branch_offset = i + 1 + (s16)op->c.i.imm - - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); + - !!op_flag_no_ds(op->flags); reg = get_mfhi_mflo_reg(block, branch_offset, NULL, mask, sync, mflo, false); @@ -1446,8 +1610,7 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, if (op->r.rs != 31) return reg; - if (!sync && - !(op->flags & LIGHTREC_NO_DS) && + if (!sync && !op_flag_no_ds(op->flags) && (next->i.op == OP_SPECIAL) && ((!mflo && next->r.op == OP_SPECIAL_MFHI) || (mflo && next->r.op == OP_SPECIAL_MFLO))) @@ -1520,10 +1683,9 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset, case OP_BGTZ: case OP_REGIMM: /* TODO: handle backwards branches too */ - if ((op->flags & LIGHTREC_LOCAL_BRANCH) && - (s16)op->c.i.imm >= 0) { + if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) { branch_offset = i + 1 + (s16)op->c.i.imm - - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); + - !!op_flag_no_ds(op->flags); lightrec_replace_lo_hi(block, branch_offset, last, lo); lightrec_replace_lo_hi(block, i + 1, branch_offset, lo); @@ -1595,7 +1757,7 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * /* Don't support opcodes in delay slots */ if ((i && has_delay_slot(block->opcode_list[i - 1].c)) || - (list->flags & LIGHTREC_NO_DS)) { + op_flag_no_ds(list->flags)) { continue; } @@ -1781,6 +1943,7 @@ static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block * IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence), IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset), IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches), + IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches), IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches), IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops), IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots), diff --git a/deps/lightrec/regcache.c b/deps/lightrec/regcache.c index c0188706..791a9c5c 100644 --- a/deps/lightrec/regcache.c +++ b/deps/lightrec/regcache.c @@ -493,6 +493,15 @@ void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, } } +void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg) +{ + struct native_register *nreg; + + nreg = find_mapped_reg(cache, reg, false); + if (nreg) + lightrec_discard_nreg(nreg); +} + struct native_register * lightrec_regcache_enter_branch(struct regcache *cache) { struct native_register *backup; diff --git a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h index 835c9c92..5aa5050f 100644 --- a/deps/lightrec/regcache.h +++ b/deps/lightrec/regcache.h @@ -50,6 +50,7 @@ void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit); void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, u8 reg, _Bool unload); +void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg); u8 lightrec_alloc_reg_in_address(struct regcache *cache, jit_state_t *_jit, u8 reg, s16 offset); diff --git a/libpcsxcore/lightrec/plugin.c b/libpcsxcore/lightrec/plugin.c index b347bb63..52d37f0f 100644 --- a/libpcsxcore/lightrec/plugin.c +++ b/libpcsxcore/lightrec/plugin.c @@ -556,11 +556,14 @@ static void lightrec_plugin_reset(void) { struct lightrec_registers *regs; - lightrec_plugin_shutdown(); - lightrec_plugin_init(); - regs = lightrec_get_registers(lightrec_state); + /* Invalidate all blocks */ + lightrec_invalidate_all(lightrec_state); + + /* Reset registers */ + memset(regs, 0, sizeof(*regs)); + regs->cp0[12] = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1 regs->cp0[15] = 0x00000002; // PRevID = Revision ID, same as R3000A