From 684432ad1a2eb287f74d7211762d273843426966 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sat, 26 Aug 2023 10:38:47 +0200 Subject: [PATCH] git subrepo pull --force deps/lightrec subrepo: subdir: "deps/lightrec" merged: "afc75e49d7" upstream: origin: "https://github.com/pcercuei/lightrec.git" branch: "master" commit: "afc75e49d7" git-subrepo: version: "0.4.6" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "110b9eb" --- deps/lightrec/.gitrepo | 6 +- deps/lightrec/CMakeLists.txt | 7 +- deps/lightrec/constprop.c | 26 +-- deps/lightrec/disassembler.c | 11 +- deps/lightrec/disassembler.h | 3 + deps/lightrec/emitter.c | 285 ++++++++++++++++++------ deps/lightrec/interpreter.c | 20 +- deps/lightrec/lightrec-config.h.cmakein | 1 + deps/lightrec/lightrec-private.h | 20 +- deps/lightrec/lightrec.c | 37 +-- deps/lightrec/lightrec.h | 9 +- deps/lightrec/optimizer.c | 97 +++++++- deps/lightrec/regcache.c | 31 +++ deps/lightrec/regcache.h | 5 + 14 files changed, 431 insertions(+), 127 deletions(-) diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 6d4845d0..b2e393a3 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = 15216f3a2e7207432245682e80d97dc361f28fde - parent = 266ec37b97da56dbf277426d3f4b5ef833a08e55 + commit = afc75e49d7eb8c36697e969cb980757974630b3e + parent = ba86ff938a6b17c171dd68ebdf897ca3e30550f8 method = merge - cmdver = 0.4.3 + cmdver = 0.4.6 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt index 9518a9ab..dfe35a02 100644 --- a/deps/lightrec/CMakeLists.txt +++ b/deps/lightrec/CMakeLists.txt @@ -1,5 +1,5 @@ -cmake_minimum_required(VERSION 3.0) -project(lightrec LANGUAGES C VERSION 0.7) +cmake_minimum_required(VERSION 3.5) +project(lightrec LANGUAGES C VERSION 0.8) set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries") if (NOT BUILD_SHARED_LIBS) @@ -73,6 +73,7 @@ option(OPT_SWITCH_DELAY_SLOTS "(optimization) Switch delay slots" ON) option(OPT_FLAG_IO "(optimization) Flag I/O opcodes when the target can be detected" ON) option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/LO" ON) option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON) +option(OPT_PRELOAD_PC "(optimization) Preload PC value into register" ON) include_directories(${CMAKE_CURRENT_BINARY_DIR}) @@ -87,7 +88,7 @@ set_target_properties(${PROJECT_NAME} PROPERTIES ) if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang") - target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wno-parentheses) + target_compile_options(${PROJECT_NAME} PRIVATE -Wall) endif() if (CMAKE_C_COMPILER_ID STREQUAL "Clang") target_compile_options(${PROJECT_NAME} PRIVATE -Wno-initializer-overrides) diff --git a/deps/lightrec/constprop.c b/deps/lightrec/constprop.c index 8499c6ec..bdae0e2f 100644 --- a/deps/lightrec/constprop.c +++ b/deps/lightrec/constprop.c @@ -329,7 +329,7 @@ void lightrec_consts_propagate(const struct block *block, case OP_SPECIAL_SRL: v[c.r.rd].value = v[c.r.rt].value >> c.r.imm; v[c.r.rd].known = (v[c.r.rt].known >> c.r.imm) - | (BIT(c.r.imm) - 1 << 32 - c.r.imm); + | ((BIT(c.r.imm) - 1) << (32 - c.r.imm)); v[c.r.rd].sign = c.r.imm ? 0 : v[c.r.rt].sign; break; @@ -357,7 +357,7 @@ void lightrec_consts_propagate(const struct block *block, imm = v[c.r.rs].value & 0x1f; v[c.r.rd].value = v[c.r.rt].value >> imm; v[c.r.rd].known = (v[c.r.rt].known >> imm) - | (BIT(imm) - 1 << 32 - imm); + | ((BIT(imm) - 1) << (32 - imm)); if (imm) v[c.r.rd].sign = 0; } else { @@ -459,7 +459,7 @@ void lightrec_consts_propagate(const struct block *block, case OP_SPECIAL_JALR: v[c.r.rd].known = 0xffffffff; v[c.r.rd].sign = 0; - v[c.r.rd].value = block->pc + (idx + 2 << 2); + v[c.r.rd].value = block->pc + ((idx + 2) << 2); break; default: @@ -484,18 +484,18 @@ void lightrec_consts_propagate(const struct block *block, if (OPT_FLAG_MULT_DIV && c.r.imm) { if (c.r.op >= 32) { - v[c.r.imm].value = v[c.r.rs].value << c.r.op - 32; - v[c.r.imm].known = (v[c.r.rs].known << c.r.op - 32) + v[c.r.imm].value = v[c.r.rs].value << (c.r.op - 32); + v[c.r.imm].known = (v[c.r.rs].known << (c.r.op - 32)) | (BIT(c.r.op - 32) - 1); - v[c.r.imm].sign = v[c.r.rs].sign << c.r.op - 32; + v[c.r.imm].sign = v[c.r.rs].sign << (c.r.op - 32); } else if (c.i.op == OP_META_MULT2) { - v[c.r.imm].value = (s32)v[c.r.rs].value >> 32 - c.r.op; - v[c.r.imm].known = (s32)v[c.r.rs].known >> 32 - c.r.op; - v[c.r.imm].sign = (s32)v[c.r.rs].sign >> 32 - c.r.op; + v[c.r.imm].value = (s32)v[c.r.rs].value >> (32 - c.r.op); + v[c.r.imm].known = (s32)v[c.r.rs].known >> (32 - c.r.op); + v[c.r.imm].sign = (s32)v[c.r.rs].sign >> (32 - c.r.op); } else { - v[c.r.imm].value = v[c.r.rs].value >> 32 - c.r.op; - v[c.r.imm].known = v[c.r.rs].known >> 32 - c.r.op; - v[c.r.imm].sign = v[c.r.rs].sign >> 32 - c.r.op; + v[c.r.imm].value = v[c.r.rs].value >> (32 - c.r.op); + v[c.r.imm].known = v[c.r.rs].known >> (32 - c.r.op); + v[c.r.imm].sign = v[c.r.rs].sign >> (32 - c.r.op); } } break; @@ -705,7 +705,7 @@ void lightrec_consts_propagate(const struct block *block, case OP_JAL: v[31].known = 0xffffffff; v[31].sign = 0; - v[31].value = block->pc + (idx + 2 << 2); + v[31].value = block->pc + ((idx + 2) << 2); break; default: diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c index f687d28c..8bfaf4d0 100644 --- a/deps/lightrec/disassembler.c +++ b/deps/lightrec/disassembler.c @@ -157,6 +157,10 @@ static const char * const opcode_branch_flags[] = { "local branch", }; +static const char * const opcode_movi_flags[] = { + "movi", +}; + static const char * const opcode_multdiv_flags[] = { "No LO", "No HI", @@ -403,10 +407,13 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, pc + 4 + ((s16)c.i.imm << 2)); case OP_ADDI: case OP_ADDIU: + case OP_ORI: + *flags_ptr = opcode_movi_flags; + *nb_flags = ARRAY_SIZE(opcode_movi_flags); + fallthrough; case OP_SLTI: case OP_SLTIU: case OP_ANDI: - case OP_ORI: case OP_XORI: return snprintf(buf, len, "%s%s,%s,0x%04hx", std_opcodes[c.i.op], @@ -415,6 +422,8 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, (u16)c.i.imm); case OP_LUI: + *flags_ptr = opcode_movi_flags; + *nb_flags = ARRAY_SIZE(opcode_movi_flags); return snprintf(buf, len, "%s%s,0x%04hx", std_opcodes[c.i.op], lightrec_reg_name(c.i.rt), diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h index 9e39484c..1804f884 100644 --- a/deps/lightrec/disassembler.h +++ b/deps/lightrec/disassembler.h @@ -20,6 +20,9 @@ #define LIGHTREC_NO_DS BIT(0) #define LIGHTREC_SYNC BIT(1) +/* Flags for LUI, ORI, ADDIU */ +#define LIGHTREC_MOVI BIT(2) + /* Flags for load/store opcodes */ #define LIGHTREC_SMC BIT(2) #define LIGHTREC_NO_INVALIDATE BIT(3) diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 02510f0e..b7ace194 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -75,7 +75,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, jit_state_t *_jit = block->_jit; const struct opcode *op = &block->opcode_list[offset], *ds = get_delay_slot(block->opcode_list, offset); - u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c); + u32 cycles = state->cycles + lightrec_cycles_of_opcode(state->state, op->c); jit_note(__FILE__, __LINE__); @@ -96,7 +96,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, if (has_delay_slot(op->c) && !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) { - cycles += lightrec_cycles_of_opcode(ds->c); + cycles += lightrec_cycles_of_opcode(state->state, ds->c); /* Recompile the delay slot */ if (ds->c.opcode) @@ -256,7 +256,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 *ds = get_delay_slot(block->opcode_list, offset); jit_node_t *addr; bool is_forward = (s16)op->i.imm >= 0; - int op_cycles = lightrec_cycles_of_opcode(op->c); + int op_cycles = lightrec_cycles_of_opcode(state->state, op->c); u32 target_offset, cycles = state->cycles + op_cycles; bool no_indirection = false; u32 next_pc; @@ -265,7 +265,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 jit_note(__FILE__, __LINE__); if (!op_flag_no_ds(op->flags)) - cycles += lightrec_cycles_of_opcode(ds->c); + cycles += lightrec_cycles_of_opcode(state->state, ds->c); state->cycles = -op_cycles; @@ -429,6 +429,36 @@ static void rec_regimm_BGEZAL(struct lightrec_cstate *state, !op->i.rs, true); } +static void rec_alloc_rs_rd(struct regcache *reg_cache, + jit_state_t *_jit, + const struct opcode *op, + u8 rs, u8 rd, + u8 in_flags, u8 out_flags, + u8 *rs_out, u8 *rd_out) +{ + bool unload, discard; + u32 unload_flags; + + if (OPT_EARLY_UNLOAD) { + unload_flags = LIGHTREC_FLAGS_GET_RS(op->flags); + unload = unload_flags == LIGHTREC_REG_UNLOAD; + discard = unload_flags == LIGHTREC_REG_DISCARD; + } + + if (OPT_EARLY_UNLOAD && rs && rd != rs && (unload || discard)) { + rs = lightrec_alloc_reg_in(reg_cache, _jit, rs, in_flags); + lightrec_remap_reg(reg_cache, _jit, rs, rd, discard); + lightrec_set_reg_out_flags(reg_cache, rs, out_flags); + rd = rs; + } else { + rs = lightrec_alloc_reg_in(reg_cache, _jit, rs, in_flags); + rd = lightrec_alloc_reg_out(reg_cache, _jit, rd, out_flags); + } + + *rs_out = rs; + *rd_out = rd; +} + static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block, u16 offset, jit_code_t code, bool slti) { @@ -441,8 +471,9 @@ static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block out_flags |= REG_ZEXT; jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, REG_EXT); - rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, out_flags); + + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.i.rs, c.i.rt, REG_EXT, out_flags, &rs, &rt); jit_new_node_www(code, rt, rs, (s32)(s16) c.i.imm); @@ -459,10 +490,11 @@ static void rec_alu_special(struct lightrec_cstate *state, const struct block *b u8 rd, rt, rs; jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, REG_EXT); + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, REG_EXT); - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, - out_ext ? REG_EXT | REG_ZEXT : 0); + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.r.rs, c.r.rd, REG_EXT, + out_ext ? REG_EXT | REG_ZEXT : 0, &rs, &rd); jit_new_node_www(code, rd, rs, rt); @@ -480,17 +512,17 @@ static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *bl u8 rd, rt, rs, temp, flags = 0; jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0); if (code == jit_code_rshr) flags = REG_EXT; else if (code == jit_code_rshr_u) flags = REG_ZEXT; - rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags); - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags); + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0); + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.r.rt, c.r.rd, flags, flags, &rt, &rd); - if (rs != rd && rt != rd) { + if (rt != rd) { jit_andi(rd, rs, 0x1f); jit_new_node_www(code, rd, rt, rd); } else { @@ -512,14 +544,18 @@ static void rec_movi(struct lightrec_cstate *state, union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; u16 flags = REG_EXT; + s32 value = (s32)(s16) c.i.imm; u8 rt; - if (!(c.i.imm & 0x8000)) + if (block->opcode_list[offset].flags & LIGHTREC_MOVI) + value += (s32)((u32)state->movi_temp[c.i.rt] << 16); + + if (value >= 0) flags |= REG_ZEXT; rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags); - jit_movi(rt, (s32)(s16) c.i.imm); + jit_movi(rt, value); lightrec_free_reg(reg_cache, rt); } @@ -527,9 +563,11 @@ static void rec_movi(struct lightrec_cstate *state, static void rec_ADDIU(struct lightrec_cstate *state, const struct block *block, u16 offset) { + const struct opcode *op = &block->opcode_list[offset]; + _jit_name(block->_jit, __func__); - if (block->opcode_list[offset].c.i.rs) + if (op->i.rs && !(op->flags & LIGHTREC_MOVI)) rec_alu_imm(state, block, offset, jit_code_addi, false); else rec_movi(state, block, offset); @@ -567,9 +605,9 @@ static void rec_ANDI(struct lightrec_cstate *state, _jit_name(block->_jit, __func__); jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); - rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, - REG_EXT | REG_ZEXT); + + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.i.rs, c.i.rt, 0, REG_EXT | REG_ZEXT, &rs, &rt); /* PSX code uses ANDI 0xff / ANDI 0xffff a lot, which are basically * casts to uint8_t / uint16_t. */ @@ -593,8 +631,9 @@ static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *bl u8 rs, rt, flags; jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); - rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, 0); + + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.i.rs, c.i.rt, 0, 0, &rs, &rt); flags = lightrec_get_reg_in_flags(reg_cache, rs); lightrec_set_reg_out_flags(reg_cache, rt, flags); @@ -609,8 +648,24 @@ static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *bl static void rec_ORI(struct lightrec_cstate *state, const struct block *block, u16 offset) { - _jit_name(block->_jit, __func__); - rec_alu_or_xor(state, block, offset, jit_code_ori); + const struct opcode *op = &block->opcode_list[offset]; + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + s32 val; + u8 rt; + + _jit_name(_jit, __func__); + + if (op->flags & LIGHTREC_MOVI) { + rt = lightrec_alloc_reg_out(reg_cache, _jit, op->i.rt, REG_EXT); + + val = ((u32)state->movi_temp[op->i.rt] << 16) | op->i.imm; + jit_movi(rt, val); + + lightrec_free_reg(reg_cache, rt); + } else { + rec_alu_or_xor(state, block, offset, jit_code_ori); + } } static void rec_XORI(struct lightrec_cstate *state, @@ -628,6 +683,11 @@ static void rec_LUI(struct lightrec_cstate *state, jit_state_t *_jit = block->_jit; u8 rt, flags = REG_EXT; + if (block->opcode_list[offset].flags & LIGHTREC_MOVI) { + state->movi_temp[c.i.rt] = c.i.imm; + return; + } + jit_name(__func__); jit_note(__FILE__, __LINE__); @@ -681,9 +741,10 @@ static void rec_special_AND(struct lightrec_cstate *state, _jit_name(block->_jit, __func__); jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0); + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0); - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0); + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.r.rs, c.r.rd, 0, 0, &rs, &rd); flags_rs = lightrec_get_reg_in_flags(reg_cache, rs); flags_rt = lightrec_get_reg_in_flags(reg_cache, rt); @@ -715,9 +776,10 @@ static void rec_special_or_nor(struct lightrec_cstate *state, u8 rd, rt, rs, flags_rs, flags_rt, flags_rd = 0; jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0); + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0); - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0); + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.r.rs, c.r.rd, 0, 0, &rs, &rd); flags_rs = lightrec_get_reg_in_flags(reg_cache, rs); flags_rt = lightrec_get_reg_in_flags(reg_cache, rt); @@ -768,9 +830,10 @@ static void rec_special_XOR(struct lightrec_cstate *state, _jit_name(block->_jit, __func__); jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0); + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0); - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, 0); + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.r.rs, c.r.rd, 0, 0, &rs, &rd); flags_rs = lightrec_get_reg_in_flags(reg_cache, rs); flags_rt = lightrec_get_reg_in_flags(reg_cache, rt); @@ -831,7 +894,7 @@ static void rec_alu_shift(struct lightrec_cstate *state, const struct block *blo struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; - u8 rd, rt, flags = 0; + u8 rd, rt, flags = 0, out_flags = 0; jit_note(__FILE__, __LINE__); @@ -840,13 +903,14 @@ static void rec_alu_shift(struct lightrec_cstate *state, const struct block *blo else if (code == jit_code_rshi_u) flags = REG_ZEXT; - rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags); - /* Input reg is zero-extended, if we SRL at least by one bit, we know * the output reg will be both zero-extended and sign-extended. */ + out_flags = flags; if (code == jit_code_rshi_u && c.r.imm) - flags |= REG_EXT; - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, flags); + out_flags |= REG_EXT; + + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.r.rt, c.r.rd, flags, out_flags, &rt, &rd); jit_new_node_www(code, rd, rt, c.r.imm); @@ -1048,14 +1112,16 @@ static void rec_special_DIVU(struct lightrec_cstate *state, } static void rec_alu_mv_lo_hi(struct lightrec_cstate *state, - const struct block *block, u8 dst, u8 src) + const struct block *block, u16 offset, + u8 dst, u8 src) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; jit_note(__FILE__, __LINE__); - src = lightrec_alloc_reg_in(reg_cache, _jit, src, 0); - dst = lightrec_alloc_reg_out(reg_cache, _jit, dst, REG_EXT); + + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + src, dst, 0, REG_EXT, &src, &dst); jit_extr_i(dst, src); @@ -1069,7 +1135,7 @@ static void rec_special_MFHI(struct lightrec_cstate *state, union code c = block->opcode_list[offset].c; _jit_name(block->_jit, __func__); - rec_alu_mv_lo_hi(state, block, c.r.rd, REG_HI); + rec_alu_mv_lo_hi(state, block, offset, c.r.rd, REG_HI); } static void rec_special_MTHI(struct lightrec_cstate *state, @@ -1078,7 +1144,7 @@ static void rec_special_MTHI(struct lightrec_cstate *state, union code c = block->opcode_list[offset].c; _jit_name(block->_jit, __func__); - rec_alu_mv_lo_hi(state, block, REG_HI, c.r.rs); + rec_alu_mv_lo_hi(state, block, offset, REG_HI, c.r.rs); } static void rec_special_MFLO(struct lightrec_cstate *state, @@ -1087,7 +1153,7 @@ static void rec_special_MFLO(struct lightrec_cstate *state, union code c = block->opcode_list[offset].c; _jit_name(block->_jit, __func__); - rec_alu_mv_lo_hi(state, block, c.r.rd, REG_LO); + rec_alu_mv_lo_hi(state, block, offset, c.r.rd, REG_LO); } static void rec_special_MTLO(struct lightrec_cstate *state, @@ -1096,7 +1162,7 @@ static void rec_special_MTLO(struct lightrec_cstate *state, union code c = block->opcode_list[offset].c; _jit_name(block->_jit, __func__); - rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs); + rec_alu_mv_lo_hi(state, block, offset, REG_LO, c.r.rs); } static void call_to_c_wrapper(struct lightrec_cstate *state, @@ -1213,11 +1279,11 @@ static void rec_store_memory(struct lightrec_cstate *cstate, bool add_imm = c.i.imm && ((!state->mirrors_mapped && !no_mask) || (invalidate && ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); - bool need_tmp = !no_mask || addr_offset || add_imm || invalidate; + bool need_tmp = !no_mask || add_imm || invalidate; bool swc2 = c.i.op == OP_SWC2; u8 in_reg = swc2 ? REG_TEMP : c.i.rt; + s8 reg_imm; - rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0); rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); if (need_tmp) tmp = lightrec_alloc_reg_temp(reg_cache, _jit); @@ -1234,18 +1300,29 @@ static void rec_store_memory(struct lightrec_cstate *cstate, } if (!no_mask) { - jit_andi(tmp, addr_reg, addr_mask); + reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, + addr_mask); + + jit_andr(tmp, addr_reg, reg_imm); addr_reg = tmp; + + lightrec_free_reg(reg_cache, reg_imm); } if (addr_offset) { + reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, + addr_offset); tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); - jit_addi(tmp2, addr_reg, addr_offset); + jit_addr(tmp2, addr_reg, reg_imm); addr_reg2 = tmp2; + + lightrec_free_reg(reg_cache, reg_imm); } else { addr_reg2 = addr_reg; } + rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0); + if (is_big_endian() && swap_code && in_reg) { tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); @@ -1341,33 +1418,38 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_end; bool swc2 = c.i.op == OP_SWC2; - bool offset_ram_or_scratch = state->offset_ram || state->offset_scratch; - u8 tmp, tmp2, rs, rt, in_reg = swc2 ? REG_TEMP : c.i.rt; + u8 tmp, tmp2 = 0, rs, rt, in_reg = swc2 ? REG_TEMP : c.i.rt; + u32 addr_mask; + s32 reg_imm; s16 imm; jit_note(__FILE__, __LINE__); rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); - if (offset_ram_or_scratch) - tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + if (state->mirrors_mapped) + addr_mask = 0x1f800000 | (4 * RAM_SIZE - 1); + else + addr_mask = 0x1f800000 | (RAM_SIZE - 1); + + reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, addr_mask); /* Convert to KUNSEG and avoid RAM mirrors */ - if (state->mirrors_mapped) { - imm = (s16)c.i.imm; - jit_andi(tmp, rs, 0x1f800000 | (4 * RAM_SIZE - 1)); - } else if (c.i.imm) { + if (!state->mirrors_mapped && c.i.imm) { imm = 0; jit_addi(tmp, rs, (s16)c.i.imm); - jit_andi(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1)); + jit_andr(tmp, tmp, reg_imm); } else { - imm = 0; - jit_andi(tmp, rs, 0x1f800000 | (RAM_SIZE - 1)); + imm = (s16)c.i.imm; + jit_andr(tmp, rs, reg_imm); } lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, reg_imm); if (state->offset_ram != state->offset_scratch) { + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + to_not_ram = jit_bmsi(tmp, BIT(28)); jit_movi(tmp2, state->offset_ram); @@ -1378,10 +1460,11 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, jit_movi(tmp2, state->offset_scratch); jit_patch(to_end); } else if (state->offset_ram) { - jit_movi(tmp2, state->offset_ram); + tmp2 = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, + state->offset_ram); } - if (offset_ram_or_scratch) { + if (state->offset_ram || state->offset_scratch) { jit_addr(tmp, tmp, tmp2); lightrec_free_reg(reg_cache, tmp2); } @@ -1415,6 +1498,8 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block bool swc2 = c.i.op == OP_SWC2; u8 tmp, tmp2, tmp3, masked_reg, rs, rt; u8 in_reg = swc2 ? REG_TEMP : c.i.rt; + u32 addr_mask = 0x1f800000 | (ram_size - 1); + s32 reg_imm; jit_note(__FILE__, __LINE__); @@ -1422,15 +1507,18 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0); + reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, addr_mask); + /* Convert to KUNSEG and avoid RAM mirrors */ if (c.i.imm) { jit_addi(tmp2, rs, (s16)c.i.imm); - jit_andi(tmp2, tmp2, 0x1f800000 | (ram_size - 1)); + jit_andr(tmp2, tmp2, reg_imm); } else { - jit_andi(tmp2, rs, 0x1f800000 | (ram_size - 1)); + jit_andr(tmp2, rs, reg_imm); } lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, reg_imm); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); if (state->offset_ram != state->offset_scratch) { @@ -1501,7 +1589,7 @@ static void rec_store(struct lightrec_cstate *state, u32 flags = block->opcode_list[offset].flags; u32 mode = LIGHTREC_FLAGS_GET_IO_MODE(flags); bool no_invalidate = op_flag_no_invalidate(flags) || - state->state->invalidate_from_dma_only; + (state->state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY); union code c = block->opcode_list[offset].c; bool is_swc2 = c.i.op == OP_SWC2; @@ -1598,6 +1686,7 @@ static void rec_load_memory(struct lightrec_cstate *cstate, u8 rs, rt, out_reg, addr_reg, flags = REG_EXT; bool no_mask = op_flag_no_mask(op->flags); union code c = op->c; + s8 reg_imm; s16 imm; if (load_delay || c.i.op == OP_LWC2) @@ -1623,13 +1712,23 @@ static void rec_load_memory(struct lightrec_cstate *cstate, } if (!no_mask) { - jit_andi(rt, addr_reg, addr_mask); + reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, + addr_mask); + + jit_andr(rt, addr_reg, reg_imm); addr_reg = rt; + + lightrec_free_reg(reg_cache, reg_imm); } if (addr_offset) { - jit_addi(rt, addr_reg, addr_offset); + reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, + addr_offset); + + jit_addr(rt, addr_reg, reg_imm); addr_reg = rt; + + lightrec_free_reg(reg_cache, reg_imm); } jit_new_node_www(code, rt, addr_reg, imm); @@ -1700,6 +1799,9 @@ static void rec_load_direct(struct lightrec_cstate *cstate, jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2; u8 tmp, rs, rt, out_reg, addr_reg, flags = REG_EXT; union code c = op->c; + s32 addr_mask; + u32 reg_imm; + s8 offt_reg; s16 imm; if (load_delay || c.i.op == OP_LWC2) @@ -1734,17 +1836,36 @@ static void rec_load_direct(struct lightrec_cstate *cstate, if (state->offset_ram == state->offset_bios && state->offset_ram == state->offset_scratch) { + if (!state->mirrors_mapped) + addr_mask = 0x1f800000 | (RAM_SIZE - 1); + else + addr_mask = 0x1fffffff; + + reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, + addr_mask); if (!state->mirrors_mapped) { jit_andi(tmp, addr_reg, BIT(28)); jit_rshi_u(tmp, tmp, 28 - 22); - jit_ori(tmp, tmp, 0x1f800000 | (RAM_SIZE - 1)); + jit_orr(tmp, tmp, reg_imm); jit_andr(rt, addr_reg, tmp); } else { - jit_andi(rt, addr_reg, 0x1fffffff); + jit_andr(rt, addr_reg, reg_imm); } - if (state->offset_ram) - jit_movi(tmp, state->offset_ram); + lightrec_free_reg(reg_cache, reg_imm); + + if (state->offset_ram) { + offt_reg = lightrec_get_reg_with_value(reg_cache, + state->offset_ram); + if (offt_reg < 0) { + jit_movi(tmp, state->offset_ram); + lightrec_temp_set_value(reg_cache, tmp, + state->offset_ram); + } else { + lightrec_free_reg(reg_cache, tmp); + tmp = offt_reg; + } + } } else { to_not_ram = jit_bmsi(addr_reg, BIT(28)); @@ -2101,7 +2222,7 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) if (!op_flag_no_ds(block->opcode_list[offset].flags) && (c.r.rd == 12 || c.r.rd == 13)) { - state->cycles += lightrec_cycles_of_opcode(c); + state->cycles += lightrec_cycles_of_opcode(state->state, c); lightrec_emit_eob(state, block, offset + 1); } } @@ -2477,11 +2598,30 @@ static void rec_meta_MOV(struct lightrec_cstate *state, union code c = op->c; jit_state_t *_jit = block->_jit; bool unload_rd; + bool unload_rs, discard_rs; u8 rs, rd; _jit_name(block->_jit, __func__); jit_note(__FILE__, __LINE__); + unload_rs = OPT_EARLY_UNLOAD + && LIGHTREC_FLAGS_GET_RS(op->flags) == LIGHTREC_REG_UNLOAD; + discard_rs = OPT_EARLY_UNLOAD + && LIGHTREC_FLAGS_GET_RS(op->flags) == LIGHTREC_REG_DISCARD; + + if ((unload_rs || discard_rs) && c.m.rs) { + /* If the source register is going to be unloaded or discarded, + * then we can simply mark its host register as now pointing to + * the destination register. */ + pr_debug("Remap %s to %s at offset 0x%x\n", + lightrec_reg_name(c.m.rs), lightrec_reg_name(c.m.rd), + offset << 2); + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0); + lightrec_remap_reg(reg_cache, _jit, rs, c.m.rd, discard_rs); + lightrec_free_reg(reg_cache, rs); + return; + } + unload_rd = OPT_EARLY_UNLOAD && LIGHTREC_FLAGS_GET_RD(op->flags) == LIGHTREC_REG_UNLOAD; @@ -2537,8 +2677,8 @@ static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state, _jit_name(block->_jit, __func__); jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0); - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT); + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.m.rs, c.m.rd, 0, REG_EXT, &rs, &rd); if (c.m.op == OP_META_EXTC) jit_extr_c(rd, rs); @@ -2622,8 +2762,9 @@ static void rec_meta_COM(struct lightrec_cstate *state, u8 rd, rs, flags; jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0); - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, 0); + + rec_alloc_rs_rd(reg_cache, _jit, &block->opcode_list[offset], + c.m.rs, c.m.rd, 0, 0, &rs, &rd); flags = lightrec_get_reg_in_flags(reg_cache, rs); diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c index 80a07f32..37264d3e 100644 --- a/deps/lightrec/interpreter.c +++ b/deps/lightrec/interpreter.c @@ -74,7 +74,7 @@ static inline u32 jump_skip(struct interpreter *inter) static inline u32 jump_next(struct interpreter *inter) { - inter->cycles += lightrec_cycles_of_opcode(inter->op->c); + inter->cycles += lightrec_cycles_of_opcode(inter->state, inter->op->c); if (unlikely(inter->delay_slot)) return 0; @@ -84,7 +84,7 @@ static inline u32 jump_next(struct interpreter *inter) static inline u32 jump_after_branch(struct interpreter *inter) { - inter->cycles += lightrec_cycles_of_opcode(inter->op->c); + inter->cycles += lightrec_cycles_of_opcode(inter->state, inter->op->c); if (unlikely(inter->delay_slot)) return 0; @@ -100,11 +100,11 @@ static void update_cycles_before_branch(struct interpreter *inter) u32 cycles; if (!inter->delay_slot) { - cycles = lightrec_cycles_of_opcode(inter->op->c); + cycles = lightrec_cycles_of_opcode(inter->state, inter->op->c); if (!op_flag_no_ds(inter->op->flags) && has_delay_slot(inter->op->c)) - cycles += lightrec_cycles_of_opcode(next_op(inter)->c); + cycles += lightrec_cycles_of_opcode(inter->state, next_op(inter)->c); inter->cycles += cycles; inter->state->current_cycle += inter->cycles; @@ -155,7 +155,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) bool run_first_op = false, dummy_ld = false, save_rs = false, load_in_ds, branch_in_ds = false, branch_at_addr = false, branch_taken; - u32 old_rs, new_rs, new_rt; + u32 old_rs, new_rt, new_rs = 0; u32 next_pc, ds_next_pc; u32 cause, epc; @@ -236,7 +236,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) branch_taken = is_branch_taken(reg_cache, op_next); pr_debug("Target of impossible branch is a branch, " "%staken.\n", branch_taken ? "" : "not "); - inter->cycles += lightrec_cycles_of_opcode(op_next); + inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next); old_rs = reg_cache[op_next.r.rs]; } else { new_op.c = op_next; @@ -252,7 +252,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) reg_cache[op->r.rs] = old_rs; } - inter->cycles += lightrec_cycles_of_opcode(op_next); + inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next); } } else { next_pc = int_get_ds_pc(inter, 2); @@ -293,7 +293,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) if (dummy_ld) reg_cache[op->r.rt] = new_rt; - inter->cycles += lightrec_cycles_of_opcode(op->c); + inter->cycles += lightrec_cycles_of_opcode(inter->state, op->c); if (branch_at_addr && branch_taken) { /* If the branch at the target of the branch opcode is taken, @@ -306,7 +306,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) inter2.op = &new_op; inter2.block = NULL; - inter->cycles += lightrec_cycles_of_opcode(op_next); + inter->cycles += lightrec_cycles_of_opcode(inter->state, op_next); pr_debug("Running delay slot of branch at target of impossible " "branch\n"); @@ -1191,7 +1191,7 @@ static u32 lightrec_emulate_block_list(struct lightrec_state *state, pc = lightrec_int_op(&inter); /* Add the cycles of the last branch */ - inter.cycles += lightrec_cycles_of_opcode(inter.op->c); + inter.cycles += lightrec_cycles_of_opcode(inter.state, inter.op->c); state->current_cycle += inter.cycles; diff --git a/deps/lightrec/lightrec-config.h.cmakein b/deps/lightrec/lightrec-config.h.cmakein index ed29ee4d..9086a7ae 100644 --- a/deps/lightrec/lightrec-config.h.cmakein +++ b/deps/lightrec/lightrec-config.h.cmakein @@ -23,6 +23,7 @@ #cmakedefine01 OPT_FLAG_IO #cmakedefine01 OPT_FLAG_MULT_DIV #cmakedefine01 OPT_EARLY_UNLOAD +#cmakedefine01 OPT_PRELOAD_PC #endif /* __LIGHTREC_CONFIG_H__ */ diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index 1f8e10b5..456d7af2 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -51,7 +51,11 @@ #define SET_DEFAULT_ELM(table, value) [0] = NULL #endif -#define fallthrough do {} while (0) /* fall-through */ +#if __has_attribute(__fallthrough__) +# define fallthrough __attribute__((__fallthrough__)) +#else +# define fallthrough do {} while (0) /* fallthrough */ +#endif #define container_of(ptr, type, member) \ ((type *)((void *)(ptr) - offsetof(type, member))) @@ -73,6 +77,7 @@ #define BLOCK_IS_DEAD BIT(3) #define BLOCK_IS_MEMSET BIT(4) #define BLOCK_NO_OPCODE_LIST BIT(5) +#define BLOCK_PRELOAD_PC BIT(6) #define RAM_SIZE 0x200000 #define BIOS_SIZE 0x80000 @@ -144,6 +149,7 @@ struct lightrec_cstate { struct lightrec_branch local_branches[512]; struct lightrec_branch_target targets[512]; + u16 movi_temp[32]; unsigned int nb_local_branches; unsigned int nb_targets; unsigned int cycles; @@ -164,6 +170,7 @@ struct lightrec_state { u32 target_cycle; u32 exit_flags; u32 old_cycle_counter; + u32 cycles_per_op; struct block *dispatcher, *c_wrapper_block; void *c_wrappers[C_WRAPPERS_COUNT]; void *wrappers_eps[C_WRAPPERS_COUNT]; @@ -183,9 +190,9 @@ struct lightrec_state { unsigned int nb_maps; const struct lightrec_mem_map *maps; uintptr_t offset_ram, offset_bios, offset_scratch, offset_io; + u32 opt_flags; _Bool with_32bit_lut; _Bool mirrors_mapped; - _Bool invalidate_from_dma_only; void *code_lut[]; }; @@ -265,7 +272,7 @@ static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm) offset += op_flag_no_ds(flags); - return block->pc + (offset + imm << 2); + return block->pc + ((offset + imm) << 2); } static inline u32 get_branch_pc(const struct block *block, u16 offset, s16 imm) @@ -274,7 +281,7 @@ static inline u32 get_branch_pc(const struct block *block, u16 offset, s16 imm) offset -= op_flag_no_ds(flags); - return block->pc + (offset + imm << 2); + return block->pc + ((offset + imm) << 2); } void lightrec_mtc(struct lightrec_state *state, union code op, u8 reg, u32 data); @@ -291,7 +298,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block); void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *list); -__cnst unsigned int lightrec_cycles_of_opcode(union code code); +unsigned int lightrec_cycles_of_opcode(const struct lightrec_state *state, + union code code); static inline u8 get_mult_div_lo(union code c) { @@ -347,7 +355,7 @@ static inline u8 block_clear_flags(struct block *block, u8 mask) static inline _Bool can_sign_extend(s32 value, u8 order) { - return (u32)(value >> order - 1) + 1 < 2; + return ((u32)(value >> (order - 1)) + 1) < 2; } static inline _Bool can_zero_extend(u32 value, u8 order) diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 45c3149d..79db4477 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -40,7 +40,7 @@ static void lightrec_default_sb(struct lightrec_state *state, u32 opcode, { *(u8 *)host = data; - if (!state->invalidate_from_dma_only) + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate(state, addr, 1); } @@ -49,7 +49,7 @@ static void lightrec_default_sh(struct lightrec_state *state, u32 opcode, { *(u16 *)host = HTOLE16(data); - if (!state->invalidate_from_dma_only) + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate(state, addr, 2); } @@ -58,7 +58,7 @@ static void lightrec_default_sw(struct lightrec_state *state, u32 opcode, { *(u32 *)host = HTOLE32(data); - if (!state->invalidate_from_dma_only) + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate(state, addr, 4); } @@ -524,7 +524,7 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) status = state->regs.cp0[12]; /* Handle software interrupts */ - if (!!(status & cause & 0x300) & status) + if ((!!(status & cause & 0x300)) & status) lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); /* Handle hardware interrupts */ @@ -1027,7 +1027,7 @@ static u32 lightrec_memset(struct lightrec_state *state) kunseg_pc, (uintptr_t)host, length); memset(host, 0, length); - if (!state->invalidate_from_dma_only) + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate_map(state, map, kunseg_pc, length); /* Rough estimation of the number of cycles consumed */ @@ -1080,8 +1080,8 @@ static void update_cycle_counter_after_c(jit_state_t *_jit) static void sync_next_pc(jit_state_t *_jit) { if (lightrec_store_next_pc()) { - jit_ldxi_i(JIT_V0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, next_pc)); + jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, next_pc)); } } @@ -1323,9 +1323,10 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) return (union code) LE32TOH(*code); } -__cnst unsigned int lightrec_cycles_of_opcode(union code code) +unsigned int lightrec_cycles_of_opcode(const struct lightrec_state *state, + union code code) { - return 2; + return state->cycles_per_op; } void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops) @@ -1555,7 +1556,9 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, block->_jit = _jit; lightrec_regcache_reset(cstate->reg_cache); - lightrec_preload_pc(cstate->reg_cache, _jit); + + if (OPT_PRELOAD_PC && (block->flags & BLOCK_PRELOAD_PC)) + lightrec_preload_pc(cstate->reg_cache, _jit); cstate->cycles = 0; cstate->nb_local_branches = 0; @@ -1593,7 +1596,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, #endif } - cstate->cycles += lightrec_cycles_of_opcode(elm->c); + cstate->cycles += lightrec_cycles_of_opcode(state, elm->c); } for (i = 0; i < cstate->nb_local_branches; i++) { @@ -1918,6 +1921,7 @@ struct lightrec_state * lightrec_init(char *argv0, state->tlsf = tlsf; state->with_32bit_lut = with_32bit_lut; state->in_delay_slot_n = 0xff; + state->cycles_per_op = 2; state->block_cache = lightrec_blockcache_init(state); if (!state->block_cache) @@ -2064,12 +2068,12 @@ void lightrec_invalidate_all(struct lightrec_state *state) memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE); } -void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only) +void lightrec_set_unsafe_opt_flags(struct lightrec_state *state, u32 flags) { - if (state->invalidate_from_dma_only != dma_only) + if ((flags ^ state->opt_flags) & LIGHTREC_OPT_INV_DMA_ONLY) lightrec_invalidate_all(state); - state->invalidate_from_dma_only = dma_only; + state->opt_flags = flags; } void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags) @@ -2112,3 +2116,8 @@ struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state) { return &state->regs; } + +void lightrec_set_cycles_per_opcode(struct lightrec_state *state, u32 cycles) +{ + state->cycles_per_op = cycles; +} diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h index 0798b581..9779951b 100644 --- a/deps/lightrec/lightrec.h +++ b/deps/lightrec/lightrec.h @@ -64,6 +64,10 @@ struct lightrec_mem_map; #define LIGHTREC_EXIT_SEGFAULT (1 << 3) #define LIGHTREC_EXIT_NOMEM (1 << 4) +/* Unsafe optimizations flags */ +#define LIGHTREC_OPT_INV_DMA_ONLY (1 << 0) +#define LIGHTREC_OPT_SP_GP_HIT_RAM (1 << 1) + enum psx_map { PSX_MAP_KERNEL_USER_RAM, PSX_MAP_BIOS, @@ -129,12 +133,12 @@ __api u32 lightrec_run_interpreter(struct lightrec_state *state, __api void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len); __api void lightrec_invalidate_all(struct lightrec_state *state); -__api void lightrec_set_invalidate_mode(struct lightrec_state *state, - _Bool dma_only); __api void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags); __api u32 lightrec_exit_flags(struct lightrec_state *state); +__api void lightrec_set_unsafe_opt_flags(struct lightrec_state *state, u32 flags); + __api __cnst struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state); @@ -142,6 +146,7 @@ __api u32 lightrec_current_cycle_count(const struct lightrec_state *state); __api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles); __api void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles); +__api void lightrec_set_cycles_per_opcode(struct lightrec_state *state, u32 cycles); #ifdef __cplusplus }; diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 58d9d56c..38d77d8e 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -596,7 +596,7 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset, pr_debug("Convert LHU+SLL+SRA to LH\n"); v[ldop->i.rt].known = 0; - v[ldop->i.rt].sign = 0xffffff80 << 24 - curr->r.imm; + v[ldop->i.rt].sign = 0xffffff80 << (24 - curr->r.imm); } } @@ -647,6 +647,31 @@ lightrec_remove_useless_lui(struct block *block, unsigned int offset, } } +static void lightrec_lui_to_movi(struct block *block, unsigned int offset) +{ + struct opcode *ori, *lui = &block->opcode_list[offset]; + int next; + + if (lui->i.op != OP_LUI) + return; + + next = find_next_reader(block->opcode_list, offset + 1, lui->i.rt); + if (next > 0) { + ori = &block->opcode_list[next]; + + switch (ori->i.op) { + case OP_ORI: + case OP_ADDI: + case OP_ADDIU: + if (ori->i.rs == ori->i.rt && ori->i.imm) { + ori->flags |= LIGHTREC_MOVI; + lui->flags |= LIGHTREC_MOVI; + } + break; + } + } +} + static void lightrec_modify_lui(struct block *block, unsigned int offset) { union code c, *lui = &block->opcode_list[offset].c; @@ -669,7 +694,7 @@ static void lightrec_modify_lui(struct block *block, unsigned int offset) } pr_debug("Convert LUI at offset 0x%x to kuseg\n", - i - 1 << 2); + (i - 1) << 2); lui->i.imm = kunseg(lui->i.imm << 16) >> 16; break; } @@ -947,6 +972,8 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl if (i == 0 || !has_delay_slot(list[i - 1].c)) lightrec_modify_lui(block, i); lightrec_remove_useless_lui(block, i, v); + if (i == 0 || !has_delay_slot(list[i - 1].c)) + lightrec_lui_to_movi(block, i); break; /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU @@ -1743,7 +1770,10 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) /* Assume that all I/O operations that target * $sp or $gp will always only target a mapped * memory (RAM, BIOS, scratchpad). */ - list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + if (state->opt_flags & LIGHTREC_OPT_SP_GP_HIT_RAM) + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM); + else + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); } fallthrough; @@ -2157,6 +2187,66 @@ static int lightrec_replace_memset(struct lightrec_state *state, struct block *b return 0; } +static int lightrec_test_preload_pc(struct lightrec_state *state, struct block *block) +{ + unsigned int i; + union code c; + u32 flags; + + for (i = 0; i < block->nb_ops; i++) { + c = block->opcode_list[i].c; + flags = block->opcode_list[i].flags; + + if (op_flag_sync(flags)) + break; + + switch (c.i.op) { + case OP_J: + case OP_JAL: + block->flags |= BLOCK_PRELOAD_PC; + return 0; + + case OP_REGIMM: + switch (c.r.rt) { + case OP_REGIMM_BLTZAL: + case OP_REGIMM_BGEZAL: + block->flags |= BLOCK_PRELOAD_PC; + return 0; + default: + break; + } + fallthrough; + case OP_BEQ: + case OP_BNE: + case OP_BLEZ: + case OP_BGTZ: + if (!op_flag_local_branch(flags)) { + block->flags |= BLOCK_PRELOAD_PC; + return 0; + } + + case OP_SPECIAL: + switch (c.r.op) { + case OP_SPECIAL_JALR: + if (c.r.rd) { + block->flags |= BLOCK_PRELOAD_PC; + return 0; + } + break; + case OP_SPECIAL_SYSCALL: + case OP_SPECIAL_BREAK: + block->flags |= BLOCK_PRELOAD_PC; + return 0; + default: + break; + } + break; + } + } + + return 0; +} + static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = { IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence), IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset), @@ -2170,6 +2260,7 @@ static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block * IF_OPT(OPT_FLAG_IO, &lightrec_flag_io), IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs), IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload), + IF_OPT(OPT_PRELOAD_PC, &lightrec_test_preload_pc), }; int lightrec_optimize(struct lightrec_state *state, struct block *block) diff --git a/deps/lightrec/regcache.c b/deps/lightrec/regcache.c index d9926d38..45d77c6c 100644 --- a/deps/lightrec/regcache.c +++ b/deps/lightrec/regcache.c @@ -296,6 +296,21 @@ void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value) nreg->value = value; } +u8 lightrec_alloc_reg_temp_with_value(struct regcache *cache, + jit_state_t *_jit, intptr_t value) +{ + s8 reg; + + reg = lightrec_get_reg_with_value(cache, value); + if (reg < 0) { + reg = lightrec_alloc_reg_temp(cache, _jit); + jit_movi((u8)reg, value); + lightrec_temp_set_value(cache, (u8)reg, value); + } + + return (u8)reg; +} + u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u16 reg, u8 flags) { @@ -400,6 +415,22 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, return jit_reg; } +void lightrec_remap_reg(struct regcache *cache, jit_state_t *_jit, + u8 jit_reg, u16 reg_out, bool discard) +{ + struct native_register *nreg; + + lightrec_discard_reg_if_loaded(cache, reg_out); + + nreg = lightning_reg_to_lightrec(cache, jit_reg); + clean_reg(_jit, nreg, jit_reg, !discard); + + nreg->output = true; + nreg->emulated_register = reg_out; + nreg->extend = nreg->extended; + nreg->zero_extend = nreg->zero_extended; +} + static bool reg_pc_is_mapped(struct regcache *cache) { struct native_register *nreg = lightning_reg_to_lightrec(cache, JIT_V0); diff --git a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h index abf70532..b919ce08 100644 --- a/deps/lightrec/regcache.h +++ b/deps/lightrec/regcache.h @@ -50,6 +50,9 @@ u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u16 reg, u8 flags); +void lightrec_remap_reg(struct regcache *cache, jit_state_t *_jit, + u8 jit_reg, u16 reg_out, _Bool discard); + void lightrec_load_imm(struct regcache *cache, jit_state_t *_jit, u8 jit_reg, u32 pc, u32 imm); void lightrec_load_next_pc(struct regcache *cache, jit_state_t *_jit, u8 reg); @@ -58,6 +61,8 @@ void lightrec_load_next_pc_imm(struct regcache *cache, s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value); void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value); +u8 lightrec_alloc_reg_temp_with_value(struct regcache *cache, + jit_state_t *_jit, intptr_t value); u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg); void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags); -- 2.39.2