From 5eecf06ddff70602526a937f6faebcd3039885bb Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 15 Jul 2024 22:10:13 +0200 Subject: [PATCH] git subrepo pull --force deps/lightrec subrepo: subdir: "deps/lightrec" merged: "601afca8e8" upstream: origin: "https://github.com/pcercuei/lightrec.git" branch: "master" commit: "601afca8e8" git-subrepo: version: "0.4.6" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "110b9eb" --- deps/lightrec/.gitrepo | 4 +-- deps/lightrec/CMakeLists.txt | 4 +++ deps/lightrec/arch.h | 36 +++++++++++++++++++++++++ deps/lightrec/emitter.c | 21 ++++++++++----- deps/lightrec/lightning-wrapper.h | 10 +++++++ deps/lightrec/lightrec-config.h.cmakein | 2 ++ deps/lightrec/lightrec-private.h | 10 +++++++ deps/lightrec/lightrec.c | 6 ++--- deps/lightrec/optimizer.c | 12 ++++----- deps/lightrec/regcache.h | 10 +++++-- 10 files changed, 94 insertions(+), 21 deletions(-) create mode 100644 deps/lightrec/arch.h diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 9e04deb8..69811196 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = bd0b82792284f22566bbfc78d8882e1e91b10516 - parent = 1229a4ea3dea3e1e47c46cd7afed38860fd91a57 + commit = 601afca8e889bdda7040ff5c64f7bbd20d1d5f2c + parent = 459f02ad03fa10b5c403fed724d47fe5adfd5fb1 method = merge cmdver = 0.4.6 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt index c3eb6f8f..6f3d53e7 100644 --- a/deps/lightrec/CMakeLists.txt +++ b/deps/lightrec/CMakeLists.txt @@ -85,6 +85,10 @@ option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/L option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON) option(OPT_PRELOAD_PC "(optimization) Preload PC value into register" ON) +if (CMAKE_SYSTEM_PROCESSOR MATCHES "SH4|sh4") + option(OPT_SH4_USE_GBR "(SH4 optimization) Use GBR register for the state pointer" OFF) +endif() + target_include_directories(lightrec PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang") diff --git a/deps/lightrec/arch.h b/deps/lightrec/arch.h new file mode 100644 index 00000000..7df9e754 --- /dev/null +++ b/deps/lightrec/arch.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2024 Paul Cercueil + */ + +#ifndef __LIGHTREC_ARCH_H__ +#define __LIGHTREC_ARCH_H__ + +#include +#include + +static bool arch_has_fast_mask(void) +{ +#if __mips_isa_rev >= 2 + /* On MIPS32 >= r2, we can use extr / ins instructions */ + return true; +#endif +#ifdef __powerpc__ + /* On PowerPC, we can use the RLWINM instruction */ + return true; +#endif +#ifdef __aarch64__ + /* Aarch64 can use the UBFX instruction */ + return true; +#endif +#if defined(__x86__) || defined(__x86_64__) + /* x86 doesn't have enough registers, using cached values make + * little sense. Using jit_andi() will give a better result as it will + * use bit-shifts for low/high masks. */ + return true; +#endif + + return false; +} + +#endif /* __LIGHTREC_ARCH_H__ */ diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 8612119f..a59ff1d7 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -3,6 +3,7 @@ * Copyright (C) 2014-2021 Paul Cercueil */ +#include "arch.h" #include "blockcache.h" #include "debug.h" #include "disassembler.h" @@ -103,7 +104,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, if (cycles && update_cycles) { jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles); - pr_debug("EOB: %u cycles\n", cycles); + pr_debug("EOB: %"PRIu32" cycles\n", cycles); } if (has_ds && op_flag_load_delay(ds->flags) @@ -247,11 +248,11 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 struct lightrec_branch *branch; const struct opcode *op = &block->opcode_list[offset], *ds = get_delay_slot(block->opcode_list, offset); - jit_node_t *addr; bool is_forward = (s16)op->i.imm >= 0; int op_cycles = lightrec_cycles_of_opcode(state->state, op->c); u32 target_offset, cycles = state->cycles + op_cycles; bool no_indirection = false; + jit_node_t *addr = NULL; u32 next_pc; u8 rs, rt; @@ -308,7 +309,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 target_offset = offset + 1 + (s16)op->i.imm - !!op_flag_no_ds(op->flags); - pr_debug("Adding local branch to offset 0x%x\n", + pr_debug("Adding local branch to offset 0x%"PRIx32"\n", target_offset << 2); branch = &state->local_branches[ state->nb_local_branches++]; @@ -941,7 +942,7 @@ static void rec_alu_mult(struct lightrec_cstate *state, u8 reg_lo = get_mult_div_lo(c); u8 reg_hi = get_mult_div_hi(c); jit_state_t *_jit = block->_jit; - u8 lo, hi, rs, rt, rflags = 0; + u8 lo, hi = 0, rs, rt, rflags = 0; bool no_lo = op_flag_no_lo(flags); bool no_hi = op_flag_no_hi(flags); @@ -1276,10 +1277,16 @@ static void rec_and_mask(struct lightrec_cstate *cstate, struct regcache *reg_cache = cstate->reg_cache; u8 reg_imm; - reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, mask); - jit_andr(reg_out, reg_in, reg_imm); + if (arch_has_fast_mask() + && (is_low_mask(mask) || is_high_mask(mask))) { + jit_andi(reg_out, reg_in, mask); + } else { + reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, + mask); + jit_andr(reg_out, reg_in, reg_imm); - lightrec_free_reg(reg_cache, reg_imm); + lightrec_free_reg(reg_cache, reg_imm); + } } static void rec_store_memory(struct lightrec_cstate *cstate, diff --git a/deps/lightrec/lightning-wrapper.h b/deps/lightrec/lightning-wrapper.h index 88e93e06..cbf3edc3 100644 --- a/deps/lightrec/lightning-wrapper.h +++ b/deps/lightrec/lightning-wrapper.h @@ -8,6 +8,8 @@ #include +#include "lightrec-config.h" + #if __WORDSIZE == 32 #define jit_ldxi_ui(u,v,w) jit_ldxi_i(u,v,w) @@ -21,6 +23,14 @@ #define jit_b() jit_beqr(0, 0) +#if defined(__sh__) && OPT_SH4_USE_GBR +#define jit_add_state(u,v) \ + do { \ + jit_new_node_ww(jit_code_movr,_R0,LIGHTREC_REG_STATE); \ + jit_new_node_www(jit_code_addr,u,v,_R0); \ + } while (0) +#else #define jit_add_state(u,v) jit_addr(u,v,LIGHTREC_REG_STATE) +#endif #endif /* __LIGHTNING_WRAPPER_H__ */ diff --git a/deps/lightrec/lightrec-config.h.cmakein b/deps/lightrec/lightrec-config.h.cmakein index 9086a7ae..f92509ad 100644 --- a/deps/lightrec/lightrec-config.h.cmakein +++ b/deps/lightrec/lightrec-config.h.cmakein @@ -25,5 +25,7 @@ #cmakedefine01 OPT_EARLY_UNLOAD #cmakedefine01 OPT_PRELOAD_PC +#cmakedefine01 OPT_SH4_USE_GBR + #endif /* __LIGHTREC_CONFIG_H__ */ diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index 920008c2..5e047aa8 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -372,6 +372,16 @@ static inline _Bool can_zero_extend(u32 value, u8 order) return (value >> order) == 0; } +static inline _Bool is_low_mask(u32 imm) +{ + return imm & 1 ? popcount32(imm + 1) <= 1 : 0; +} + +static inline _Bool is_high_mask(u32 imm) +{ + return imm ? popcount32(imm + BIT(ctz32(imm))) == 0 : 0; +} + static inline const struct opcode * get_delay_slot(const struct opcode *list, u16 i) { diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index ec26bff7..ae170531 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -959,7 +959,7 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_tramp(256); /* Load pointer to C wrapper */ - jit_addr(JIT_R1, JIT_R1, LIGHTREC_REG_STATE); + jit_add_state(JIT_R1, JIT_R1); jit_ldxi(JIT_R1, JIT_R1, lightrec_offset(c_wrappers)); jit_epilog(); @@ -1046,7 +1046,7 @@ static u32 lightrec_memset(struct lightrec_state *state) return 0; } - pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %u bytes\n", + pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %"PRIu32" bytes\n", kunseg_pc, (uintptr_t)host, length); memset(host, 0, length); @@ -1624,7 +1624,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, for (i = 0; i < cstate->nb_local_branches; i++) { struct lightrec_branch *branch = &cstate->local_branches[i]; - pr_debug("Patch local branch to offset 0x%x\n", + pr_debug("Patch local branch to offset 0x%"PRIx32"\n", branch->target << 2); if (branch->target == 0) { diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 0a3655b9..991ef778 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -1172,7 +1172,7 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl break; } - pr_debug("Multiply by power-of-two: %u\n", + pr_debug("Multiply by power-of-two: %"PRIu32"\n", v[op->r.rt].value); if (op->r.op == OP_SPECIAL_MULT) @@ -1440,14 +1440,12 @@ static int lightrec_swap_load_delays(struct lightrec_state *state, switch (next.i.op) { case OP_LWL: case OP_LWR: - case OP_REGIMM: - case OP_BEQ: - case OP_BNE: - case OP_BLEZ: - case OP_BGTZ: continue; } + if (has_delay_slot(next)) + continue; + if (opcode_reads_register(next, c.i.rt) && !opcode_writes_register(next, c.i.rs)) { pr_debug("Swapping opcodes at offset 0x%x to " @@ -1481,7 +1479,7 @@ static int lightrec_local_branches(struct lightrec_state *state, struct block *b offset = i + 1 + (s16)list->c.i.imm; - pr_debug("Found local branch to offset 0x%x\n", offset << 2); + pr_debug("Found local branch to offset 0x%"PRIx32"\n", offset << 2); ds = get_delay_slot(block->opcode_list, i); if (op_flag_load_delay(ds->flags) && opcode_is_load(ds->c)) { diff --git a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h index 4b7cb89c..23a775ce 100644 --- a/deps/lightrec/regcache.h +++ b/deps/lightrec/regcache.h @@ -7,9 +7,15 @@ #define __REGCACHE_H__ #include "lightning-wrapper.h" +#include "lightrec-config.h" -#define NUM_REGS (JIT_V_NUM - 1) -#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1)) +#if defined(__sh__) && OPT_SH4_USE_GBR +# define NUM_REGS JIT_V_NUM +# define LIGHTREC_REG_STATE _GBR +#else +# define NUM_REGS (JIT_V_NUM - 1) +# define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1)) +#endif #if defined(__powerpc__) # define NUM_TEMPS JIT_R_NUM -- 2.39.5