git subrepo pull --force deps/lightrec
authorPaul Cercueil <paul@crapouillou.net>
Mon, 15 Jul 2024 20:10:13 +0000 (22:10 +0200)
committerPaul Cercueil <paul@crapouillou.net>
Mon, 15 Jul 2024 20:10:13 +0000 (22:10 +0200)
subrepo:
  subdir:   "deps/lightrec"
  merged:   "601afca8e8"
upstream:
  origin:   "https://github.com/pcercuei/lightrec.git"
  branch:   "master"
  commit:   "601afca8e8"
git-subrepo:
  version:  "0.4.6"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "110b9eb"

deps/lightrec/.gitrepo
deps/lightrec/CMakeLists.txt
deps/lightrec/arch.h [new file with mode: 0644]
deps/lightrec/emitter.c
deps/lightrec/lightning-wrapper.h
deps/lightrec/lightrec-config.h.cmakein
deps/lightrec/lightrec-private.h
deps/lightrec/lightrec.c
deps/lightrec/optimizer.c
deps/lightrec/regcache.h

index 9e04deb..6981119 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/lightrec.git
        branch = master
-       commit = bd0b82792284f22566bbfc78d8882e1e91b10516
-       parent = 1229a4ea3dea3e1e47c46cd7afed38860fd91a57
+       commit = 601afca8e889bdda7040ff5c64f7bbd20d1d5f2c
+       parent = 459f02ad03fa10b5c403fed724d47fe5adfd5fb1
        method = merge
        cmdver = 0.4.6
index c3eb6f8..6f3d53e 100644 (file)
@@ -85,6 +85,10 @@ option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/L
 option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON)
 option(OPT_PRELOAD_PC "(optimization) Preload PC value into register" ON)
 
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "SH4|sh4")
+       option(OPT_SH4_USE_GBR "(SH4 optimization) Use GBR register for the state pointer" OFF)
+endif()
+
 target_include_directories(lightrec PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
 
 if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
diff --git a/deps/lightrec/arch.h b/deps/lightrec/arch.h
new file mode 100644 (file)
index 0000000..7df9e75
--- /dev/null
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2024 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTREC_ARCH_H__
+#define __LIGHTREC_ARCH_H__
+
+#include <lightning.h>
+#include <stdbool.h>
+
+/*
+ * Tell whether the host CPU can AND a register with a contiguous low or high
+ * bit mask cheaply from an immediate. When it returns true, rec_and_mask()
+ * emits jit_andi() for such masks instead of loading the mask into a
+ * temporary register and emitting jit_andr().
+ *
+ * The answer is fixed at build time from the compiler's predefined macros.
+ */
+static inline bool arch_has_fast_mask(void)
+{
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 2
+       /* On MIPS32 >= r2, we can use extr / ins instructions */
+       return true;
+#elif defined(__powerpc__)
+       /* On PowerPC, we can use the RLWINM instruction */
+       return true;
+#elif defined(__aarch64__)
+       /* Aarch64 can use the UBFX instruction */
+       return true;
+#elif defined(__i386__) || defined(__x86_64__)
+       /* 32-bit x86 is detected through __i386__; compilers do not
+        * predefine __x86__.
+        *
+        * x86 doesn't have enough registers, so using cached values makes
+        * little sense. Using jit_andi() will give a better result as it will
+        * use bit-shifts for low/high masks. */
+       return true;
+#else
+       return false;
+#endif
+}
+
+#endif /* __LIGHTREC_ARCH_H__ */
index 8612119..a59ff1d 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
  */
 
+#include "arch.h"
 #include "blockcache.h"
 #include "debug.h"
 #include "disassembler.h"
@@ -103,7 +104,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
 
        if (cycles && update_cycles) {
                jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
-               pr_debug("EOB: %u cycles\n", cycles);
+               pr_debug("EOB: %"PRIu32" cycles\n", cycles);
        }
 
        if (has_ds && op_flag_load_delay(ds->flags)
@@ -247,11 +248,11 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
        struct lightrec_branch *branch;
        const struct opcode *op = &block->opcode_list[offset],
                            *ds = get_delay_slot(block->opcode_list, offset);
-       jit_node_t *addr;
        bool is_forward = (s16)op->i.imm >= 0;
        int op_cycles = lightrec_cycles_of_opcode(state->state, op->c);
        u32 target_offset, cycles = state->cycles + op_cycles;
        bool no_indirection = false;
+       jit_node_t *addr = NULL;
        u32 next_pc;
        u8 rs, rt;
 
@@ -308,7 +309,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
 
                target_offset = offset + 1 + (s16)op->i.imm
                        - !!op_flag_no_ds(op->flags);
-               pr_debug("Adding local branch to offset 0x%x\n",
+               pr_debug("Adding local branch to offset 0x%"PRIx32"\n",
                         target_offset << 2);
                branch = &state->local_branches[
                        state->nb_local_branches++];
@@ -941,7 +942,7 @@ static void rec_alu_mult(struct lightrec_cstate *state,
        u8 reg_lo = get_mult_div_lo(c);
        u8 reg_hi = get_mult_div_hi(c);
        jit_state_t *_jit = block->_jit;
-       u8 lo, hi, rs, rt, rflags = 0;
+       u8 lo, hi = 0, rs, rt, rflags = 0;
        bool no_lo = op_flag_no_lo(flags);
        bool no_hi = op_flag_no_hi(flags);
 
@@ -1276,10 +1277,16 @@ static void rec_and_mask(struct lightrec_cstate *cstate,
        struct regcache *reg_cache = cstate->reg_cache;
        u8 reg_imm;
 
-       reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, mask);
-       jit_andr(reg_out, reg_in, reg_imm);
+       if (arch_has_fast_mask()
+           && (is_low_mask(mask) || is_high_mask(mask))) {
+               jit_andi(reg_out, reg_in, mask);
+       } else {
+               reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit,
+                                                            mask);
+               jit_andr(reg_out, reg_in, reg_imm);
 
-       lightrec_free_reg(reg_cache, reg_imm);
+               lightrec_free_reg(reg_cache, reg_imm);
+       }
 }
 
 static void rec_store_memory(struct lightrec_cstate *cstate,
index 88e93e0..cbf3edc 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <lightning.h>
 
+#include "lightrec-config.h"
+
 #if __WORDSIZE == 32
 
 #define jit_ldxi_ui(u,v,w)     jit_ldxi_i(u,v,w)
 
 #define jit_b()                        jit_beqr(0, 0)
 
+/*
+ * jit_add_state(u, v): emit code adding the state register to v, with the
+ * result going to u.
+ *
+ * In the SH4 + OPT_SH4_USE_GBR build, the state register is first moved
+ * into _R0 and the addition is then done against _R0, so _R0 is overwritten
+ * by every use of this macro.
+ * NOTE(review): presumably required because the state pointer then lives in
+ * GBR, which cannot be used directly as an operand of an add - confirm.
+ */
+#if defined(__sh__) && OPT_SH4_USE_GBR
+#define jit_add_state(u,v)                                             \
+       do {                                                            \
+               jit_new_node_ww(jit_code_movr,_R0,LIGHTREC_REG_STATE);  \
+               jit_new_node_www(jit_code_addr,u,v,_R0);                \
+       } while (0)
+#else
 #define jit_add_state(u,v)     jit_addr(u,v,LIGHTREC_REG_STATE)
+#endif
 
 #endif /* __LIGHTNING_WRAPPER_H__ */
index 9086a7a..f92509a 100644 (file)
@@ -25,5 +25,7 @@
 #cmakedefine01 OPT_EARLY_UNLOAD
 #cmakedefine01 OPT_PRELOAD_PC
 
+#cmakedefine01 OPT_SH4_USE_GBR
+
 #endif /* __LIGHTREC_CONFIG_H__ */
 
index 920008c..5e047aa 100644 (file)
@@ -372,6 +372,30 @@ static inline _Bool can_zero_extend(u32 value, u8 order)
       return (value >> order) == 0;
 }
 
+/*
+ * True when bit 0 of imm is set and popcount32(imm + 1) is at most 1; assuming
+ * popcount32() counts set bits, that is a run of ones from bit 0 (e.g. 0xff).
+ */
+static inline _Bool is_low_mask(u32 imm)
+{
+       if (!(imm & 1))
+               return 0;
+
+       return popcount32(imm + 1) <= 1;
+}
+
+/*
+ * True when imm is non-zero and popcount32(imm + BIT(ctz32(imm))) is 0;
+ * presumably a run of ones ending at bit 31 (e.g. 0xffff0000) - confirm.
+ */
+static inline _Bool is_high_mask(u32 imm)
+{
+       if (!imm)
+               return 0;
+
+       return popcount32(imm + BIT(ctz32(imm))) == 0;
+}
+
 static inline const struct opcode *
 get_delay_slot(const struct opcode *list, u16 i)
 {
index ec26bff..ae17053 100644 (file)
@@ -959,7 +959,7 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        jit_tramp(256);
 
        /* Load pointer to C wrapper */
-       jit_addr(JIT_R1, JIT_R1, LIGHTREC_REG_STATE);
+       jit_add_state(JIT_R1, JIT_R1);
        jit_ldxi(JIT_R1, JIT_R1, lightrec_offset(c_wrappers));
 
        jit_epilog();
@@ -1046,7 +1046,7 @@ static u32 lightrec_memset(struct lightrec_state *state)
                return 0;
        }
 
-       pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %u bytes\n",
+       pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %"PRIu32" bytes\n",
                 kunseg_pc, (uintptr_t)host, length);
        memset(host, 0, length);
 
@@ -1624,7 +1624,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
        for (i = 0; i < cstate->nb_local_branches; i++) {
                struct lightrec_branch *branch = &cstate->local_branches[i];
 
-               pr_debug("Patch local branch to offset 0x%x\n",
+               pr_debug("Patch local branch to offset 0x%"PRIx32"\n",
                         branch->target << 2);
 
                if (branch->target == 0) {
index 0a3655b..991ef77 100644 (file)
@@ -1172,7 +1172,7 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                        break;
                                }
 
-                               pr_debug("Multiply by power-of-two: %u\n",
+                               pr_debug("Multiply by power-of-two: %"PRIu32"\n",
                                         v[op->r.rt].value);
 
                                if (op->r.op == OP_SPECIAL_MULT)
@@ -1440,14 +1440,12 @@ static int lightrec_swap_load_delays(struct lightrec_state *state,
                        switch (next.i.op) {
                        case OP_LWL:
                        case OP_LWR:
-                       case OP_REGIMM:
-                       case OP_BEQ:
-                       case OP_BNE:
-                       case OP_BLEZ:
-                       case OP_BGTZ:
                                continue;
                        }
 
+                       if (has_delay_slot(next))
+                               continue;
+
                        if (opcode_reads_register(next, c.i.rt)
                            && !opcode_writes_register(next, c.i.rs)) {
                                pr_debug("Swapping opcodes at offset 0x%x to "
@@ -1481,7 +1479,7 @@ static int lightrec_local_branches(struct lightrec_state *state, struct block *b
 
                offset = i + 1 + (s16)list->c.i.imm;
 
-               pr_debug("Found local branch to offset 0x%x\n", offset << 2);
+               pr_debug("Found local branch to offset 0x%"PRIx32"\n", offset << 2);
 
                ds = get_delay_slot(block->opcode_list, i);
                if (op_flag_load_delay(ds->flags) && opcode_is_load(ds->c)) {
index 4b7cb89..23a775c 100644 (file)
@@ -7,9 +7,15 @@
 #define __REGCACHE_H__
 
 #include "lightning-wrapper.h"
+#include "lightrec-config.h"
 
-#define NUM_REGS (JIT_V_NUM - 1)
-#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
+#if defined(__sh__) && OPT_SH4_USE_GBR
+#  define NUM_REGS JIT_V_NUM
+#  define LIGHTREC_REG_STATE _GBR
+#else
+#  define NUM_REGS (JIT_V_NUM - 1)
+#  define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
+#endif
 
 #if defined(__powerpc__)
 #  define NUM_TEMPS JIT_R_NUM