[subrepo]
remote = https://github.com/pcercuei/lightrec.git
branch = master
- commit = bd0b82792284f22566bbfc78d8882e1e91b10516
- parent = 1229a4ea3dea3e1e47c46cd7afed38860fd91a57
+ commit = 601afca8e889bdda7040ff5c64f7bbd20d1d5f2c
+ parent = 459f02ad03fa10b5c403fed724d47fe5adfd5fb1
method = merge
cmdver = 0.4.6
option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON)
option(OPT_PRELOAD_PC "(optimization) Preload PC value into register" ON)
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "SH4|sh4")
+ option(OPT_SH4_USE_GBR "(SH4 optimization) Use GBR register for the state pointer" OFF)
+endif()
+
target_include_directories(lightrec PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
if (CMAKE_C_COMPILER_ID MATCHES "GNU|Clang")
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2024 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTREC_ARCH_H__
+#define __LIGHTREC_ARCH_H__
+
+#include <lightning.h>
+#include <stdbool.h>
+
+/*
+ * Report whether the host architecture should AND with an immediate
+ * low/high bit-mask directly instead of going through a register that
+ * holds the mask value.
+ *
+ * Returns true on the architectures listed below, false on any other.
+ */
+static inline bool arch_has_fast_mask(void)
+{
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 2
+ /* On MIPS32 >= r2, we can use extr / ins instructions */
+ return true;
+#endif
+#ifdef __powerpc__
+ /* On PowerPC, we can use the RLWINM instruction */
+ return true;
+#endif
+#ifdef __aarch64__
+ /* Aarch64 can use the UBFX instruction */
+ return true;
+#endif
+#if defined(__i386__) || defined(__x86__) || defined(__x86_64__)
+ /* x86 doesn't have enough registers, so using cached values makes
+ * little sense. Using jit_andi() will give a better result as it will
+ * use bit-shifts for low/high masks.
+ * __i386__ is the macro GCC and Clang predefine for 32-bit x86;
+ * __x86__ is kept only for toolchains that may define it. */
+ return true;
+#endif
+
+ return false;
+}
+
+#endif /* __LIGHTREC_ARCH_H__ */
* Copyright (C) 2014-2021 Paul Cercueil <paul@crapouillou.net>
*/
+#include "arch.h"
#include "blockcache.h"
#include "debug.h"
#include "disassembler.h"
if (cycles && update_cycles) {
jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
- pr_debug("EOB: %u cycles\n", cycles);
+ pr_debug("EOB: %"PRIu32" cycles\n", cycles);
}
if (has_ds && op_flag_load_delay(ds->flags)
struct lightrec_branch *branch;
const struct opcode *op = &block->opcode_list[offset],
*ds = get_delay_slot(block->opcode_list, offset);
- jit_node_t *addr;
bool is_forward = (s16)op->i.imm >= 0;
int op_cycles = lightrec_cycles_of_opcode(state->state, op->c);
u32 target_offset, cycles = state->cycles + op_cycles;
bool no_indirection = false;
+ jit_node_t *addr = NULL;
u32 next_pc;
u8 rs, rt;
target_offset = offset + 1 + (s16)op->i.imm
- !!op_flag_no_ds(op->flags);
- pr_debug("Adding local branch to offset 0x%x\n",
+ pr_debug("Adding local branch to offset 0x%"PRIx32"\n",
target_offset << 2);
branch = &state->local_branches[
state->nb_local_branches++];
u8 reg_lo = get_mult_div_lo(c);
u8 reg_hi = get_mult_div_hi(c);
jit_state_t *_jit = block->_jit;
- u8 lo, hi, rs, rt, rflags = 0;
+ u8 lo, hi = 0, rs, rt, rflags = 0;
bool no_lo = op_flag_no_lo(flags);
bool no_hi = op_flag_no_hi(flags);
struct regcache *reg_cache = cstate->reg_cache;
u8 reg_imm;
- reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, mask);
- jit_andr(reg_out, reg_in, reg_imm);
+ if (arch_has_fast_mask()
+ && (is_low_mask(mask) || is_high_mask(mask))) {
+ jit_andi(reg_out, reg_in, mask);
+ } else {
+ reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit,
+ mask);
+ jit_andr(reg_out, reg_in, reg_imm);
- lightrec_free_reg(reg_cache, reg_imm);
+ lightrec_free_reg(reg_cache, reg_imm);
+ }
}
static void rec_store_memory(struct lightrec_cstate *cstate,
#include <lightning.h>
+#include "lightrec-config.h"
+
#if __WORDSIZE == 32
#define jit_ldxi_ui(u,v,w) jit_ldxi_i(u,v,w)
#define jit_b() jit_beqr(0, 0)
+#if defined(__sh__) && OPT_SH4_USE_GBR /*
+ * SH4 build with OPT_SH4_USE_GBR enabled: jit_add_state(u,v) queues a
+ * jit_code_movr node from LIGHTREC_REG_STATE into _R0, then a
+ * jit_code_addr node on (u, v, _R0), instead of a single jit_addr() that
+ * names LIGHTREC_REG_STATE directly.
+ * NOTE(review): _R0 is overwritten by the first node; presumably callers
+ * never pass _R0 as v - confirm.
+ */
+#define jit_add_state(u,v) \
+ do { \
+ jit_new_node_ww(jit_code_movr,_R0,LIGHTREC_REG_STATE); \
+ jit_new_node_www(jit_code_addr,u,v,_R0); \
+ } while (0)
+#else /* default: one jit_addr() using LIGHTREC_REG_STATE as an operand */
#define jit_add_state(u,v) jit_addr(u,v,LIGHTREC_REG_STATE)
+#endif
#endif /* __LIGHTNING_WRAPPER_H__ */
#cmakedefine01 OPT_EARLY_UNLOAD
#cmakedefine01 OPT_PRELOAD_PC
+#cmakedefine01 OPT_SH4_USE_GBR
+
#endif /* __LIGHTREC_CONFIG_H__ */
return (value >> order) == 0;
}
+/*
+ * Return non-zero when bit 0 of 'imm' is set and popcount32(imm + 1) is at
+ * most one. Assuming popcount32() counts set bits, this means 'imm' is a
+ * contiguous run of ones starting at bit 0.
+ */
+static inline _Bool is_low_mask(u32 imm)
+{
+ /* A mask anchored at bit 0 must have that bit set. */
+ if (!(imm & 1))
+  return 0;
+
+ return popcount32(imm + 1) <= 1;
+}
+
+/*
+ * Return non-zero when 'imm' is non-zero and popcount32() of
+ * imm + BIT(ctz32(imm)) is zero. Assuming ctz32() yields the index of the
+ * lowest set bit and popcount32() counts set bits of a 32-bit value, this
+ * means the ones in 'imm' run without a gap up to the top bit.
+ */
+static inline _Bool is_high_mask(u32 imm)
+{
+ /* Zero is rejected up front, so ctz32() is never called with 0. */
+ if (!imm)
+  return 0;
+
+ return popcount32(imm + BIT(ctz32(imm))) == 0;
+}
+
static inline const struct opcode *
get_delay_slot(const struct opcode *list, u16 i)
{
jit_tramp(256);
/* Load pointer to C wrapper */
- jit_addr(JIT_R1, JIT_R1, LIGHTREC_REG_STATE);
+ jit_add_state(JIT_R1, JIT_R1);
jit_ldxi(JIT_R1, JIT_R1, lightrec_offset(c_wrappers));
jit_epilog();
return 0;
}
- pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %u bytes\n",
+ pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %"PRIu32" bytes\n",
kunseg_pc, (uintptr_t)host, length);
memset(host, 0, length);
for (i = 0; i < cstate->nb_local_branches; i++) {
struct lightrec_branch *branch = &cstate->local_branches[i];
- pr_debug("Patch local branch to offset 0x%x\n",
+ pr_debug("Patch local branch to offset 0x%"PRIx32"\n",
branch->target << 2);
if (branch->target == 0) {
break;
}
- pr_debug("Multiply by power-of-two: %u\n",
+ pr_debug("Multiply by power-of-two: %"PRIu32"\n",
v[op->r.rt].value);
if (op->r.op == OP_SPECIAL_MULT)
switch (next.i.op) {
case OP_LWL:
case OP_LWR:
- case OP_REGIMM:
- case OP_BEQ:
- case OP_BNE:
- case OP_BLEZ:
- case OP_BGTZ:
continue;
}
+ if (has_delay_slot(next))
+ continue;
+
if (opcode_reads_register(next, c.i.rt)
&& !opcode_writes_register(next, c.i.rs)) {
pr_debug("Swapping opcodes at offset 0x%x to "
offset = i + 1 + (s16)list->c.i.imm;
- pr_debug("Found local branch to offset 0x%x\n", offset << 2);
+ pr_debug("Found local branch to offset 0x%"PRIx32"\n", offset << 2);
ds = get_delay_slot(block->opcode_list, i);
if (op_flag_load_delay(ds->flags) && opcode_is_load(ds->c)) {
#define __REGCACHE_H__
#include "lightning-wrapper.h"
+#include "lightrec-config.h"
-#define NUM_REGS (JIT_V_NUM - 1)
-#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
+#if defined(__sh__) && OPT_SH4_USE_GBR /* SH4 with the GBR option: state pointer is _GBR, NUM_REGS spans all JIT_V_NUM registers */
+# define NUM_REGS JIT_V_NUM
+# define LIGHTREC_REG_STATE _GBR
+#else /* otherwise the last JIT_V register holds the state pointer and is excluded from NUM_REGS */
+# define NUM_REGS (JIT_V_NUM - 1)
+# define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
+#endif
#if defined(__powerpc__)
# define NUM_TEMPS JIT_R_NUM