#include <stddef.h>
#include <string.h>
-#define GENMASK(h, l) \
- (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h))))
-
static struct block * lightrec_precompile_block(struct lightrec_state *state,
u32 pc);
static bool lightrec_block_is_fully_tagged(const struct block *block);
u32 opcode, void *host, u32 addr, u32 data)
{
unsigned int shift = addr & 0x3;
- unsigned int mask = GENMASK(31, (shift + 1) * 8);
+ unsigned int mask = shift < 3 ? GENMASK(31, (shift + 1) * 8) : 0;
u32 old_data;
/* Align to 32 bits */
u32 opcode, void *host, u32 addr, u32 data)
{
unsigned int shift = addr & 0x3;
- unsigned int mask = GENMASK(31, 32 - shift * 8);
+ unsigned int mask = shift ? GENMASK(31, 32 - shift * 8) : 0;
u32 old_data;
/* Align to 32 bits */
}
}
-enum psx_map
+static enum psx_map
lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr)
{
const struct lightrec_mem_map *map;
return map;
}
-u32 lightrec_rw(struct lightrec_state *state, union code op,
- u32 addr, u32 data, u32 *flags, struct block *block)
+u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base,
+ u32 data, u32 *flags, struct block *block, u16 offset)
{
const struct lightrec_mem_map *map;
const struct lightrec_mem_map_ops *ops;
u32 opcode = op.opcode;
+ bool was_tagged = true;
+ u16 old_flags;
+ u32 addr;
void *host;
- addr += (s16) op.i.imm;
+ addr = kunseg(base + (s16) op.i.imm);
- map = lightrec_get_map(state, &host, kunseg(addr));
+ map = lightrec_get_map(state, &host, addr);
if (!map) {
__segfault_cb(state, addr, block);
return 0;
}
+ if (flags)
+ was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(*flags);
if (likely(!map->ops)) {
- if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags))
- *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
+ if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) {
+ /* Force parallel port accesses as HW accesses, because
+ * the direct-I/O emitters can't differenciate it. */
+ if (unlikely(map == &state->maps[PSX_MAP_PARALLEL_PORT]))
+ *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
+ /* If the base register is 0x0, be extra suspicious.
+ * Some games (e.g. Sled Storm) actually do segmentation
+ * faults by using uninitialized pointers, which are
+ * later initialized to point to hardware registers. */
+ else if (op.i.rs && base == 0x0)
+ *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
+ else
+ *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
+ }
ops = &lightrec_default_ops;
} else if (flags &&
ops = map->ops;
}
+ if (!was_tagged) {
+ old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE);
+
+ if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) {
+ pr_debug("Opcode of block at PC 0x%08x has been tagged"
+ " - flag for recompilation\n", block->pc);
+
+ lut_write(state, lut_offset(block->pc), NULL);
+ }
+ }
+
switch (op.i.op) {
case OP_SB:
ops->sb(state, opcode, host, addr, (u8) data);
static void lightrec_rw_helper(struct lightrec_state *state,
union code op, u32 *flags,
- struct block *block)
+ struct block *block, u16 offset)
{
u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs],
- state->regs.gpr[op.i.rt], flags, block);
+ state->regs.gpr[op.i.rt], flags, block, offset);
switch (op.i.op) {
case OP_LB:
case OP_LWL:
case OP_LWR:
case OP_LW:
- if (op.i.rt)
+ if (OPT_HANDLE_LOAD_DELAYS && unlikely(!state->in_delay_slot_n)) {
+ state->temp_reg = ret;
+ state->in_delay_slot_n = 0xff;
+ } else if (op.i.rt) {
state->regs.gpr[op.i.rt] = ret;
+ }
fallthrough;
default:
break;
static void lightrec_rw_cb(struct lightrec_state *state, u32 arg)
{
- lightrec_rw_helper(state, (union code) arg, NULL, NULL);
+ lightrec_rw_helper(state, (union code) arg, NULL, NULL, 0);
}
static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg)
{
struct block *block;
struct opcode *op;
- bool was_tagged;
u16 offset = (u16)arg;
- u16 old_flags;
block = lightrec_find_block_from_lut(state->block_cache,
arg >> 16, state->next_pc);
}
op = &block->opcode_list[offset];
- was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags);
-
- lightrec_rw_helper(state, op->c, &op->flags, block);
-
- if (!was_tagged) {
- old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE);
-
- if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) {
- pr_debug("Opcode of block at PC 0x%08x has been tagged"
- " - flag for recompilation\n", block->pc);
-
- lut_write(state, lut_offset(block->pc), NULL);
- }
- }
+ lightrec_rw_helper(state, op->c, &op->flags, block, offset);
}
static u32 clamp_s32(s32 val, s32 min, s32 max)
if (op.i.op == OP_CP0)
return state->regs.cp0[op.r.rd];
- else if (op.r.rs == OP_CP2_BASIC_MFC2)
- return lightrec_mfc2(state, op.r.rd);
-
- val = state->regs.cp2c[op.r.rd];
- switch (op.r.rd) {
- case 4:
- case 12:
- case 20:
- case 26:
- case 27:
- case 29:
- case 30:
- return (u32)(s16)val;
- default:
- return val;
+ if (op.i.op == OP_SWC2) {
+ val = lightrec_mfc2(state, op.i.rt);
+ } else if (op.r.rs == OP_CP2_BASIC_MFC2)
+ val = lightrec_mfc2(state, op.r.rd);
+ else {
+ val = state->regs.cp2c[op.r.rd];
+
+ switch (op.r.rd) {
+ case 4:
+ case 12:
+ case 20:
+ case 26:
+ case 27:
+ case 29:
+ case 30:
+ val = (u32)(s16)val;
+ fallthrough;
+ default:
+ break;
+ }
}
+
+ if (state->ops.cop2_notify)
+ (*state->ops.cop2_notify)(state, op.opcode, val);
+
+ return val;
+}
+
+static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
+{
+ u32 rt = lightrec_mfc(state, op);
+
+ if (op.i.op == OP_SWC2)
+ state->temp_reg = rt;
+ else if (op.r.rt)
+ state->regs.gpr[op.r.rt] = rt;
}
static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
}
}
-void lightrec_mtc(struct lightrec_state *state, union code op, u32 data)
+void lightrec_mtc(struct lightrec_state *state, union code op, u8 reg, u32 data)
{
- if (op.i.op == OP_CP0)
- lightrec_mtc0(state, op.r.rd, data);
- else if (op.r.rs == OP_CP2_BASIC_CTC2)
- lightrec_ctc2(state, op.r.rd, data);
- else
- lightrec_mtc2(state, op.r.rd, data);
+ if (op.i.op == OP_CP0) {
+ lightrec_mtc0(state, reg, data);
+ } else {
+ if (op.i.op == OP_LWC2 || op.r.rs != OP_CP2_BASIC_CTC2)
+ lightrec_mtc2(state, reg, data);
+ else
+ lightrec_ctc2(state, reg, data);
+
+ if (state->ops.cop2_notify)
+ (*state->ops.cop2_notify)(state, op.opcode, data);
+ }
}
static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg)
{
union code op = (union code) arg;
+ u32 data;
+ u8 reg;
- lightrec_mtc(state, op, state->regs.gpr[op.r.rt]);
+ if (op.i.op == OP_LWC2) {
+ data = state->temp_reg;
+ reg = op.i.rt;
+ } else {
+ data = state->regs.gpr[op.r.rt];
+ reg = op.r.rd;
+ }
+
+ lightrec_mtc(state, op, reg, data);
}
void lightrec_rfe(struct lightrec_state *state)
void *func;
int err;
- for (;;) {
+ do {
func = lut_read(state, lut_offset(pc));
if (func && func != state->get_next_block)
break;
}
should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) &&
+ !block_has_flag(block, BLOCK_NEVER_COMPILE) &&
!block_has_flag(block, BLOCK_IS_DEAD);
if (unlikely(should_recompile)) {
} else {
lightrec_recompiler_add(state->rec, block);
}
-
- if (state->exit_flags != LIGHTREC_EXIT_NORMAL ||
- state->current_cycle >= state->target_cycle)
- break;
- }
+ } while (state->exit_flags == LIGHTREC_EXIT_NORMAL
+ && state->current_cycle < state->target_cycle);
state->next_pc = pc;
return func;
lightrec_code_alloc_unlock(state);
}
+static char lightning_code_data[0x80000];
+
static void * lightrec_emit_code(struct lightrec_state *state,
const struct block *block,
jit_state_t *_jit, unsigned int *size)
jit_realize();
- if (!ENABLE_DISASSEMBLER)
+ if (ENABLE_DISASSEMBLER)
+ jit_set_data(lightning_code_data, sizeof(lightning_code_data), 0);
+ else
jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE);
if (has_code_buffer) {
*size = (unsigned int) new_code_size;
+ if (state->ops.code_inv)
+ state->ops.code_inv(code, new_code_size);
+
return code;
}
unsigned int i;
jit_node_t *addr[C_WRAPPERS_COUNT - 1];
jit_node_t *to_end[C_WRAPPERS_COUNT - 1];
+ u8 tmp = JIT_R1;
+
+#ifdef __sh__
+ /* On SH, GBR-relative loads target the r0 register.
+ * Use it as the temporary register to factorize the move to
+ * JIT_R1. */
+ if (LIGHTREC_REG_STATE == _GBR)
+ tmp = _R0;
+#endif
block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
if (!block)
/* Add entry points */
for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) {
- jit_ldxi(JIT_R1, LIGHTREC_REG_STATE,
+ jit_ldxi(tmp, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, c_wrappers[i]));
to_end[i - 1] = jit_b();
addr[i - 1] = jit_indirect();
}
- jit_ldxi(JIT_R1, LIGHTREC_REG_STATE,
+ jit_ldxi(tmp, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, c_wrappers[0]));
for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
jit_patch(to_end[i]);
+ jit_movr(JIT_R1, tmp);
jit_epilog();
jit_prolog();
return 8 + 5 * (length + 3 / 4);
}
+static u32 lightrec_check_load_delay(struct lightrec_state *state, u32 pc, u8 reg)
+{
+ struct block *block;
+ union code first_op;
+
+ first_op = lightrec_read_opcode(state, pc);
+
+ if (likely(!opcode_reads_register(first_op, reg))) {
+ state->regs.gpr[reg] = state->temp_reg;
+ } else {
+ block = lightrec_get_block(state, pc);
+ if (unlikely(!block)) {
+ pr_err("Unable to get block at PC 0x%08x\n", pc);
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
+ pc = 0;
+ } else {
+ pc = lightrec_handle_load_delay(state, block, pc, reg);
+ }
+ }
+
+ return pc;
+}
+
+static void update_cycle_counter_before_c(jit_state_t *_jit)
+{
+ /* update state->current_cycle */
+ jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, target_cycle));
+ jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
+ jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
+ LIGHTREC_REG_STATE, JIT_R1);
+}
+
+static void update_cycle_counter_after_c(jit_state_t *_jit)
+{
+ /* Recalc the delta */
+ jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, current_cycle));
+ jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, target_cycle));
+ jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
+}
+
static struct block * generate_dispatcher(struct lightrec_state *state)
{
struct block *block;
jit_state_t *_jit;
- jit_node_t *to_end, *loop, *addr, *addr2, *addr3;
+ jit_node_t *to_end, *loop, *addr, *addr2, *addr3, *addr4, *addr5, *jmp, *jmp2;
unsigned int i;
u32 offset;
jit_prolog();
jit_frame(256);
+ jit_getarg(LIGHTREC_REG_STATE, jit_arg());
+ jit_getarg(JIT_V0, jit_arg());
jit_getarg(JIT_V1, jit_arg());
jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
for (i = 0; i < NUM_REGS; i++)
jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG));
- /* Pass lightrec_state structure to blocks, using the last callee-saved
- * register that Lightning provides */
- jit_movi(LIGHTREC_REG_STATE, (intptr_t) state);
-
loop = jit_label();
/* Call the block's code */
jit_prepare();
jit_pushargr(LIGHTREC_REG_STATE);
+
jit_finishi(lightrec_memset);
+ jit_retval(LIGHTREC_REG_CYCLE);
jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, regs.gpr[31]));
-
- jit_retval(LIGHTREC_REG_CYCLE);
jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE);
+
+ if (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS)
+ jmp = jit_b();
+ }
+
+ if (OPT_DETECT_IMPOSSIBLE_BRANCHES) {
+ /* Blocks will jump here when they reach a branch that should
+ * be executed with the interpreter, passing the branch's PC
+ * in JIT_V0 and the address of the block in JIT_V1. */
+ addr4 = jit_indirect();
+
+ update_cycle_counter_before_c(_jit);
+
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+ jit_pushargr(JIT_V1);
+ jit_pushargr(JIT_V0);
+ jit_finishi(lightrec_emulate_block);
+
+ jit_retval(JIT_V0);
+
+ update_cycle_counter_after_c(_jit);
+
+ if (OPT_HANDLE_LOAD_DELAYS)
+ jmp2 = jit_b();
+
+ }
+
+ if (OPT_HANDLE_LOAD_DELAYS) {
+ /* Blocks will jump here when they reach a branch with a load
+ * opcode in its delay slot. The delay slot has already been
+ * executed; the load value is in (state->temp_reg), and the
+ * register number is in JIT_V1.
+ * Jump to a C function which will evaluate the branch target's
+ * first opcode, to make sure that it does not read the register
+ * in question; and if it does, handle it accordingly. */
+ addr5 = jit_indirect();
+
+ update_cycle_counter_before_c(_jit);
+
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+ jit_pushargr(JIT_V0);
+ jit_pushargr(JIT_V1);
+ jit_finishi(lightrec_check_load_delay);
+
+ jit_retval(JIT_V0);
+
+ update_cycle_counter_after_c(_jit);
+
+ if (OPT_DETECT_IMPOSSIBLE_BRANCHES)
+ jit_patch(jmp2);
+ }
+
+ if (OPT_REPLACE_MEMSET
+ && (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS)) {
+ jit_patch(jmp);
}
/* The block will jump here, with the number of cycles remaining in
/* If possible, use the code LUT */
if (!lut_is_32bit(state))
jit_lshi(JIT_V1, JIT_V1, 1);
- jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE);
+ jit_add_state(JIT_V1, JIT_V1);
offset = offsetof(struct lightrec_state, code_lut);
if (lut_is_32bit(state))
if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
/* We may call the interpreter - update state->current_cycle */
- jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, target_cycle));
- jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE);
- jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
- LIGHTREC_REG_STATE, JIT_V1);
+ update_cycle_counter_before_c(_jit);
}
jit_prepare();
if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
/* The interpreter may have updated state->current_cycle and
* state->target_cycle - recalc the delta */
- jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, current_cycle));
- jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, target_cycle));
- jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
+ update_cycle_counter_after_c(_jit);
} else {
jit_movr(LIGHTREC_REG_CYCLE, JIT_V0);
}
+ /* Reset JIT_V0 to the next PC */
+ jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, next_pc));
+
/* If we get non-NULL, loop */
jit_patch_at(jit_bnei(JIT_V1, 0), loop);
goto err_free_block;
state->eob_wrapper_func = jit_address(addr2);
+ if (OPT_DETECT_IMPOSSIBLE_BRANCHES)
+ state->interpreter_func = jit_address(addr4);
+ if (OPT_HANDLE_LOAD_DELAYS)
+ state->ds_check_func = jit_address(addr5);
if (OPT_REPLACE_MEMSET)
state->memset_func = jit_address(addr3);
state->get_next_block = jit_address(addr);
return (union code) LE32TOH(*code);
}
-unsigned int lightrec_cycles_of_opcode(union code code)
+__cnst unsigned int lightrec_cycles_of_opcode(union code code)
{
return 2;
}
pr_debug("Block size: %hu opcodes\n", block->nb_ops);
- /* If the first opcode is an 'impossible' branch, never compile the
- * block */
- if (should_emulate(block->opcode_list))
- block_flags |= BLOCK_NEVER_COMPILE;
-
fully_tagged = lightrec_block_is_fully_tagged(block);
if (fully_tagged)
block_flags |= BLOCK_FULLY_TAGGED;
addr = state->get_next_block;
lut_write(state, lut_offset(pc), addr);
- pr_debug("Recompile count: %u\n", state->nb_precompile++);
+ pr_debug("Blocks created: %u\n", ++state->nb_precompile);
return block;
}
for (i = 0; i < block->nb_ops; i++) {
op = &block->opcode_list[i];
- /* Verify that all load/stores of the opcode list
- * Check all loads/stores of the opcode list and mark the
+ /* If we have one branch that must be emulated, we cannot trash
+ * the opcode list. */
+ if (should_emulate(op))
+ return false;
+
+ /* Check all loads/stores of the opcode list and mark the
* block as fully compiled if they all have been tagged. */
switch (op->c.i.op) {
case OP_LB:
block->_jit = _jit;
lightrec_regcache_reset(cstate->reg_cache);
+ lightrec_preload_pc(cstate->reg_cache);
+
cstate->cycles = 0;
cstate->nb_local_branches = 0;
cstate->nb_targets = 0;
+ cstate->no_load_delay = false;
jit_prolog();
jit_tramp(256);
pr_debug("Branch at offset 0x%x will be emulated\n",
i << 2);
- lightrec_emit_eob(cstate, block, i, false);
+ lightrec_emit_jump_to_interpreter(cstate, block, i);
skip_next = !op_flag_no_ds(elm->flags);
} else {
lightrec_rec_opcode(cstate, block, i);
lightrec_unregister(MEM_FOR_CODE, old_code_size);
}
+ pr_debug("Blocks compiled: %u\n", ++state->nb_compile);
+
return 0;
}
u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
{
- s32 (*func)(void *, s32) = (void *)state->dispatcher->function;
+ s32 (*func)(struct lightrec_state *, u32, void *, s32) = (void *)state->dispatcher->function;
void *block_trace;
s32 cycles_delta;
if (block_trace) {
cycles_delta = state->target_cycle - state->current_cycle;
- cycles_delta = (*func)(block_trace, cycles_delta);
+ cycles_delta = (*func)(state, state->next_pc,
+ block_trace, cycles_delta);
state->current_cycle = state->target_cycle - cycles_delta;
}
return NULL;
}
+ if (ops->cop2_notify)
+ pr_debug("Optional cop2_notify callback in lightrec_ops\n");
+ else
+ pr_debug("No optional cop2_notify callback in lightrec_ops\n");
+
if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER
&& codebuf_map->address) {
tlsf = tlsf_create_with_pool(codebuf_map->address,
state->tlsf = tlsf;
state->with_32bit_lut = with_32bit_lut;
+ state->in_delay_slot_n = 0xff;
state->block_cache = lightrec_blockcache_init(state);
if (!state->block_cache)
state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb;
state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
+ state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb;
state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb;