static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data);
static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg);
+static void lightrec_reap_block(struct lightrec_state *state, void *data);
+
static void lightrec_default_sb(struct lightrec_state *state, u32 opcode,
void *host, u32 addr, u32 data)
{
if (ENABLE_THREADED_COMPILER)
lightrec_recompiler_remove(state->rec, block);
- lightrec_unregister_block(state->block_cache, block);
remove_from_code_lut(state->block_cache, block);
- lightrec_free_block(state, block);
+
+ if (ENABLE_THREADED_COMPILER) {
+ lightrec_reaper_add(state->reaper,
+ lightrec_reap_block, block);
+ } else {
+ lightrec_unregister_block(state->block_cache, block);
+ lightrec_free_block(state, block);
+ }
}
block = NULL;
if (has_code_buffer) {
jit_get_code(&code_size);
+
+#ifdef __i386__
+ /* Lightning's code size estimation routine is buggy on x86 and
+ * will return a value that's too small. */
+ code_size *= 2;
+#endif
+
code = lightrec_alloc_code(state, (size_t) code_size);
if (!code) {
}
code = jit_emit();
+ if (!code) {
+ if (has_code_buffer)
+ lightrec_free_code(state, code);
+
+ return NULL;
+ }
jit_get_code(&new_code_size);
lightrec_register(MEM_FOR_CODE, new_code_size);
struct block *block;
jit_state_t *_jit;
unsigned int i;
- jit_node_t *addr[C_WRAPPERS_COUNT - 1];
- jit_node_t *to_end[C_WRAPPERS_COUNT - 1];
- u8 tmp = JIT_R1;
-
-#ifdef __sh__
- /* On SH, GBR-relative loads target the r0 register.
- * Use it as the temporary register to factorize the move to
- * JIT_R1. */
- if (LIGHTREC_REG_STATE == _GBR)
- tmp = _R0;
-#endif
block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
if (!block)
jit_prolog();
jit_tramp(256);
- /* Add entry points */
- for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) {
- jit_ldxi(tmp, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, c_wrappers[i]));
- to_end[i - 1] = jit_b();
- addr[i - 1] = jit_indirect();
- }
-
- jit_ldxi(tmp, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, c_wrappers[0]));
-
- for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
- jit_patch(to_end[i]);
- jit_movr(JIT_R1, tmp);
+ /* Load pointer to C wrapper */
+ jit_addr(JIT_R1, JIT_R1, LIGHTREC_REG_STATE);
+ jit_ldxi(JIT_R1, JIT_R1, lightrec_offset(c_wrappers));
jit_epilog();
jit_prolog();
/* Save all temporaries on stack */
for (i = 0; i < NUM_TEMPS; i++) {
if (i + FIRST_TEMP != 1) {
- jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]),
+ jit_stxi(lightrec_offset(wrapper_regs[i]),
LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP));
}
}
jit_pushargr(LIGHTREC_REG_STATE);
jit_pushargr(JIT_R2);
- jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, target_cycle));
+ jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE, lightrec_offset(target_cycle));
/* state->current_cycle = state->target_cycle - delta; */
jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE);
- jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
- LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE);
+ jit_stxi_i(lightrec_offset(current_cycle), LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE);
/* Call the wrapper function */
jit_finishr(JIT_R1);
/* delta = state->target_cycle - state->current_cycle */;
- jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, current_cycle));
- jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, target_cycle));
+ jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE, lightrec_offset(current_cycle));
+ jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE, lightrec_offset(target_cycle));
jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE);
/* Restore temporaries from stack */
for (i = 0; i < NUM_TEMPS; i++) {
if (i + FIRST_TEMP != 1) {
jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, wrapper_regs[i]));
+ lightrec_offset(wrapper_regs[i]));
}
}
block->function = lightrec_emit_code(state, block, _jit,
&block->code_size);
if (!block->function)
- goto err_free_block;
-
- state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function;
+ goto err_free_jit;
- for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
- state->wrappers_eps[i] = jit_address(addr[i]);
+ state->c_wrapper = block->function;
if (ENABLE_DISASSEMBLER) {
pr_debug("Wrapper block:\n");
jit_clear_state();
return block;
+err_free_jit:
+ jit_destroy_state();
err_free_block:
lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
err_no_mem:
+/* Flush the emulated cycle counter back to memory before calling into C:
+ * state->current_cycle = state->target_cycle - delta, where
+ * LIGHTREC_REG_CYCLE holds the remaining-cycles delta. */
static void update_cycle_counter_before_c(jit_state_t *_jit)
{
	/* update state->current_cycle */
-	jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
-		   offsetof(struct lightrec_state, target_cycle));
+	jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, lightrec_offset(target_cycle));
	jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
-	jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
-		   LIGHTREC_REG_STATE, JIT_R1);
+	jit_stxi_i(lightrec_offset(current_cycle), LIGHTREC_REG_STATE, JIT_R1);
}
+/* Recompute the remaining-cycles delta after returning from C code:
+ * LIGHTREC_REG_CYCLE = state->target_cycle - state->current_cycle
+ * (the C callee may have modified either counter). */
static void update_cycle_counter_after_c(jit_state_t *_jit)
{
	/* Recalc the delta */
-	jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE,
-		   offsetof(struct lightrec_state, current_cycle));
-	jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
-		   offsetof(struct lightrec_state, target_cycle));
+	jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, lightrec_offset(current_cycle));
+	jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, lightrec_offset(target_cycle));
	jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
}
{
if (lightrec_store_next_pc()) {
jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, next_pc));
+ lightrec_offset(next_pc));
}
}
{
struct block *block;
jit_state_t *_jit;
- jit_node_t *to_end, *loop, *addr, *addr2, *addr3, *addr4, *addr5, *jmp, *jmp2;
+ jit_node_t *to_end, *loop, *loop2,
+ *addr, *addr2, *addr3, *addr4, *addr5;
unsigned int i;
u32 offset;
/* Call the block's code */
jit_jmpr(JIT_V1);
- if (OPT_REPLACE_MEMSET) {
- /* Blocks will jump here when they need to call
- * lightrec_memset() */
- addr3 = jit_indirect();
-
- jit_movr(JIT_V1, LIGHTREC_REG_CYCLE);
-
- jit_prepare();
- jit_pushargr(LIGHTREC_REG_STATE);
-
- jit_finishi(lightrec_memset);
- jit_retval(LIGHTREC_REG_CYCLE);
-
- jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, regs.gpr[31]));
- jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE);
-
- if (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS)
- jmp = jit_b();
- }
-
- if (OPT_DETECT_IMPOSSIBLE_BRANCHES) {
- /* Blocks will jump here when they reach a branch that should
- * be executed with the interpreter, passing the branch's PC
- * in JIT_V0 and the address of the block in JIT_V1. */
- addr4 = jit_indirect();
-
- sync_next_pc(_jit);
- update_cycle_counter_before_c(_jit);
-
- jit_prepare();
- jit_pushargr(LIGHTREC_REG_STATE);
- jit_pushargr(JIT_V1);
- jit_pushargr(JIT_V0);
- jit_finishi(lightrec_emulate_block);
-
- jit_retval(JIT_V0);
-
- update_cycle_counter_after_c(_jit);
-
- if (OPT_HANDLE_LOAD_DELAYS)
- jmp2 = jit_b();
-
- }
-
- if (OPT_HANDLE_LOAD_DELAYS) {
- /* Blocks will jump here when they reach a branch with a load
- * opcode in its delay slot. The delay slot has already been
- * executed; the load value is in (state->temp_reg), and the
- * register number is in JIT_V1.
- * Jump to a C function which will evaluate the branch target's
- * first opcode, to make sure that it does not read the register
- * in question; and if it does, handle it accordingly. */
- addr5 = jit_indirect();
-
- sync_next_pc(_jit);
- update_cycle_counter_before_c(_jit);
-
- jit_prepare();
- jit_pushargr(LIGHTREC_REG_STATE);
- jit_pushargr(JIT_V0);
- jit_pushargr(JIT_V1);
- jit_finishi(lightrec_check_load_delay);
-
- jit_retval(JIT_V0);
-
- update_cycle_counter_after_c(_jit);
- }
-
/* The block will jump here, with the number of cycles remaining in
* LIGHTREC_REG_CYCLE */
addr2 = jit_indirect();
sync_next_pc(_jit);
- if (OPT_HANDLE_LOAD_DELAYS && OPT_DETECT_IMPOSSIBLE_BRANCHES)
- jit_patch(jmp2);
-
- if (OPT_REPLACE_MEMSET
- && (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS)) {
- jit_patch(jmp);
- }
-
- /* Store back the next PC to the lightrec_state structure */
- offset = offsetof(struct lightrec_state, curr_pc);
- jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0);
+ loop2 = jit_label();
/* Jump to end if state->target_cycle < state->current_cycle */
to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
/* Convert next PC to KUNSEG and avoid mirrors */
- jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
- jit_rshi_u(JIT_R1, JIT_V1, 28);
+ jit_andi(JIT_V1, JIT_V0, RAM_SIZE - 1);
jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1);
+ jit_andi(JIT_R1, JIT_V0, BIT(28));
jit_addi(JIT_R2, JIT_R2, RAM_SIZE);
jit_movnr(JIT_V1, JIT_R2, JIT_R1);
jit_lshi(JIT_V1, JIT_V1, 1);
jit_add_state(JIT_V1, JIT_V1);
- offset = offsetof(struct lightrec_state, code_lut);
+ offset = lightrec_offset(code_lut);
if (lut_is_32bit(state))
jit_ldxi_ui(JIT_V1, JIT_V1, offset);
else
jit_ldxi(JIT_V1, JIT_V1, offset);
+ /* Store back the current PC to the lightrec_state structure */
+ jit_stxi_i(lightrec_offset(curr_pc), LIGHTREC_REG_STATE, JIT_V0);
+
/* If we get non-NULL, loop */
jit_patch_at(jit_bnei(JIT_V1, 0), loop);
}
/* Reset JIT_V0 to the next PC */
- jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, curr_pc));
+ jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, lightrec_offset(curr_pc));
/* If we get non-NULL, loop */
jit_patch_at(jit_bnei(JIT_V1, 0), loop);
jit_note(__FILE__, __LINE__);
jit_patch(to_end);
+ /* Store back the current PC to the lightrec_state structure */
+ jit_stxi_i(lightrec_offset(curr_pc), LIGHTREC_REG_STATE, JIT_V0);
+
jit_retr(LIGHTREC_REG_CYCLE);
+
+ if (OPT_REPLACE_MEMSET) {
+ /* Blocks will jump here when they need to call
+ * lightrec_memset() */
+ addr3 = jit_indirect();
+
+ jit_movr(JIT_V1, LIGHTREC_REG_CYCLE);
+
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+
+ jit_finishi(lightrec_memset);
+ jit_retval(LIGHTREC_REG_CYCLE);
+
+ jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, lightrec_offset(regs.gpr[31]));
+
+ jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE);
+
+ jit_patch_at(jit_b(), loop2);
+ }
+
+ if (OPT_DETECT_IMPOSSIBLE_BRANCHES) {
+ /* Blocks will jump here when they reach a branch that should
+ * be executed with the interpreter, passing the branch's PC
+ * in JIT_V0 and the address of the block in JIT_V1. */
+ addr4 = jit_indirect();
+
+ sync_next_pc(_jit);
+ update_cycle_counter_before_c(_jit);
+
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+ jit_pushargr(JIT_V1);
+ jit_pushargr(JIT_V0);
+ jit_finishi(lightrec_emulate_block);
+
+ jit_retval(JIT_V0);
+
+ update_cycle_counter_after_c(_jit);
+
+ jit_patch_at(jit_b(), loop2);
+
+ }
+
+ if (OPT_HANDLE_LOAD_DELAYS) {
+ /* Blocks will jump here when they reach a branch with a load
+ * opcode in its delay slot. The delay slot has already been
+ * executed; the load value is in (state->temp_reg), and the
+ * register number is in JIT_V1.
+ * Jump to a C function which will evaluate the branch target's
+ * first opcode, to make sure that it does not read the register
+ * in question; and if it does, handle it accordingly. */
+ addr5 = jit_indirect();
+
+ sync_next_pc(_jit);
+ update_cycle_counter_before_c(_jit);
+
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+ jit_pushargr(JIT_V0);
+ jit_pushargr(JIT_V1);
+ jit_finishi(lightrec_check_load_delay);
+
+ jit_retval(JIT_V0);
+
+ update_cycle_counter_after_c(_jit);
+
+ jit_patch_at(jit_b(), loop2);
+ }
+
jit_epilog();
block->_jit = _jit;
block->function = lightrec_emit_code(state, block, _jit,
&block->code_size);
if (!block->function)
- goto err_free_block;
+ goto err_free_jit;
state->eob_wrapper_func = jit_address(addr2);
if (OPT_DETECT_IMPOSSIBLE_BRANCHES)
jit_clear_state();
return block;
+err_free_jit:
+ jit_destroy_state();
err_free_block:
lightrec_free(state, MEM_FOR_IR, sizeof(*block), block);
err_no_mem:
lightrec_register(MEM_FOR_MIPS_CODE, length);
if (ENABLE_DISASSEMBLER) {
- pr_debug("Disassembled block at PC: 0x%08x\n", block->pc);
+ pr_debug("Disassembled block at "PC_FMT"\n", block->pc);
lightrec_print_disassembly(block, code);
}
int lightrec_compile_block(struct lightrec_cstate *cstate,
struct block *block)
{
+ struct block *dead_blocks[ARRAY_SIZE(cstate->targets)];
+ u32 was_dead[ARRAY_SIZE(cstate->targets) / 8];
struct lightrec_state *state = cstate->state;
struct lightrec_branch_target *target;
bool fully_tagged = false;
/* Add compiled function to the LUT */
lut_write(state, lut_offset(block->pc), block->function);
- if (ENABLE_THREADED_COMPILER)
- lightrec_reaper_continue(state->reaper);
-
/* Detect old blocks that have been covered by the new one */
- for (i = 0; i < cstate->nb_targets; i++) {
+ for (i = 0; ENABLE_THREADED_COMPILER && i < cstate->nb_targets; i++) {
target = &cstate->targets[i];
if (!target->offset)
offset = block->pc + target->offset * sizeof(u32);
- /* Pause the reaper while we search for the block until we set
- * the BLOCK_IS_DEAD flag, otherwise the block may be removed
- * under our feet. */
- if (ENABLE_THREADED_COMPILER)
- lightrec_reaper_pause(state->reaper);
-
block2 = lightrec_find_block(state->block_cache, offset);
if (block2) {
/* No need to check if block2 is compilable - it must
/* Set the "block dead" flag to prevent the dynarec from
* recompiling this block */
old_flags = block_set_flags(block2, BLOCK_IS_DEAD);
+
+ if (old_flags & BLOCK_IS_DEAD)
+ was_dead[i / 32] |= BIT(i % 32);
+ else
+ was_dead[i / 32] &= ~BIT(i % 32);
}
- if (ENABLE_THREADED_COMPILER) {
- lightrec_reaper_continue(state->reaper);
+ dead_blocks[i] = block2;
- /* If block2 was pending for compilation, cancel it.
- * If it's being compiled right now, wait until it
- * finishes. */
- if (block2)
- lightrec_recompiler_remove(state->rec, block2);
- }
+ /* If block2 was pending for compilation, cancel it.
+ * If it's being compiled right now, wait until it finishes. */
+ if (block2)
+ lightrec_recompiler_remove(state->rec, block2);
+ }
+
+ for (i = 0; i < cstate->nb_targets; i++) {
+ target = &cstate->targets[i];
+
+ if (!target->offset)
+ continue;
/* We know from now on that block2 (if present) isn't going to
* be compiled. We can override the LUT entry with our new
offset = lut_offset(block->pc) + target->offset;
lut_write(state, offset, jit_address(target->label));
+ if (ENABLE_THREADED_COMPILER) {
+ block2 = dead_blocks[i];
+ } else {
+ offset = block->pc + target->offset * sizeof(u32);
+ block2 = lightrec_find_block(state->block_cache, offset);
+ }
if (block2) {
- pr_debug("Reap block 0x%08x as it's covered by block "
- "0x%08x\n", block2->pc, block->pc);
+ pr_debug("Reap block "X32_FMT" as it's covered by block "
+ X32_FMT"\n", block2->pc, block->pc);
/* Finally, reap the block. */
if (!ENABLE_THREADED_COMPILER) {
lightrec_unregister_block(state->block_cache, block2);
lightrec_free_block(state, block2);
- } else if (!(old_flags & BLOCK_IS_DEAD)) {
+ } else if (!(was_dead[i / 32] & BIT(i % 32))) {
lightrec_reaper_add(state->reaper,
lightrec_reap_block,
block2);
}
}
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_reaper_continue(state->reaper);
+
if (ENABLE_DISASSEMBLER) {
- pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
+ pr_debug("Compiling block at "PC_FMT"\n", block->pc);
jit_disassemble();
}
}
if (oldjit) {
- pr_debug("Block 0x%08x recompiled, reaping old jit context.\n",
+ pr_debug("Block "X32_FMT" recompiled, reaping old jit context.\n",
block->pc);
if (ENABLE_THREADED_COMPILER) {
}
struct lightrec_state * lightrec_init(char *argv0,
- const struct lightrec_mem_map *map,
+ const struct lightrec_mem_map *maps,
size_t nb,
const struct lightrec_ops *ops)
{
- const struct lightrec_mem_map *codebuf_map = &map[PSX_MAP_CODE_BUFFER];
+ const struct lightrec_mem_map *codebuf_map = &maps[PSX_MAP_CODE_BUFFER];
+ const struct lightrec_mem_map *map;
struct lightrec_state *state;
uintptr_t addr;
void *tlsf = NULL;
}
state->nb_maps = nb;
- state->maps = map;
+ state->maps = maps;
memcpy(&state->ops, ops, sizeof(*ops));
state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb;
- map = &state->maps[PSX_MAP_BIOS];
+ map = &maps[PSX_MAP_BIOS];
state->offset_bios = (uintptr_t)map->address - map->pc;
- map = &state->maps[PSX_MAP_SCRATCH_PAD];
+ map = &maps[PSX_MAP_SCRATCH_PAD];
state->offset_scratch = (uintptr_t)map->address - map->pc;
- map = &state->maps[PSX_MAP_HW_REGISTERS];
+ map = &maps[PSX_MAP_HW_REGISTERS];
state->offset_io = (uintptr_t)map->address - map->pc;
- map = &state->maps[PSX_MAP_KERNEL_USER_RAM];
+ map = &maps[PSX_MAP_KERNEL_USER_RAM];
state->offset_ram = (uintptr_t)map->address - map->pc;
- if (state->maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 &&
- state->maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 &&
- state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
+ if (maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 &&
+ maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 &&
+ maps[PSX_MAP_MIRROR3].address == map->address + 0x600000)
state->mirrors_mapped = true;
if (state->offset_bios == 0 &&
void lightrec_set_cycles_per_opcode(struct lightrec_state *state, u32 cycles)
{
+	/* Nothing to do if the value did not actually change — avoids a
+	 * needless flush of every compiled block below. */
+	if (state->cycles_per_op == cycles)
+		return;
+
	state->cycles_per_op = cycles;
+
+	/* All compiled blocks are dropped here — presumably because generated
+	 * code bakes in the cycles-per-opcode value (TODO confirm). Quiesce
+	 * the background recompiler and drain the reaper first, so no block
+	 * is compiled or freed concurrently while the cache is torn down. */
+	if (ENABLE_THREADED_COMPILER) {
+		lightrec_recompiler_pause(state->rec);
+		lightrec_reaper_reap(state->reaper);
+	}
+
+	lightrec_invalidate_all(state);
+	lightrec_free_all_blocks(state->block_cache);
+
+	if (ENABLE_THREADED_COMPILER)
+		lightrec_recompiler_unpause(state->rec);
}