X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightrec%2Flightrec.c;h=ec26bff77b408581f86cc7c5be6c3ab83dd6e254;hb=8afce295870dc97704b0e9e1efe1801b6b56090d;hp=79db44776ff84a3bef8ae7855e2fe50e1f2efeac;hpb=03b78a3bf48813202e01149ae0b3c5c1f01efb4c;p=pcsx_rearmed.git diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 79db4477..ec26bff7 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -35,19 +35,21 @@ static bool lightrec_block_is_fully_tagged(const struct block *block); static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data); static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg); +static void lightrec_reap_block(struct lightrec_state *state, void *data); + static void lightrec_default_sb(struct lightrec_state *state, u32 opcode, - void *host, u32 addr, u8 data) + void *host, u32 addr, u32 data) { - *(u8 *)host = data; + *(u8 *)host = (u8)data; if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate(state, addr, 1); } static void lightrec_default_sh(struct lightrec_state *state, u32 opcode, - void *host, u32 addr, u16 data) + void *host, u32 addr, u32 data) { - *(u16 *)host = HTOLE16(data); + *(u16 *)host = HTOLE16((u16)data); if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate(state, addr, 2); @@ -80,6 +82,27 @@ static u32 lightrec_default_lw(struct lightrec_state *state, return LE32TOH(*(u32 *)host); } +static u32 lightrec_default_lwu(struct lightrec_state *state, + u32 opcode, void *host, u32 addr) +{ + u32 val; + + memcpy(&val, host, 4); + + return LE32TOH(val); +} + +static void lightrec_default_swu(struct lightrec_state *state, u32 opcode, + void *host, u32 addr, u32 data) +{ + data = HTOLE32(data); + + memcpy(host, &data, 4); + + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) + lightrec_invalidate(state, addr & ~0x3, 8); +} + static const struct lightrec_mem_map_ops lightrec_default_ops = { .sb = lightrec_default_sb, .sh = lightrec_default_sh, @@ -87,6 +110,8 @@ static const struct lightrec_mem_map_ops lightrec_default_ops = { .lb = lightrec_default_lb, .lh = lightrec_default_lh, .lw = lightrec_default_lw, + .lwu = lightrec_default_lwu, + .swu = lightrec_default_swu, }; static void __segfault_cb(struct lightrec_state *state, u32 addr, @@ -94,9 +119,9 @@ static void __segfault_cb(struct lightrec_state *state, u32 addr, { lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); pr_err("Segmentation fault in recompiled code: invalid " - "load/store at address 0x%08x\n", addr); + "load/store at address "PC_FMT"\n", addr); if (block) - pr_err("Was executing block PC 0x%08x\n", block->pc); + pr_err("Was executing block "PC_FMT"\n", block->pc); } static void lightrec_swl(struct lightrec_state *state, @@ -290,7 +315,7 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base, old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { - pr_debug("Opcode of block at PC 0x%08x has been tagged" + pr_debug("Opcode of block at "PC_FMT" has been tagged" " - flag for recompilation\n", block->pc); lut_write(state, lut_offset(block->pc), NULL); @@ -299,10 +324,10 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base, switch (op.i.op) { case OP_SB: - ops->sb(state, opcode, host, addr, (u8) data); + ops->sb(state, opcode, host, addr, data); return 0; case OP_SH: - ops->sh(state, opcode, host, addr, (u16) data); + ops->sh(state, opcode, host, addr, data); return 0; case OP_SWL: lightrec_swl(state, ops, opcode, host, addr, data); @@ -331,6 +356,11 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base, return lightrec_lwl(state, ops, opcode, host, addr, data); case OP_LWR: return lightrec_lwr(state, ops, opcode, host, addr, data); + case OP_META_LWU: + return ops->lwu(state, opcode, host, addr); + case OP_META_SWU: + ops->swu(state, opcode, host, addr, data); + return 0; case OP_LW: default: return ops->lw(state, opcode, host, addr); @@ -352,6 +382,7 @@ static void lightrec_rw_helper(struct lightrec_state *state, case OP_LWL: case OP_LWR: case OP_LW: + case OP_META_LWU: if (OPT_HANDLE_LOAD_DELAYS && unlikely(!state->in_delay_slot_n)) { state->temp_reg = ret; state->in_delay_slot_n = 0xff; @@ -378,7 +409,7 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) block = lightrec_find_block_from_lut(state->block_cache, arg >> 16, state->curr_pc); if (unlikely(!block)) { - pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n", + pr_err("rw_generic: No block found in LUT for "PC_FMT" offset 0x%"PRIx16"\n", state->curr_pc, offset); lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return; @@ -665,7 +696,7 @@ static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) u8 old_flags; if (block && lightrec_block_is_outdated(state, block)) { - pr_debug("Block at PC 0x%08x is outdated!\n", block->pc); + pr_debug("Block at "PC_FMT" is outdated!\n", block->pc); old_flags = block_set_flags(block, BLOCK_IS_DEAD); if (!(old_flags & BLOCK_IS_DEAD)) { @@ -674,9 +705,15 @@ static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) if (ENABLE_THREADED_COMPILER) lightrec_recompiler_remove(state->rec, block); - lightrec_unregister_block(state->block_cache, block); remove_from_code_lut(state->block_cache, block); - lightrec_free_block(state, block); + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_add(state->reaper, + lightrec_reap_block, block); + } else { + lightrec_unregister_block(state->block_cache, block); + lightrec_free_block(state, block); + } } block = NULL; @@ -685,7 +722,7 @@ static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) if (!block) { block = lightrec_precompile_block(state, pc); if (!block) { - pr_err("Unable to recompile block at PC 0x%x\n", pc); + pr_err("Unable to recompile block at "PC_FMT"\n", pc); lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return NULL; } @@ -724,7 +761,7 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) !block_has_flag(block, BLOCK_IS_DEAD); if (unlikely(should_recompile)) { - pr_debug("Block at PC 0x%08x should recompile\n", pc); + pr_debug("Block at "PC_FMT" should recompile\n", pc); if (ENABLE_THREADED_COMPILER) { lightrec_recompiler_add(state->rec, block); @@ -840,6 +877,13 @@ static void * lightrec_emit_code(struct lightrec_state *state, if (has_code_buffer) { jit_get_code(&code_size); + +#ifdef __i386__ + /* Lightning's code size estimation routine is buggy on x86 and + * will return a value that's too small. */ + code_size *= 2; +#endif + code = lightrec_alloc_code(state, (size_t) code_size); if (!code) { @@ -867,6 +911,12 @@ static void * lightrec_emit_code(struct lightrec_state *state, } code = jit_emit(); + if (!code) { + if (has_code_buffer) + lightrec_free_code(state, code); + + return NULL; + } jit_get_code(&new_code_size); lightrec_register(MEM_FOR_CODE, new_code_size); @@ -892,17 +942,6 @@ static struct block * generate_wrapper(struct lightrec_state *state) struct block *block; jit_state_t *_jit; unsigned int i; - jit_node_t *addr[C_WRAPPERS_COUNT - 1]; - jit_node_t *to_end[C_WRAPPERS_COUNT - 1]; - u8 tmp = JIT_R1; - -#ifdef __sh__ - /* On SH, GBR-relative loads target the r0 register. - * Use it as the temporary register to factorize the move to - * JIT_R1. */ - if (LIGHTREC_REG_STATE == _GBR) - tmp = _R0; -#endif block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -919,20 +958,9 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_prolog(); jit_tramp(256); - /* Add entry points */ - for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { - jit_ldxi(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrappers[i])); - to_end[i - 1] = jit_b(); - addr[i - 1] = jit_indirect(); - } - - jit_ldxi(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrappers[0])); - - for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) - jit_patch(to_end[i]); - jit_movr(JIT_R1, tmp); + /* Load pointer to C wrapper */ + jit_addr(JIT_R1, JIT_R1, LIGHTREC_REG_STATE); + jit_ldxi(JIT_R1, JIT_R1, lightrec_offset(c_wrappers)); jit_epilog(); jit_prolog(); @@ -940,7 +968,7 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* Save all temporaries on stack */ for (i = 0; i < NUM_TEMPS; i++) { if (i + FIRST_TEMP != 1) { - jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]), + jit_stxi(lightrec_offset(wrapper_regs[i]), LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP)); } } @@ -951,29 +979,25 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(JIT_R2); - jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); + jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE, lightrec_offset(target_cycle)); /* state->current_cycle = state->target_cycle - delta; */ jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE); - jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE); + jit_stxi_i(lightrec_offset(current_cycle), LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE); /* Call the wrapper function */ jit_finishr(JIT_R1); /* delta = state->target_cycle - state->current_cycle */; - jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, current_cycle)); - jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); + jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE, lightrec_offset(current_cycle)); + jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE, lightrec_offset(target_cycle)); jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE); /* Restore temporaries from stack */ for (i = 0; i < NUM_TEMPS; i++) { if (i + FIRST_TEMP != 1) { jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, wrapper_regs[i])); + lightrec_offset(wrapper_regs[i])); } } @@ -988,12 +1012,9 @@ static struct block * generate_wrapper(struct lightrec_state *state) block->function = lightrec_emit_code(state, block, _jit, &block->code_size); if (!block->function) - goto err_free_block; - - state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function; + goto err_free_jit; - for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) - state->wrappers_eps[i] = jit_address(addr[i]); + state->c_wrapper = block->function; if (ENABLE_DISASSEMBLER) { pr_debug("Wrapper block:\n"); @@ -1003,6 +1024,8 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_clear_state(); return block; +err_free_jit: + jit_destroy_state(); err_free_block: lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); err_no_mem: @@ -1018,12 +1041,12 @@ static u32 lightrec_memset(struct lightrec_state *state) u32 length = state->regs.gpr[5] * 4; if (!map) { - pr_err("Unable to find memory map for memset target address " - "0x%x\n", kunseg_pc); + pr_err("Unable to find memory map for memset target address "PC_FMT"\n", + kunseg_pc); return 0; } - pr_debug("Calling host memset, PC 0x%x (host address 0x%" PRIxPTR ") for %u bytes\n", + pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %u bytes\n", kunseg_pc, (uintptr_t)host, length); memset(host, 0, length); @@ -1046,7 +1069,7 @@ static u32 lightrec_check_load_delay(struct lightrec_state *state, u32 pc, u8 re } else { block = lightrec_get_block(state, pc); if (unlikely(!block)) { - pr_err("Unable to get block at PC 0x%08x\n", pc); + pr_err("Unable to get block at "PC_FMT"\n", pc); lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); pc = 0; } else { @@ -1060,20 +1083,16 @@ static u32 lightrec_check_load_delay(struct lightrec_state *state, u32 pc, u8 re static void update_cycle_counter_before_c(jit_state_t *_jit) { /* update state->current_cycle */ - jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, lightrec_offset(target_cycle)); jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); - jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, JIT_R1); + jit_stxi_i(lightrec_offset(current_cycle), LIGHTREC_REG_STATE, JIT_R1); } static void update_cycle_counter_after_c(jit_state_t *_jit) { /* Recalc the delta */ - jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, current_cycle)); - jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); + jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, lightrec_offset(current_cycle)); + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, lightrec_offset(target_cycle)); jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); } @@ -1081,7 +1100,7 @@ static void sync_next_pc(jit_state_t *_jit) { if (lightrec_store_next_pc()) { jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, next_pc)); + lightrec_offset(next_pc)); } } @@ -1089,7 +1108,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *loop, *addr, *addr2, *addr3, *addr4, *addr5, *jmp, *jmp2; + jit_node_t *to_end, *loop, *loop2, + *addr, *addr2, *addr3, *addr4, *addr5; unsigned int i; u32 offset; @@ -1121,100 +1141,21 @@ static struct block * generate_dispatcher(struct lightrec_state *state) /* Call the block's code */ jit_jmpr(JIT_V1); - if (OPT_REPLACE_MEMSET) { - /* Blocks will jump here when they need to call - * lightrec_memset() */ - addr3 = jit_indirect(); - - jit_movr(JIT_V1, LIGHTREC_REG_CYCLE); - - jit_prepare(); - jit_pushargr(LIGHTREC_REG_STATE); - - jit_finishi(lightrec_memset); - jit_retval(LIGHTREC_REG_CYCLE); - - jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, regs.gpr[31])); - jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); - - if (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS) - jmp = jit_b(); - } - - if (OPT_DETECT_IMPOSSIBLE_BRANCHES) { - /* Blocks will jump here when they reach a branch that should - * be executed with the interpreter, passing the branch's PC - * in JIT_V0 and the address of the block in JIT_V1. */ - addr4 = jit_indirect(); - - sync_next_pc(_jit); - update_cycle_counter_before_c(_jit); - - jit_prepare(); - jit_pushargr(LIGHTREC_REG_STATE); - jit_pushargr(JIT_V1); - jit_pushargr(JIT_V0); - jit_finishi(lightrec_emulate_block); - - jit_retval(JIT_V0); - - update_cycle_counter_after_c(_jit); - - if (OPT_HANDLE_LOAD_DELAYS) - jmp2 = jit_b(); - - } - - if (OPT_HANDLE_LOAD_DELAYS) { - /* Blocks will jump here when they reach a branch with a load - * opcode in its delay slot. The delay slot has already been - * executed; the load value is in (state->temp_reg), and the - * register number is in JIT_V1. - * Jump to a C function which will evaluate the branch target's - * first opcode, to make sure that it does not read the register - * in question; and if it does, handle it accordingly. */ - addr5 = jit_indirect(); - - sync_next_pc(_jit); - update_cycle_counter_before_c(_jit); - - jit_prepare(); - jit_pushargr(LIGHTREC_REG_STATE); - jit_pushargr(JIT_V0); - jit_pushargr(JIT_V1); - jit_finishi(lightrec_check_load_delay); - - jit_retval(JIT_V0); - - update_cycle_counter_after_c(_jit); - } - /* The block will jump here, with the number of cycles remaining in * LIGHTREC_REG_CYCLE */ addr2 = jit_indirect(); sync_next_pc(_jit); - if (OPT_HANDLE_LOAD_DELAYS && OPT_DETECT_IMPOSSIBLE_BRANCHES) - jit_patch(jmp2); - - if (OPT_REPLACE_MEMSET - && (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS)) { - jit_patch(jmp); - } - - /* Store back the next PC to the lightrec_state structure */ - offset = offsetof(struct lightrec_state, curr_pc); - jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0); + loop2 = jit_label(); /* Jump to end if state->target_cycle < state->current_cycle */ to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); /* Convert next PC to KUNSEG and avoid mirrors */ - jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); - jit_rshi_u(JIT_R1, JIT_V1, 28); + jit_andi(JIT_V1, JIT_V0, RAM_SIZE - 1); jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1); + jit_andi(JIT_R1, JIT_V0, BIT(28)); jit_addi(JIT_R2, JIT_R2, RAM_SIZE); jit_movnr(JIT_V1, JIT_R2, JIT_R1); @@ -1223,12 +1164,15 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_lshi(JIT_V1, JIT_V1, 1); jit_add_state(JIT_V1, JIT_V1); - offset = offsetof(struct lightrec_state, code_lut); + offset = lightrec_offset(code_lut); if (lut_is_32bit(state)) jit_ldxi_ui(JIT_V1, JIT_V1, offset); else jit_ldxi(JIT_V1, JIT_V1, offset); + /* Store back the current PC to the lightrec_state structure */ + jit_stxi_i(lightrec_offset(curr_pc), LIGHTREC_REG_STATE, JIT_V0); + /* If we get non-NULL, loop */ jit_patch_at(jit_bnei(JIT_V1, 0), loop); @@ -1265,8 +1209,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) } /* Reset JIT_V0 to the next PC */ - jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, curr_pc)); + jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, lightrec_offset(curr_pc)); /* If we get non-NULL, loop */ jit_patch_at(jit_bnei(JIT_V1, 0), loop); @@ -1275,7 +1218,80 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_note(__FILE__, __LINE__); jit_patch(to_end); + /* Store back the current PC to the lightrec_state structure */ + jit_stxi_i(lightrec_offset(curr_pc), LIGHTREC_REG_STATE, JIT_V0); + jit_retr(LIGHTREC_REG_CYCLE); + + if (OPT_REPLACE_MEMSET) { + /* Blocks will jump here when they need to call + * lightrec_memset() */ + addr3 = jit_indirect(); + + jit_movr(JIT_V1, LIGHTREC_REG_CYCLE); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + + jit_finishi(lightrec_memset); + jit_retval(LIGHTREC_REG_CYCLE); + + jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, lightrec_offset(regs.gpr[31])); + + jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); + + jit_patch_at(jit_b(), loop2); + } + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) { + /* Blocks will jump here when they reach a branch that should + * be executed with the interpreter, passing the branch's PC + * in JIT_V0 and the address of the block in JIT_V1. */ + addr4 = jit_indirect(); + + sync_next_pc(_jit); + update_cycle_counter_before_c(_jit); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V1); + jit_pushargr(JIT_V0); + jit_finishi(lightrec_emulate_block); + + jit_retval(JIT_V0); + + update_cycle_counter_after_c(_jit); + + jit_patch_at(jit_b(), loop2); + + } + + if (OPT_HANDLE_LOAD_DELAYS) { + /* Blocks will jump here when they reach a branch with a load + * opcode in its delay slot. The delay slot has already been + * executed; the load value is in (state->temp_reg), and the + * register number is in JIT_V1. + * Jump to a C function which will evaluate the branch target's + * first opcode, to make sure that it does not read the register + * in question; and if it does, handle it accordingly. */ + addr5 = jit_indirect(); + + sync_next_pc(_jit); + update_cycle_counter_before_c(_jit); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V0); + jit_pushargr(JIT_V1); + jit_finishi(lightrec_check_load_delay); + + jit_retval(JIT_V0); + + update_cycle_counter_after_c(_jit); + + jit_patch_at(jit_b(), loop2); + } + jit_epilog(); block->_jit = _jit; @@ -1286,7 +1302,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) block->function = lightrec_emit_code(state, block, _jit, &block->code_size); if (!block->function) - goto err_free_block; + goto err_free_jit; state->eob_wrapper_func = jit_address(addr2); if (OPT_DETECT_IMPOSSIBLE_BRANCHES) @@ -1306,6 +1322,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_clear_state(); return block; +err_free_jit: + jit_destroy_state(); err_free_block: lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); err_no_mem: @@ -1426,7 +1444,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, lightrec_register(MEM_FOR_MIPS_CODE, length); if (ENABLE_DISASSEMBLER) { - pr_debug("Disassembled block at PC: 0x%08x\n", block->pc); + pr_debug("Disassembled block at "PC_FMT"\n", block->pc); lightrec_print_disassembly(block, code); } @@ -1482,6 +1500,8 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) case OP_SWR: case OP_LWC2: case OP_SWC2: + case OP_META_LWU: + case OP_META_SWU: if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags)) return false; fallthrough; @@ -1497,7 +1517,7 @@ static void lightrec_reap_block(struct lightrec_state *state, void *data) { struct block *block = data; - pr_debug("Reap dead block at PC 0x%08x\n", block->pc); + pr_debug("Reap dead block at "PC_FMT"\n", block->pc); lightrec_unregister_block(state->block_cache, block); lightrec_free_block(state, block); } @@ -1528,6 +1548,8 @@ static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data) int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block) { + struct block *dead_blocks[ARRAY_SIZE(cstate->targets)]; + u32 was_dead[ARRAY_SIZE(cstate->targets) / 8]; struct lightrec_state *state = cstate->state; struct lightrec_branch_target *target; bool fully_tagged = false; @@ -1646,11 +1668,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, /* Add compiled function to the LUT */ lut_write(state, lut_offset(block->pc), block->function); - if (ENABLE_THREADED_COMPILER) - lightrec_reaper_continue(state->reaper); - /* Detect old blocks that have been covered by the new one */ - for (i = 0; i < cstate->nb_targets; i++) { + for (i = 0; ENABLE_THREADED_COMPILER && i < cstate->nb_targets; i++) { target = &cstate->targets[i]; if (!target->offset) @@ -1658,12 +1677,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, offset = block->pc + target->offset * sizeof(u32); - /* Pause the reaper while we search for the block until we set - * the BLOCK_IS_DEAD flag, otherwise the block may be removed - * under our feet. */ - if (ENABLE_THREADED_COMPILER) - lightrec_reaper_pause(state->reaper); - block2 = lightrec_find_block(state->block_cache, offset); if (block2) { /* No need to check if block2 is compilable - it must @@ -1672,17 +1685,26 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, /* Set the "block dead" flag to prevent the dynarec from * recompiling this block */ old_flags = block_set_flags(block2, BLOCK_IS_DEAD); + + if (old_flags & BLOCK_IS_DEAD) + was_dead[i / 32] |= BIT(i % 32); + else + was_dead[i / 32] &= ~BIT(i % 32); } - if (ENABLE_THREADED_COMPILER) { - lightrec_reaper_continue(state->reaper); + dead_blocks[i] = block2; - /* If block2 was pending for compilation, cancel it. - * If it's being compiled right now, wait until it - * finishes. */ - if (block2) - lightrec_recompiler_remove(state->rec, block2); - } + /* If block2 was pending for compilation, cancel it. + * If it's being compiled right now, wait until it finishes. */ + if (block2) + lightrec_recompiler_remove(state->rec, block2); + } + + for (i = 0; i < cstate->nb_targets; i++) { + target = &cstate->targets[i]; + + if (!target->offset) + continue; /* We know from now on that block2 (if present) isn't going to * be compiled. We can override the LUT entry with our new @@ -1690,15 +1712,21 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, offset = lut_offset(block->pc) + target->offset; lut_write(state, offset, jit_address(target->label)); + if (ENABLE_THREADED_COMPILER) { + block2 = dead_blocks[i]; + } else { + offset = block->pc + target->offset * sizeof(u32); + block2 = lightrec_find_block(state->block_cache, offset); + } if (block2) { - pr_debug("Reap block 0x%08x as it's covered by block " - "0x%08x\n", block2->pc, block->pc); + pr_debug("Reap block "X32_FMT" as it's covered by block " + X32_FMT"\n", block2->pc, block->pc); /* Finally, reap the block. */ if (!ENABLE_THREADED_COMPILER) { lightrec_unregister_block(state->block_cache, block2); lightrec_free_block(state, block2); - } else if (!(old_flags & BLOCK_IS_DEAD)) { + } else if (!(was_dead[i / 32] & BIT(i % 32))) { lightrec_reaper_add(state->reaper, lightrec_reap_block, block2); @@ -1706,8 +1734,11 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, } } + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_continue(state->reaper); + if (ENABLE_DISASSEMBLER) { - pr_debug("Compiling block at PC: 0x%08x\n", block->pc); + pr_debug("Compiling block at "PC_FMT"\n", block->pc); jit_disassemble(); } @@ -1717,7 +1748,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) { - pr_debug("Block PC 0x%08x is fully tagged" + pr_debug("Block "PC_FMT" is fully tagged" " - free opcode list\n", block->pc); if (ENABLE_THREADED_COMPILER) { @@ -1730,7 +1761,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, } if (oldjit) { - pr_debug("Block 0x%08x recompiled, reaping old jit context.\n", + pr_debug("Block "X32_FMT" recompiled, reaping old jit context.\n", block->pc); if (ENABLE_THREADED_COMPILER) { @@ -1868,11 +1899,12 @@ void lightrec_free_cstate(struct lightrec_cstate *cstate) } struct lightrec_state * lightrec_init(char *argv0, - const struct lightrec_mem_map *map, + const struct lightrec_mem_map *maps, size_t nb, const struct lightrec_ops *ops) { - const struct lightrec_mem_map *codebuf_map = &map[PSX_MAP_CODE_BUFFER]; + const struct lightrec_mem_map *codebuf_map = &maps[PSX_MAP_CODE_BUFFER]; + const struct lightrec_mem_map *map; struct lightrec_state *state; uintptr_t addr; void *tlsf = NULL; @@ -1910,7 +1942,7 @@ struct lightrec_state * lightrec_init(char *argv0, else lut_size = CODE_LUT_SIZE * sizeof(void *); - init_jit(argv0); + init_jit_with_debug(argv0, stdout); state = calloc(1, sizeof(*state) + lut_size); if (!state) @@ -1942,7 +1974,7 @@ struct lightrec_state * lightrec_init(char *argv0, } state->nb_maps = nb; - state->maps = map; + state->maps = maps; memcpy(&state->ops, ops, sizeof(*ops)); @@ -1960,21 +1992,21 @@ struct lightrec_state * lightrec_init(char *argv0, state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb; - map = &state->maps[PSX_MAP_BIOS]; + map = &maps[PSX_MAP_BIOS]; state->offset_bios = (uintptr_t)map->address - map->pc; - map = &state->maps[PSX_MAP_SCRATCH_PAD]; + map = &maps[PSX_MAP_SCRATCH_PAD]; state->offset_scratch = (uintptr_t)map->address - map->pc; - map = &state->maps[PSX_MAP_HW_REGISTERS]; + map = &maps[PSX_MAP_HW_REGISTERS]; state->offset_io = (uintptr_t)map->address - map->pc; - map = &state->maps[PSX_MAP_KERNEL_USER_RAM]; + map = &maps[PSX_MAP_KERNEL_USER_RAM]; state->offset_ram = (uintptr_t)map->address - map->pc; - if (state->maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 && - state->maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 && - state->maps[PSX_MAP_MIRROR3].address == map->address + 0x600000) + if (maps[PSX_MAP_MIRROR1].address == map->address + 0x200000 && + maps[PSX_MAP_MIRROR2].address == map->address + 0x400000 && + maps[PSX_MAP_MIRROR3].address == map->address + 0x600000) state->mirrors_mapped = true; if (state->offset_bios == 0 && @@ -2119,5 +2151,19 @@ struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state) void lightrec_set_cycles_per_opcode(struct lightrec_state *state, u32 cycles) { + if (state->cycles_per_op == cycles) + return; + state->cycles_per_op = cycles; + + if (ENABLE_THREADED_COMPILER) { + lightrec_recompiler_pause(state->rec); + lightrec_reaper_reap(state->reaper); + } + + lightrec_invalidate_all(state); + lightrec_free_all_blocks(state->block_cache); + + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_unpause(state->rec); }