X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightrec%2Flightrec.c;h=fa08a800d721df81d090e5d8329eb73efe7c49f7;hb=6ce0b00a79f4fa7c4a1735be9f48bfbc644b080c;hp=3a6e1129957c253779958aeaff01096e1ea3f4c3;hpb=b19ddc7982ef5935a3ed54cbc8525a06b3639e00;p=pcsx_rearmed.git diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 3a6e1129..fa08a800 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -16,6 +16,7 @@ #include "recompiler.h" #include "regcache.h" #include "optimizer.h" +#include "tlsf/tlsf.h" #include #include @@ -26,9 +27,6 @@ #include #include #include -#if ENABLE_TINYMM -#include -#endif #define GENMASK(h, l) \ (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h)))) @@ -198,30 +196,39 @@ static void lightrec_invalidate_map(struct lightrec_state *state, const struct lightrec_mem_map *map, u32 addr, u32 len) { if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) { - memset(&state->code_lut[lut_offset(addr)], 0, - ((len + 3) / 4) * sizeof(void *)); + memset(lut_address(state, lut_offset(addr)), 0, + ((len + 3) / 4) * lut_elm_size(state)); } } -const struct lightrec_mem_map * -lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) +enum psx_map +lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr) { const struct lightrec_mem_map *map; unsigned int i; - u32 addr; for (i = 0; i < state->nb_maps; i++) { - const struct lightrec_mem_map *mapi = &state->maps[i]; + map = &state->maps[i]; - if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) { - map = mapi; - break; - } + if (kaddr >= map->pc && kaddr < map->pc + map->length) + return (enum psx_map) i; } - if (i == state->nb_maps) + return PSX_MAP_UNKNOWN; +} + +const struct lightrec_mem_map * +lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) +{ + const struct lightrec_mem_map *map; + enum psx_map idx; + u32 addr; + + idx = lightrec_get_map_idx(state, kaddr); + if (idx == PSX_MAP_UNKNOWN) return NULL; + map = &state->maps[idx]; addr = kaddr - map->pc; while (map->mirror_of) @@ -234,7 +241,7 @@ lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) } u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u16 *flags, struct block *block) + u32 addr, u32 data, u32 *flags, struct block *block) { const struct lightrec_mem_map *map; const struct lightrec_mem_map_ops *ops; @@ -249,16 +256,20 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, return 0; } - if (unlikely(map->ops)) { - if (flags) - *flags |= LIGHTREC_HW_IO; - ops = map->ops; - } else { - if (flags) - *flags |= LIGHTREC_DIRECT_IO; + if (likely(!map->ops)) { + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); ops = &lightrec_default_ops; + } else if (flags && + LIGHTREC_FLAGS_GET_IO_MODE(*flags) == LIGHTREC_IO_DIRECT_HW) { + ops = &lightrec_default_ops; + } else { + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + + ops = map->ops; } switch (op.i.op) { @@ -302,7 +313,7 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, } static void lightrec_rw_helper(struct lightrec_state *state, - union code op, u16 *flags, + union code op, u32 *flags, struct block *block) { u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs], @@ -318,14 +329,15 @@ static void lightrec_rw_helper(struct lightrec_state *state, case OP_LW: if (op.i.rt) state->regs.gpr[op.i.rt] = ret; - default: /* fall-through */ + fallthrough; + default: break; } } -static void lightrec_rw_cb(struct lightrec_state *state, union code op) +static void lightrec_rw_cb(struct lightrec_state *state, u32 arg) { - lightrec_rw_helper(state, op, NULL, NULL); + lightrec_rw_helper(state, (union code) arg, NULL, NULL); } static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) @@ -334,25 +346,31 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) struct opcode *op; bool was_tagged; u16 offset = (u16)arg; + u16 old_flags; block = lightrec_find_block_from_lut(state->block_cache, arg >> 16, state->next_pc); if (unlikely(!block)) { pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n", state->next_pc, offset); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return; } op = &block->opcode_list[offset]; - was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO); + was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags); lightrec_rw_helper(state, op->c, &op->flags, block); if (!was_tagged) { - pr_debug("Opcode of block at PC 0x%08x has been tagged - flag " - "for recompilation\n", block->pc); + old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); - block->flags |= BLOCK_SHOULD_RECOMPILE; + if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { + pr_debug("Opcode of block at PC 0x%08x has been tagged" + " - flag for recompilation\n", block->pc); + + lut_write(state, lut_offset(block->pc), NULL); + } } } @@ -361,6 +379,16 @@ static u32 clamp_s32(s32 val, s32 min, s32 max) return val < min ? min : val > max ? max : val; } +static u16 load_u16(u32 *ptr) +{ + return ((struct u16x2 *) ptr)->l; +} + +static void store_u16(u32 *ptr, u16 value) +{ + ((struct u16x2 *) ptr)->l = value; +} + static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg) { s16 gteir1, gteir2, gteir3; @@ -373,50 +401,58 @@ static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg) case 9: case 10: case 11: - return (s32)(s16) state->regs.cp2d[reg]; + return (s32)(s16) load_u16(&state->regs.cp2d[reg]); case 7: case 16: case 17: case 18: case 19: - return (u16) state->regs.cp2d[reg]; + return load_u16(&state->regs.cp2d[reg]); case 28: case 29: - gteir1 = (s16) state->regs.cp2d[9]; - gteir2 = (s16) state->regs.cp2d[10]; - gteir3 = (s16) state->regs.cp2d[11]; + gteir1 = (s16) load_u16(&state->regs.cp2d[9]); + gteir2 = (s16) load_u16(&state->regs.cp2d[10]); + gteir3 = (s16) load_u16(&state->regs.cp2d[11]); return clamp_s32(gteir1 >> 7, 0, 0x1f) << 0 | clamp_s32(gteir2 >> 7, 0, 0x1f) << 5 | clamp_s32(gteir3 >> 7, 0, 0x1f) << 10; case 15: reg = 14; - default: /* fall-through */ + fallthrough; + default: return state->regs.cp2d[reg]; } } u32 lightrec_mfc(struct lightrec_state *state, union code op) { + u32 val; + if (op.i.op == OP_CP0) return state->regs.cp0[op.r.rd]; else if (op.r.rs == OP_CP2_BASIC_MFC2) return lightrec_mfc2(state, op.r.rd); - else - return state->regs.cp2c[op.r.rd]; -} -static void lightrec_mfc_cb(struct lightrec_state *state, union code op) -{ - u32 rt = lightrec_mfc(state, op); + val = state->regs.cp2c[op.r.rd]; - if (op.r.rt) - state->regs.gpr[op.r.rt] = rt; + switch (op.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + return (u32)(s16)val; + default: + return val; + } } static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) { - u32 status, cause; + u32 status, oldstatus, cause; switch (reg) { case 1: @@ -426,12 +462,13 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) case 15: /* Those registers are read-only */ return; - default: /* fall-through */ + default: break; } if (reg == 12) { status = state->regs.cp0[12]; + oldstatus = status; if (status & ~data & BIT(16)) { state->ops.enable_ram(state, true); @@ -441,14 +478,24 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) } } - state->regs.cp0[reg] = data; + if (reg == 13) { + state->regs.cp0[13] &= ~0x300; + state->regs.cp0[13] |= data & 0x300; + } else { + state->regs.cp0[reg] = data; + } if (reg == 12 || reg == 13) { cause = state->regs.cp0[13]; status = state->regs.cp0[12]; + /* Handle software interrupts */ if (!!(status & cause & 0x300) & status) lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); + + /* Handle hardware interrupts */ + if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401)) + lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); } } @@ -489,7 +536,8 @@ static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data) return; case 30: state->regs.cp2d[31] = count_leading_bits((s32) data); - default: /* fall-through */ + fallthrough; + default: state->regs.cp2d[reg] = data; break; } @@ -505,15 +553,15 @@ static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data) case 27: case 29: case 30: - data = (s32)(s16) data; + store_u16(&state->regs.cp2c[reg], data); break; case 31: data = (data & 0x7ffff000) | !!(data & 0x7f87e000) << 31; - default: /* fall-through */ + fallthrough; + default: + state->regs.cp2c[reg] = data; break; } - - state->regs.cp2c[reg] = data; } void lightrec_mtc(struct lightrec_state *state, union code op, u32 data) @@ -526,8 +574,10 @@ void lightrec_mtc(struct lightrec_state *state, union code op, u32 data) lightrec_mtc2(state, op.r.rd, data); } -static void lightrec_mtc_cb(struct lightrec_state *state, union code op) +static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg) { + union code op = (union code) arg; + lightrec_mtc(state, op, state->regs.gpr[op.r.rt]); } @@ -555,31 +605,31 @@ void lightrec_cp(struct lightrec_state *state, union code op) (*state->ops.cop2_op)(state, op.opcode); } -static void lightrec_syscall_cb(struct lightrec_state *state, union code op) -{ - lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL); -} - -static void lightrec_break_cb(struct lightrec_state *state, union code op) +static void lightrec_cp_cb(struct lightrec_state *state, u32 arg) { - lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK); + lightrec_cp(state, (union code) arg); } -struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) +static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) { struct block *block = lightrec_find_block(state->block_cache, pc); + u8 old_flags; if (block && lightrec_block_is_outdated(state, block)) { pr_debug("Block at PC 0x%08x is outdated!\n", block->pc); - /* Make sure the recompiler isn't processing the block we'll - * destroy */ - if (ENABLE_THREADED_COMPILER) - lightrec_recompiler_remove(state->rec, block); + old_flags = block_set_flags(block, BLOCK_IS_DEAD); + if (!(old_flags & BLOCK_IS_DEAD)) { + /* Make sure the recompiler isn't processing the block + * we'll destroy */ + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_remove(state->rec, block); + + lightrec_unregister_block(state->block_cache, block); + remove_from_code_lut(state->block_cache, block); + lightrec_free_block(state, block); + } - lightrec_unregister_block(state->block_cache, block); - remove_from_code_lut(state->block_cache, block); - lightrec_free_block(state, block); block = NULL; } @@ -602,9 +652,10 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) struct block *block; bool should_recompile; void *func; + int err; for (;;) { - func = state->code_lut[lut_offset(pc)]; + func = lut_read(state, lut_offset(pc)); if (func && func != state->get_next_block) break; @@ -613,23 +664,27 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (unlikely(!block)) break; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) { + if (OPT_REPLACE_MEMSET && + block_has_flag(block, BLOCK_IS_MEMSET)) { func = state->memset_func; break; } - should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE && - !(block->flags & BLOCK_IS_DEAD); + should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) && + !block_has_flag(block, BLOCK_IS_DEAD); if (unlikely(should_recompile)) { pr_debug("Block at PC 0x%08x should recompile\n", pc); - lightrec_unregister(MEM_FOR_CODE, block->code_size); - - if (ENABLE_THREADED_COMPILER) + if (ENABLE_THREADED_COMPILER) { lightrec_recompiler_add(state->rec, block); - else - lightrec_compile_block(state->cstate, block); + } else { + err = lightrec_compile_block(state->cstate, block); + if (err) { + state->exit_flags = LIGHTREC_EXIT_NOMEM; + return NULL; + } + } } if (ENABLE_THREADED_COMPILER && likely(!should_recompile)) @@ -640,18 +695,31 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (likely(func)) break; - if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) { + if (unlikely(block_has_flag(block, BLOCK_NEVER_COMPILE))) { pc = lightrec_emulate_block(state, block, pc); } else if (!ENABLE_THREADED_COMPILER) { /* Block wasn't compiled yet - run the interpreter */ - if (block->flags & BLOCK_FULLY_TAGGED) + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) pr_debug("Block fully tagged, skipping first pass\n"); else if (ENABLE_FIRST_PASS && likely(!should_recompile)) pc = lightrec_emulate_block(state, block, pc); /* Then compile it using the profiled data */ - lightrec_compile_block(state->cstate, block); + err = lightrec_compile_block(state->cstate, block); + if (err) { + state->exit_flags = LIGHTREC_EXIT_NOMEM; + return NULL; + } + } else if (unlikely(block_has_flag(block, BLOCK_IS_DEAD))) { + /* + * If the block is dead but has never been compiled, + * then its function pointer is NULL and we cannot + * execute the block. In that case, reap all the dead + * blocks now, and in the next loop we will create a + * new block. + */ + lightrec_reaper_reap(state->reaper); } else { lightrec_recompiler_add(state->rec, block); } @@ -665,15 +733,104 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) return func; } -static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, - void (*f)(struct lightrec_state *, u32 d), - u32 d) +static void * lightrec_alloc_code(struct lightrec_state *state, size_t size) { - state->current_cycle = state->target_cycle - cycles_delta; + void *code; - (*f)(state, d); + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + code = tlsf_malloc(state->tlsf, size); - return state->target_cycle - state->current_cycle; + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); + + return code; +} + +static void lightrec_realloc_code(struct lightrec_state *state, + void *ptr, size_t size) +{ + /* NOTE: 'size' MUST be smaller than the size specified during + * the allocation. */ + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + tlsf_realloc(state->tlsf, ptr, size); + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); +} + +static void lightrec_free_code(struct lightrec_state *state, void *ptr) +{ + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + tlsf_free(state->tlsf, ptr); + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); +} + +static void * lightrec_emit_code(struct lightrec_state *state, + const struct block *block, + jit_state_t *_jit, unsigned int *size) +{ + bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf; + jit_word_t code_size, new_code_size; + void *code; + + jit_realize(); + + if (!ENABLE_DISASSEMBLER) + jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + + if (has_code_buffer) { + jit_get_code(&code_size); + code = lightrec_alloc_code(state, (size_t) code_size); + + if (!code) { + if (ENABLE_THREADED_COMPILER) { + /* If we're using the threaded compiler, return + * an allocation error here. The threaded + * compiler will then empty its job queue and + * request a code flush using the reaper. */ + return NULL; + } + + /* Remove outdated blocks, and try again */ + lightrec_remove_outdated_blocks(state->block_cache, block); + + pr_debug("Re-try to alloc %zu bytes...\n", code_size); + + code = lightrec_alloc_code(state, code_size); + if (!code) { + pr_err("Could not alloc even after removing old blocks!\n"); + return NULL; + } + } + + jit_set_code(code, code_size); + } + + code = jit_emit(); + + jit_get_code(&new_code_size); + lightrec_register(MEM_FOR_CODE, new_code_size); + + if (has_code_buffer) { + lightrec_realloc_code(state, code, (size_t) new_code_size); + + pr_debug("Creating code block at address 0x%" PRIxPTR ", " + "code size: %" PRIuPTR " new: %" PRIuPTR "\n", + (uintptr_t) code, code_size, new_code_size); + } + + *size = (unsigned int) new_code_size; + + return code; } static struct block * generate_wrapper(struct lightrec_state *state) @@ -681,9 +838,8 @@ static struct block * generate_wrapper(struct lightrec_state *state) struct block *block; jit_state_t *_jit; unsigned int i; - int stack_ptr; - jit_word_t code_size; - jit_node_t *to_tramp, *to_fn_epilog; + jit_node_t *addr[C_WRAPPERS_COUNT - 1]; + jit_node_t *to_end[C_WRAPPERS_COUNT - 1]; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -698,53 +854,82 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* Wrapper entry point */ jit_prolog(); + jit_tramp(256); - stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS); - - for (i = 0; i < NUM_TEMPS; i++) - jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); - - /* Jump to the trampoline */ - to_tramp = jit_jmpi(); + /* Add entry points */ + for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { + jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[i])); + to_end[i - 1] = jit_b(); + addr[i - 1] = jit_indirect(); + } - /* The trampoline will jump back here */ - to_fn_epilog = jit_label(); + jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[0])); - for (i = 0; i < NUM_TEMPS; i++) - jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t)); + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + jit_patch(to_end[i]); - jit_ret(); jit_epilog(); - - /* Trampoline entry point. - * The sole purpose of the trampoline is to cheese Lightning not to - * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we - * do want to return to the caller with this register modified. */ jit_prolog(); - jit_tramp(256); - jit_patch(to_tramp); + + /* Save all temporaries on stack */ + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]), + LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP)); + } + } + + jit_getarg(JIT_R2, jit_arg()); jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); - jit_pushargr(LIGHTREC_REG_CYCLE); - jit_pushargr(JIT_R0); - jit_pushargr(JIT_R1); - jit_finishi(c_function_wrapper); - jit_retval_i(LIGHTREC_REG_CYCLE); + jit_pushargr(JIT_R2); + + jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + + /* state->current_cycle = state->target_cycle - delta; */ + jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE); + + /* Call the wrapper function */ + jit_finishr(JIT_R1); + + /* delta = state->target_cycle - state->current_cycle */; + jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE); + + /* Restore temporaries from stack */ + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, wrapper_regs[i])); + } + } - jit_patch_at(jit_jmpi(), to_fn_epilog); + jit_ret(); jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); + block->function = lightrec_emit_code(state, block, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; + + state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function; - block->code_size = code_size; + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + state->wrappers_eps[i] = jit_address(addr[i]); if (ENABLE_DISASSEMBLER) { pr_debug("Wrapper block:\n"); @@ -789,10 +974,9 @@ static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3; + jit_node_t *to_end, *loop, *addr, *addr2, *addr3; unsigned int i; - u32 offset, ram_len; - jit_word_t code_size; + u32 offset; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -808,12 +992,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_prolog(); jit_frame(256); - jit_getarg(JIT_R0, jit_arg()); + jit_getarg(JIT_V1, jit_arg()); jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg()); /* Force all callee-saved registers to be pushed on the stack */ for (i = 0; i < NUM_REGS; i++) - jit_movr(JIT_V(i), JIT_V(i)); + jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG)); /* Pass lightrec_state structure to blocks, using the last callee-saved * register that Lightning provides */ @@ -822,13 +1006,15 @@ static struct block * generate_dispatcher(struct lightrec_state *state) loop = jit_label(); /* Call the block's code */ - jit_jmpr(JIT_R0); + jit_jmpr(JIT_V1); if (OPT_REPLACE_MEMSET) { /* Blocks will jump here when they need to call * lightrec_memset() */ addr3 = jit_indirect(); + jit_movr(JIT_V1, LIGHTREC_REG_CYCLE); + jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_finishi(lightrec_memset); @@ -836,8 +1022,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, regs.gpr[31])); - jit_retval(JIT_R0); - jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0); + jit_retval(LIGHTREC_REG_CYCLE); + jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); } /* The block will jump here, with the number of cycles remaining in @@ -852,42 +1038,53 @@ static struct block * generate_dispatcher(struct lightrec_state *state) to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); /* Convert next PC to KUNSEG and avoid mirrors */ - ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length; - jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1)); - to_c = jit_bgei(JIT_R0, ram_len); - - /* Fast path: code is running from RAM, use the code LUT */ - if (__WORDSIZE == 64) - jit_lshi(JIT_R0, JIT_R0, 1); - jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE); - jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut)); + jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); + jit_rshi_u(JIT_R1, JIT_V1, 28); + jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1); + jit_addi(JIT_R2, JIT_R2, RAM_SIZE); + jit_movnr(JIT_V1, JIT_R2, JIT_R1); + + /* If possible, use the code LUT */ + if (!lut_is_32bit(state)) + jit_lshi(JIT_V1, JIT_V1, 1); + jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE); + + offset = offsetof(struct lightrec_state, code_lut); + if (lut_is_32bit(state)) + jit_ldxi_ui(JIT_V1, JIT_V1, offset); + else + jit_ldxi(JIT_V1, JIT_V1, offset); /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); + + /* The code LUT will be set to this address when the block at the target + * PC has been preprocessed but not yet compiled by the threaded + * recompiler */ + addr = jit_indirect(); /* Slow path: call C function get_next_block_func() */ - jit_patch(to_c); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* We may call the interpreter - update state->current_cycle */ jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, target_cycle)); - jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); + jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE); jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, JIT_R1); + LIGHTREC_REG_STATE, JIT_V1); } - /* The code LUT will be set to this address when the block at the target - * PC has been preprocessed but not yet compiled by the threaded - * recompiler */ - addr = jit_indirect(); - - /* Get the next block */ jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(JIT_V0); + + /* Save the cycles register if needed */ + if (!(ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES)) + jit_movr(JIT_V0, LIGHTREC_REG_CYCLE); + + /* Get the next block */ jit_finishi(&get_next_block_func); - jit_retval(JIT_R0); + jit_retval(JIT_V1); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* The interpreter may have updated state->current_cycle and @@ -897,10 +1094,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, target_cycle)); jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); + } else { + jit_movr(LIGHTREC_REG_CYCLE, JIT_V0); } /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); /* When exiting, the recompiled code will jump to that address */ jit_note(__FILE__, __LINE__); @@ -910,15 +1109,14 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; + block->function = lightrec_emit_code(state, block, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; state->eob_wrapper_func = jit_address(addr2); if (OPT_REPLACE_MEMSET) @@ -943,12 +1141,12 @@ err_no_mem: union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) { - void *host; + void *host = NULL; lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *)host; - return (union code) *code; + return (union code) LE32TOH(*code); } unsigned int lightrec_cycles_of_opcode(union code code) @@ -956,11 +1154,13 @@ unsigned int lightrec_cycles_of_opcode(union code code) return 2; } -void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block) +void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops) { + struct opcode_list *list = container_of(ops, struct opcode_list, ops); + lightrec_free(state, MEM_FOR_IR, - sizeof(*block->opcode_list) * block->nb_ops, - block->opcode_list); + sizeof(*list) + list->nb_ops * sizeof(struct opcode), + list); } static unsigned int lightrec_get_mips_block_len(const u32 *src) @@ -982,25 +1182,28 @@ static unsigned int lightrec_get_mips_block_len(const u32 *src) static struct opcode * lightrec_disassemble(struct lightrec_state *state, const u32 *src, unsigned int *len) { - struct opcode *list; + struct opcode_list *list; unsigned int i, length; length = lightrec_get_mips_block_len(src); - list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length); + list = lightrec_malloc(state, MEM_FOR_IR, + sizeof(*list) + sizeof(struct opcode) * length); if (!list) { pr_err("Unable to allocate memory\n"); return NULL; } + list->nb_ops = (u16) length; + for (i = 0; i < length; i++) { - list[i].opcode = LE32TOH(src[i]); - list[i].flags = 0; + list->ops[i].opcode = LE32TOH(src[i]); + list->ops[i].flags = 0; } *len = length * sizeof(u32); - return list; + return list->ops; } static struct block * lightrec_precompile_block(struct lightrec_state *state, @@ -1008,11 +1211,12 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, { struct opcode *list; struct block *block; - void *host; + void *host, *addr; const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *) host; unsigned int length; bool fully_tagged; + u8 block_flags = 0; if (!map) return NULL; @@ -1037,9 +1241,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, block->next = NULL; block->flags = 0; block->code_size = 0; -#if ENABLE_THREADED_COMPILER - block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT; -#endif + block->precompile_date = state->current_cycle; block->nb_ops = length / sizeof(u32); lightrec_optimize(state, block); @@ -1058,17 +1260,23 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, /* If the first opcode is an 'impossible' branch, never compile the * block */ if (should_emulate(block->opcode_list)) - block->flags |= BLOCK_NEVER_COMPILE; + block_flags |= BLOCK_NEVER_COMPILE; fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) - block->flags |= BLOCK_FULLY_TAGGED; + block_flags |= BLOCK_FULLY_TAGGED; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) - state->code_lut[lut_offset(pc)] = state->memset_func; + if (block_flags) + block_set_flags(block, block_flags); block->hash = lightrec_calculate_block_hash(block); + if (OPT_REPLACE_MEMSET && block_has_flag(block, BLOCK_IS_MEMSET)) + addr = state->memset_func; + else + addr = state->get_next_block; + lut_write(state, lut_offset(pc), addr); + pr_debug("Recompile count: %u\n", state->nb_precompile++); return block; @@ -1100,10 +1308,10 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) case OP_SWR: case OP_LWC2: case OP_SWC2: - if (!(op->flags & (LIGHTREC_DIRECT_IO | - LIGHTREC_HW_IO))) + if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags)) return false; - default: /* fall-through */ + fallthrough; + default: continue; } } @@ -1125,35 +1333,56 @@ static void lightrec_reap_jit(struct lightrec_state *state, void *data) _jit_destroy_state(data); } +static void lightrec_free_function(struct lightrec_state *state, void *fn) +{ + if (ENABLE_CODE_BUFFER && state->tlsf) { + pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn); + lightrec_free_code(state, fn); + } +} + +static void lightrec_reap_function(struct lightrec_state *state, void *data) +{ + lightrec_free_function(state, data); +} + +static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data) +{ + lightrec_free_opcode_list(state, data); +} + int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block) { struct lightrec_state *state = cstate->state; struct lightrec_branch_target *target; - bool op_list_freed = false, fully_tagged = false; + bool fully_tagged = false; struct block *block2; struct opcode *elm; jit_state_t *_jit, *oldjit; jit_node_t *start_of_block; bool skip_next = false; - jit_word_t code_size; + void *old_fn, *new_fn; + size_t old_code_size; unsigned int i, j; + u8 old_flags; u32 offset; fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) - block->flags |= BLOCK_FULLY_TAGGED; + block_set_flags(block, BLOCK_FULLY_TAGGED); _jit = jit_new_state(); if (!_jit) return -ENOMEM; oldjit = block->_jit; + old_fn = block->function; + old_code_size = block->code_size; block->_jit = _jit; lightrec_regcache_reset(cstate->reg_cache); cstate->cycles = 0; - cstate->nb_branches = 0; cstate->nb_local_branches = 0; cstate->nb_targets = 0; @@ -1170,18 +1399,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, continue; } - cstate->cycles += lightrec_cycles_of_opcode(elm->c); - if (should_emulate(elm)) { pr_debug("Branch at offset 0x%x will be emulated\n", i << 2); lightrec_emit_eob(cstate, block, i, false); - skip_next = !(elm->flags & LIGHTREC_NO_DS); + skip_next = !op_flag_no_ds(elm->flags); } else { lightrec_rec_opcode(cstate, block, i); - skip_next = has_delay_slot(elm->c) && - !(elm->flags & LIGHTREC_NO_DS); + skip_next = !op_flag_no_ds(elm->flags) && has_delay_slot(elm->c); #if _WIN32 /* FIXME: GNU Lightning on Windows seems to use our * mapped registers as temporaries. Until the actual bug @@ -1190,10 +1416,9 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, lightrec_regcache_mark_live(cstate->reg_cache, _jit); #endif } - } - for (i = 0; i < cstate->nb_branches; i++) - jit_patch(cstate->branches[i]); + cstate->cycles += lightrec_cycles_of_opcode(elm->c); + } for (i = 0; i < cstate->nb_local_branches; i++) { struct lightrec_branch *branch = &cstate->local_branches[i]; @@ -1218,26 +1443,32 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_err("Unable to find branch target\n"); } - jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, eob_wrapper_func)); - - jit_jmpr(JIT_R0); - jit_ret(); jit_epilog(); - block->function = jit_emit(); - block->flags &= ~BLOCK_SHOULD_RECOMPILE; + new_fn = lightrec_emit_code(state, block, _jit, &block->code_size); + if (!new_fn) { + if (!ENABLE_THREADED_COMPILER) + pr_err("Unable to compile block!\n"); + block->_jit = oldjit; + jit_clear_state(); + _jit_destroy_state(_jit); + return -ENOMEM; + } + + /* Pause the reaper, because lightrec_reset_lut_offset() may try to set + * the old block->function pointer to the code LUT. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); + + block->function = new_fn; + block_clear_flags(block, BLOCK_SHOULD_RECOMPILE); /* Add compiled function to the LUT */ - state->code_lut[lut_offset(block->pc)] = block->function; + lut_write(state, lut_offset(block->pc), block->function); - if (ENABLE_THREADED_COMPILER) { - /* Since we might try to reap the same block multiple times, - * we need the reaper to wait until everything has been - * submitted, so that the duplicate entries can be dropped. */ - lightrec_reaper_pause(state->reaper); - } + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_continue(state->reaper); /* Detect old blocks that have been covered by the new one */ for (i = 0; i < cstate->nb_targets; i++) { @@ -1247,6 +1478,13 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, continue; offset = block->pc + target->offset * sizeof(u32); + + /* Pause the reaper while we search for the block until we set + * the BLOCK_IS_DEAD flag, otherwise the block may be removed + * under our feet. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); + block2 = lightrec_find_block(state->block_cache, offset); if (block2) { /* No need to check if block2 is compilable - it must @@ -1254,43 +1492,41 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, /* Set the "block dead" flag to prevent the dynarec from * recompiling this block */ - block2->flags |= BLOCK_IS_DEAD; + old_flags = block_set_flags(block2, BLOCK_IS_DEAD); + } + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_continue(state->reaper); /* If block2 was pending for compilation, cancel it. * If it's being compiled right now, wait until it * finishes. */ - if (ENABLE_THREADED_COMPILER) + if (block2) lightrec_recompiler_remove(state->rec, block2); + } - /* We know from now on that block2 isn't going to be - * compiled. We can override the LUT entry with our - * new block's entry point. */ - offset = lut_offset(block->pc) + target->offset; - state->code_lut[offset] = jit_address(target->label); + /* We know from now on that block2 (if present) isn't going to + * be compiled. We can override the LUT entry with our new + * block's entry point. */ + offset = lut_offset(block->pc) + target->offset; + lut_write(state, offset, jit_address(target->label)); + if (block2) { pr_debug("Reap block 0x%08x as it's covered by block " "0x%08x\n", block2->pc, block->pc); /* Finally, reap the block. */ - if (ENABLE_THREADED_COMPILER) { + if (!ENABLE_THREADED_COMPILER) { + lightrec_unregister_block(state->block_cache, block2); + lightrec_free_block(state, block2); + } else if (!(old_flags & BLOCK_IS_DEAD)) { lightrec_reaper_add(state->reaper, lightrec_reap_block, block2); - } else { - lightrec_unregister_block(state->block_cache, block2); - lightrec_free_block(state, block2); } } } - if (ENABLE_THREADED_COMPILER) - lightrec_reaper_continue(state->reaper); - - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; - if (ENABLE_DISASSEMBLER) { pr_debug("Compiling block at PC: 0x%08x\n", block->pc); jit_disassemble(); @@ -1298,26 +1534,37 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, jit_clear_state(); -#if ENABLE_THREADED_COMPILER if (fully_tagged) - op_list_freed = atomic_flag_test_and_set(&block->op_list_freed); -#endif - if (fully_tagged && !op_list_freed) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) { pr_debug("Block PC 0x%08x is fully tagged" " - free opcode list\n", block->pc); - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_add(state->reaper, + lightrec_reap_opcode_list, + block->opcode_list); + } else { + lightrec_free_opcode_list(state, block->opcode_list); + } } if (oldjit) { pr_debug("Block 0x%08x recompiled, reaping old jit context.\n", block->pc); - if (ENABLE_THREADED_COMPILER) + if (ENABLE_THREADED_COMPILER) { lightrec_reaper_add(state->reaper, lightrec_reap_jit, oldjit); - else + lightrec_reaper_add(state->reaper, + lightrec_reap_function, old_fn); + } else { _jit_destroy_state(oldjit); + lightrec_free_function(state, old_fn); + } + + lightrec_unregister(MEM_FOR_CODE, old_code_size); } return 0; @@ -1370,20 +1617,24 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) return state->next_pc; } -u32 lightrec_execute_one(struct lightrec_state *state, u32 pc) -{ - return lightrec_execute(state, pc, state->current_cycle); -} - -u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) +u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc, + u32 target_cycle) { - struct block *block = lightrec_get_block(state, pc); - if (!block) - return 0; + struct block *block; state->exit_flags = LIGHTREC_EXIT_NORMAL; + state->target_cycle = target_cycle; + + do { + block = lightrec_get_block(state, pc); + if (!block) + break; - pc = lightrec_emulate_block(state, block, pc); + pc = lightrec_emulate_block(state, block, pc); + + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_reap(state->reaper); + } while (state->current_cycle < state->target_cycle); if (LOG_LEVEL >= INFO_L) lightrec_print_info(state); @@ -1393,12 +1644,19 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) void lightrec_free_block(struct lightrec_state *state, struct block *block) { + u8 old_flags; + lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32)); - if (block->opcode_list) - lightrec_free_opcode_list(state, block); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + lightrec_free_opcode_list(state, block->opcode_list); if (block->_jit) _jit_destroy_state(block->_jit); - lightrec_unregister(MEM_FOR_CODE, block->code_size); + if (block->function) { + lightrec_free_function(state, block->function); + lightrec_unregister(MEM_FOR_CODE, block->code_size); + } lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); } @@ -1432,7 +1690,12 @@ struct lightrec_state * lightrec_init(char *argv0, size_t nb, const struct lightrec_ops *ops) { + const struct lightrec_mem_map *codebuf_map = &map[PSX_MAP_CODE_BUFFER]; struct lightrec_state *state; + uintptr_t addr; + void *tlsf = NULL; + bool with_32bit_lut = false; + size_t lut_size; /* Sanity-check ops */ if (!ops || !ops->cop2_op || !ops->enable_ram) { @@ -1440,25 +1703,40 @@ struct lightrec_state * lightrec_init(char *argv0, return NULL; } + if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER + && codebuf_map->address) { + tlsf = tlsf_create_with_pool(codebuf_map->address, + codebuf_map->length); + if (!tlsf) { + pr_err("Unable to initialize code buffer\n"); + return NULL; + } + + if (__WORDSIZE == 64) { + addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1; + with_32bit_lut = addr == (u32) addr; + } + } + + if (with_32bit_lut) + lut_size = CODE_LUT_SIZE * 4; + else + lut_size = CODE_LUT_SIZE * sizeof(void *); + init_jit(argv0); - state = calloc(1, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + state = calloc(1, sizeof(*state) + lut_size); if (!state) goto err_finish_jit; - lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size); -#if ENABLE_TINYMM - state->tinymm = tinymm_init(malloc, free, 4096); - if (!state->tinymm) - goto err_free_state; -#endif + state->tlsf = tlsf; + state->with_32bit_lut = with_32bit_lut; state->block_cache = lightrec_blockcache_init(state); if (!state->block_cache) - goto err_free_tinymm; + goto err_free_state; if (ENABLE_THREADED_COMPILER) { state->rec = lightrec_recompiler_init(state); @@ -1487,15 +1765,10 @@ struct lightrec_state * lightrec_init(char *argv0, if (!state->c_wrapper_block) goto err_free_dispatcher; - state->c_wrapper = state->c_wrapper_block->function; - state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb; state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb; - state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb; state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; - state->c_wrappers[C_WRAPPER_CP] = lightrec_cp; - state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb; - state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb; + state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb; map = &state->maps[PSX_MAP_BIOS]; state->offset_bios = (uintptr_t)map->address - map->pc; @@ -1503,6 +1776,9 @@ struct lightrec_state * lightrec_init(char *argv0, map = &state->maps[PSX_MAP_SCRATCH_PAD]; state->offset_scratch = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_HW_REGISTERS]; + state->offset_io = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_KERNEL_USER_RAM]; state->offset_ram = (uintptr_t)map->address - map->pc; @@ -1514,12 +1790,16 @@ struct lightrec_state * lightrec_init(char *argv0, if (state->offset_bios == 0 && state->offset_scratch == 0 && state->offset_ram == 0 && + state->offset_io == 0 && state->mirrors_mapped) { pr_info("Memory map is perfect. Emitted code will be best.\n"); } else { pr_info("Memory map is sub-par. Emitted code will be slow.\n"); } + if (state->with_32bit_lut) + pr_info("Using 32-bit LUT\n"); + return state; err_free_dispatcher: @@ -1534,16 +1814,14 @@ err_free_recompiler: lightrec_free_cstate(state->cstate); err_free_block_cache: lightrec_free_block_cache(state->block_cache); -err_free_tinymm: -#if ENABLE_TINYMM - tinymm_shutdown(state->tinymm); err_free_state: -#endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); err_finish_jit: finish_jit(); + if (ENABLE_CODE_BUFFER && tlsf) + tlsf_destroy(tlsf); return NULL; } @@ -1553,6 +1831,10 @@ void lightrec_destroy(struct lightrec_state *state) state->current_cycle = ~state->current_cycle; lightrec_print_info(state); + lightrec_free_block_cache(state->block_cache); + lightrec_free_block(state, state->dispatcher); + lightrec_free_block(state, state->c_wrapper_block); + if (ENABLE_THREADED_COMPILER) { lightrec_free_recompiler(state->rec); lightrec_reaper_destroy(state->reaper); @@ -1560,38 +1842,40 @@ void lightrec_destroy(struct lightrec_state *state) lightrec_free_cstate(state->cstate); } - lightrec_free_block_cache(state->block_cache); - lightrec_free_block(state, state->dispatcher); - lightrec_free_block(state, state->c_wrapper_block); finish_jit(); + if (ENABLE_CODE_BUFFER && state->tlsf) + tlsf_destroy(state->tlsf); -#if ENABLE_TINYMM - tinymm_shutdown(state->tinymm); -#endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); } void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len) { u32 kaddr = kunseg(addr & ~0x3); - const struct lightrec_mem_map *map = lightrec_get_map(state, NULL, kaddr); - - if (map) { - if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM]) - return; + enum psx_map idx = lightrec_get_map_idx(state, kaddr); + switch (idx) { + case PSX_MAP_MIRROR1: + case PSX_MAP_MIRROR2: + case PSX_MAP_MIRROR3: /* Handle mirrors */ - kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1); - - lightrec_invalidate_map(state, map, kaddr, len); + kaddr &= RAM_SIZE - 1; + fallthrough; + case PSX_MAP_KERNEL_USER_RAM: + break; + default: + return; } + + memset(lut_address(state, lut_offset(kaddr)), 0, + ((len + 3) / 4) * lut_elm_size(state)); } void lightrec_invalidate_all(struct lightrec_state *state) { - memset(state->code_lut, 0, sizeof(*state->code_lut) * CODE_LUT_SIZE); + memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE); } void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only)