X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightrec%2Flightrec.c;h=d5b1de968c57829b43a7abaabf1857fbc0fee90f;hb=2ec79b77bf0925c69ece6e51fc219a1e783aa432;hp=ffa40f0938af4634c1607ad014cac6e269ceaa3f;hpb=03535202b4b624c534340322646fb7f4062e3f53;p=pcsx_rearmed.git diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index ffa40f09..d5b1de96 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -28,9 +28,6 @@ #include #include -#define GENMASK(h, l) \ - (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h)))) - static struct block * lightrec_precompile_block(struct lightrec_state *state, u32 pc); static bool lightrec_block_is_fully_tagged(const struct block *block); @@ -107,7 +104,7 @@ static void lightrec_swl(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u32 data) { unsigned int shift = addr & 0x3; - unsigned int mask = GENMASK(31, (shift + 1) * 8); + unsigned int mask = shift < 3 ? GENMASK(31, (shift + 1) * 8) : 0; u32 old_data; /* Align to 32 bits */ @@ -171,7 +168,7 @@ static u32 lightrec_lwr(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u32 data) { unsigned int shift = addr & 0x3; - unsigned int mask = GENMASK(31, 32 - shift * 8); + unsigned int mask = shift ? GENMASK(31, 32 - shift * 8) : 0; u32 old_data; /* Align to 32 bits */ @@ -201,7 +198,7 @@ static void lightrec_invalidate_map(struct lightrec_state *state, } } -enum psx_map +static enum psx_map lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr) { const struct lightrec_mem_map *map; @@ -240,32 +237,64 @@ lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) return map; } -u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u32 *flags, struct block *block) +u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base, + u32 data, u32 *flags, struct block *block, u16 offset) { const struct lightrec_mem_map *map; const struct lightrec_mem_map_ops *ops; u32 opcode = op.opcode; + bool was_tagged = true; + u16 old_flags; + u32 addr; void *host; - addr += (s16) op.i.imm; + addr = kunseg(base + (s16) op.i.imm); - map = lightrec_get_map(state, &host, kunseg(addr)); + map = lightrec_get_map(state, &host, addr); if (!map) { __segfault_cb(state, addr, block); return 0; } - if (unlikely(map->ops)) { + if (flags) + was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(*flags); + + if (likely(!map->ops)) { + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) { + /* Force parallel port accesses as HW accesses, because + * the direct-I/O emitters can't differenciate it. */ + if (unlikely(map == &state->maps[PSX_MAP_PARALLEL_PORT])) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + /* If the base register is 0x0, be extra suspicious. + * Some games (e.g. Sled Storm) actually do segmentation + * faults by using uninitialized pointers, which are + * later initialized to point to hardware registers. */ + else if (op.i.rs && base == 0x0) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + else + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + } + + ops = &lightrec_default_ops; + } else if (flags && + LIGHTREC_FLAGS_GET_IO_MODE(*flags) == LIGHTREC_IO_DIRECT_HW) { + ops = &lightrec_default_ops; + } else { if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); ops = map->ops; - } else { - if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) - *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + } - ops = &lightrec_default_ops; + if (!was_tagged) { + old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); + + if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { + pr_debug("Opcode of block at PC 0x%08x has been tagged" + " - flag for recompilation\n", block->pc); + + lut_write(state, lut_offset(block->pc), NULL); + } } switch (op.i.op) { @@ -310,10 +339,10 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, static void lightrec_rw_helper(struct lightrec_state *state, union code op, u32 *flags, - struct block *block) + struct block *block, u16 offset) { u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs], - state->regs.gpr[op.i.rt], flags, block); + state->regs.gpr[op.i.rt], flags, block, offset); switch (op.i.op) { case OP_LB: @@ -323,8 +352,12 @@ static void lightrec_rw_helper(struct lightrec_state *state, case OP_LWL: case OP_LWR: case OP_LW: - if (op.i.rt) + if (OPT_HANDLE_LOAD_DELAYS && unlikely(!state->in_delay_slot_n)) { + state->temp_reg = ret; + state->in_delay_slot_n = 0xff; + } else if (op.i.rt) { state->regs.gpr[op.i.rt] = ret; + } fallthrough; default: break; @@ -333,14 +366,13 @@ static void lightrec_rw_helper(struct lightrec_state *state, static void lightrec_rw_cb(struct lightrec_state *state, u32 arg) { - lightrec_rw_helper(state, (union code) arg, NULL, NULL); + lightrec_rw_helper(state, (union code) arg, NULL, NULL, 0); } static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) { struct block *block; struct opcode *op; - bool was_tagged; u16 offset = (u16)arg; block = lightrec_find_block_from_lut(state->block_cache, @@ -348,21 +380,12 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) if (unlikely(!block)) { pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n", state->next_pc, offset); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return; } op = &block->opcode_list[offset]; - was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags); - - lightrec_rw_helper(state, op->c, &op->flags, block); - - if (!was_tagged) { - pr_debug("Opcode of block at PC 0x%08x has been tagged - flag " - "for recompilation\n", block->pc); - - block->flags |= BLOCK_SHOULD_RECOMPILE; - lut_write(state, lut_offset(block->pc), NULL); - } + lightrec_rw_helper(state, op->c, &op->flags, block, offset); } static u32 clamp_s32(s32 val, s32 min, s32 max) @@ -418,12 +441,47 @@ static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg) u32 lightrec_mfc(struct lightrec_state *state, union code op) { + u32 val; + if (op.i.op == OP_CP0) return state->regs.cp0[op.r.rd]; - else if (op.r.rs == OP_CP2_BASIC_MFC2) - return lightrec_mfc2(state, op.r.rd); - else - return state->regs.cp2c[op.r.rd]; + + if (op.i.op == OP_SWC2) { + val = lightrec_mfc2(state, op.i.rt); + } else if (op.r.rs == OP_CP2_BASIC_MFC2) + val = lightrec_mfc2(state, op.r.rd); + else { + val = state->regs.cp2c[op.r.rd]; + + switch (op.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + val = (u32)(s16)val; + fallthrough; + default: + break; + } + } + + if (state->ops.cop2_notify) + (*state->ops.cop2_notify)(state, op.opcode, val); + + return val; +} + +static void lightrec_mfc_cb(struct lightrec_state *state, union code op) +{ + u32 rt = lightrec_mfc(state, op); + + if (op.i.op == OP_SWC2) + state->temp_reg = rt; + else if (op.r.rt) + state->regs.gpr[op.r.rt] = rt; } static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) @@ -540,21 +598,36 @@ static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data) } } -void lightrec_mtc(struct lightrec_state *state, union code op, u32 data) +void lightrec_mtc(struct lightrec_state *state, union code op, u8 reg, u32 data) { - if (op.i.op == OP_CP0) - lightrec_mtc0(state, op.r.rd, data); - else if (op.r.rs == OP_CP2_BASIC_CTC2) - lightrec_ctc2(state, op.r.rd, data); - else - lightrec_mtc2(state, op.r.rd, data); + if (op.i.op == OP_CP0) { + lightrec_mtc0(state, reg, data); + } else { + if (op.i.op == OP_LWC2 || op.r.rs != OP_CP2_BASIC_CTC2) + lightrec_mtc2(state, reg, data); + else + lightrec_ctc2(state, reg, data); + + if (state->ops.cop2_notify) + (*state->ops.cop2_notify)(state, op.opcode, data); + } } static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg) { union code op = (union code) arg; + u32 data; + u8 reg; - lightrec_mtc(state, op, state->regs.gpr[op.r.rt]); + if (op.i.op == OP_LWC2) { + data = state->temp_reg; + reg = op.i.rt; + } else { + data = state->regs.gpr[op.r.rt]; + reg = op.r.rd; + } + + lightrec_mtc(state, op, reg, data); } void lightrec_rfe(struct lightrec_state *state) @@ -586,31 +659,26 @@ static void lightrec_cp_cb(struct lightrec_state *state, u32 arg) lightrec_cp(state, (union code) arg); } -static void lightrec_syscall_cb(struct lightrec_state *state) -{ - lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL); -} - -static void lightrec_break_cb(struct lightrec_state *state) -{ - lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK); -} - static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) { struct block *block = lightrec_find_block(state->block_cache, pc); + u8 old_flags; if (block && lightrec_block_is_outdated(state, block)) { pr_debug("Block at PC 0x%08x is outdated!\n", block->pc); - /* Make sure the recompiler isn't processing the block we'll - * destroy */ - if (ENABLE_THREADED_COMPILER) - lightrec_recompiler_remove(state->rec, block); + old_flags = block_set_flags(block, BLOCK_IS_DEAD); + if (!(old_flags & BLOCK_IS_DEAD)) { + /* Make sure the recompiler isn't processing the block + * we'll destroy */ + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_remove(state->rec, block); + + lightrec_unregister_block(state->block_cache, block); + remove_from_code_lut(state->block_cache, block); + lightrec_free_block(state, block); + } - lightrec_unregister_block(state->block_cache, block); - remove_from_code_lut(state->block_cache, block); - lightrec_free_block(state, block); block = NULL; } @@ -635,7 +703,7 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) void *func; int err; - for (;;) { + do { func = lut_read(state, lut_offset(pc)); if (func && func != state->get_next_block) break; @@ -645,19 +713,19 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (unlikely(!block)) break; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) { + if (OPT_REPLACE_MEMSET && + block_has_flag(block, BLOCK_IS_MEMSET)) { func = state->memset_func; break; } - should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE && - !(block->flags & BLOCK_IS_DEAD); + should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) && + !block_has_flag(block, BLOCK_NEVER_COMPILE) && + !block_has_flag(block, BLOCK_IS_DEAD); if (unlikely(should_recompile)) { pr_debug("Block at PC 0x%08x should recompile\n", pc); - lightrec_unregister(MEM_FOR_CODE, block->code_size); - if (ENABLE_THREADED_COMPILER) { lightrec_recompiler_add(state->rec, block); } else { @@ -677,12 +745,12 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (likely(func)) break; - if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) { + if (unlikely(block_has_flag(block, BLOCK_NEVER_COMPILE))) { pc = lightrec_emulate_block(state, block, pc); } else if (!ENABLE_THREADED_COMPILER) { /* Block wasn't compiled yet - run the interpreter */ - if (block->flags & BLOCK_FULLY_TAGGED) + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) pr_debug("Block fully tagged, skipping first pass\n"); else if (ENABLE_FIRST_PASS && likely(!should_recompile)) pc = lightrec_emulate_block(state, block, pc); @@ -693,29 +761,25 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) state->exit_flags = LIGHTREC_EXIT_NOMEM; return NULL; } + } else if (unlikely(block_has_flag(block, BLOCK_IS_DEAD))) { + /* + * If the block is dead but has never been compiled, + * then its function pointer is NULL and we cannot + * execute the block. In that case, reap all the dead + * blocks now, and in the next loop we will create a + * new block. + */ + lightrec_reaper_reap(state->reaper); } else { lightrec_recompiler_add(state->rec, block); } - - if (state->exit_flags != LIGHTREC_EXIT_NORMAL || - state->current_cycle >= state->target_cycle) - break; - } + } while (state->exit_flags == LIGHTREC_EXIT_NORMAL + && state->current_cycle < state->target_cycle); state->next_pc = pc; return func; } -static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, - void (*f)(struct lightrec_state *, u32), u32 arg) -{ - state->current_cycle = state->target_cycle - cycles_delta; - - (*f)(state, arg); - - return state->target_cycle - state->current_cycle; -} - static void * lightrec_alloc_code(struct lightrec_state *state, size_t size) { void *code; @@ -757,6 +821,8 @@ static void lightrec_free_code(struct lightrec_state *state, void *ptr) lightrec_code_alloc_unlock(state); } +static char lightning_code_data[0x80000]; + static void * lightrec_emit_code(struct lightrec_state *state, const struct block *block, jit_state_t *_jit, unsigned int *size) @@ -767,7 +833,9 @@ static void * lightrec_emit_code(struct lightrec_state *state, jit_realize(); - if (!ENABLE_DISASSEMBLER) + if (ENABLE_DISASSEMBLER) + jit_set_data(lightning_code_data, sizeof(lightning_code_data), 0); + else jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); if (has_code_buffer) { @@ -813,6 +881,9 @@ static void * lightrec_emit_code(struct lightrec_state *state, *size = (unsigned int) new_code_size; + if (state->ops.code_inv) + state->ops.code_inv(code, new_code_size); + return code; } @@ -821,9 +892,17 @@ static struct block * generate_wrapper(struct lightrec_state *state) struct block *block; jit_state_t *_jit; unsigned int i; - int stack_ptr; - jit_node_t *to_tramp, *to_fn_epilog; jit_node_t *addr[C_WRAPPERS_COUNT - 1]; + jit_node_t *to_end[C_WRAPPERS_COUNT - 1]; + u8 tmp = JIT_R1; + +#ifdef __sh__ + /* On SH, GBR-relative loads target the r0 register. + * Use it as the temporary register to factorize the move to + * JIT_R1. */ + if (LIGHTREC_REG_STATE == _GBR) + tmp = _R0; +#endif block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -840,67 +919,70 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_prolog(); jit_tramp(256); - /* Add entry points; separate them by opcodes that increment - * LIGHTREC_REG_STATE (since we cannot touch other registers). - * The difference will then tell us which C function to call. */ + /* Add entry points */ for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { - jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8); + jit_ldxi(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[i])); + to_end[i - 1] = jit_b(); addr[i - 1] = jit_indirect(); } + jit_ldxi(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[0])); + + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + jit_patch(to_end[i]); + jit_movr(JIT_R1, tmp); + jit_epilog(); jit_prolog(); - stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS); - /* Save all temporaries on stack */ - for (i = 0; i < NUM_TEMPS; i++) - jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); - - jit_getarg(JIT_R1, jit_arg()); + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]), + LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP)); + } + } - /* Jump to the trampoline */ - to_tramp = jit_jmpi(); + jit_getarg(JIT_R2, jit_arg()); - /* The trampoline will jump back here */ - to_fn_epilog = jit_label(); + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_R2); - /* Restore temporaries from stack */ - for (i = 0; i < NUM_TEMPS; i++) - jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t)); + jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); - jit_ret(); - jit_epilog(); + /* state->current_cycle = state->target_cycle - delta; */ + jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE); - /* Trampoline entry point. - * The sole purpose of the trampoline is to cheese Lightning not to - * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we - * do want to return to the caller with this register modified. */ - jit_prolog(); - jit_tramp(256); - jit_patch(to_tramp); + /* Call the wrapper function */ + jit_finishr(JIT_R1); - /* Retrieve the wrapper function */ - jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrappers)); + /* delta = state->target_cycle - state->current_cycle */; + jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE); - /* Restore LIGHTREC_REG_STATE to its correct value */ - jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state); - - jit_prepare(); - jit_pushargr(LIGHTREC_REG_STATE); - jit_pushargr(LIGHTREC_REG_CYCLE); - jit_pushargr(JIT_R0); - jit_pushargr(JIT_R1); - jit_finishi(c_function_wrapper); - jit_retval_i(LIGHTREC_REG_CYCLE); + /* Restore temporaries from stack */ + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, wrapper_regs[i])); + } + } - jit_patch_at(jit_jmpi(), to_fn_epilog); + jit_ret(); jit_epilog(); block->_jit = _jit; block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; block->function = lightrec_emit_code(state, block, _jit, @@ -952,11 +1034,54 @@ static u32 lightrec_memset(struct lightrec_state *state) return 8 + 5 * (length + 3 / 4); } +static u32 lightrec_check_load_delay(struct lightrec_state *state, u32 pc, u8 reg) +{ + struct block *block; + union code first_op; + + first_op = lightrec_read_opcode(state, pc); + + if (likely(!opcode_reads_register(first_op, reg))) { + state->regs.gpr[reg] = state->temp_reg; + } else { + block = lightrec_get_block(state, pc); + if (unlikely(!block)) { + pr_err("Unable to get block at PC 0x%08x\n", pc); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); + pc = 0; + } else { + pc = lightrec_handle_load_delay(state, block, pc, reg); + } + } + + return pc; +} + +static void update_cycle_counter_before_c(jit_state_t *_jit) +{ + /* update state->current_cycle */ + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, JIT_R1); +} + +static void update_cycle_counter_after_c(jit_state_t *_jit) +{ + /* Recalc the delta */ + jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); +} + static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *loop, *addr, *addr2, *addr3; + jit_node_t *to_end, *loop, *addr, *addr2, *addr3, *addr4, *addr5, *jmp, *jmp2; unsigned int i; u32 offset; @@ -974,36 +1099,93 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_prolog(); jit_frame(256); - jit_getarg(JIT_R0, jit_arg()); + jit_getarg(LIGHTREC_REG_STATE, jit_arg()); + jit_getarg(JIT_V0, jit_arg()); + jit_getarg(JIT_V1, jit_arg()); jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg()); /* Force all callee-saved registers to be pushed on the stack */ for (i = 0; i < NUM_REGS; i++) - jit_movr(JIT_V(i), JIT_V(i)); - - /* Pass lightrec_state structure to blocks, using the last callee-saved - * register that Lightning provides */ - jit_movi(LIGHTREC_REG_STATE, (intptr_t) state); + jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG)); loop = jit_label(); /* Call the block's code */ - jit_jmpr(JIT_R0); + jit_jmpr(JIT_V1); if (OPT_REPLACE_MEMSET) { /* Blocks will jump here when they need to call * lightrec_memset() */ addr3 = jit_indirect(); + jit_movr(JIT_V1, LIGHTREC_REG_CYCLE); + jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); + jit_finishi(lightrec_memset); + jit_retval(LIGHTREC_REG_CYCLE); jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, regs.gpr[31])); + jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS) + jmp = jit_b(); + } + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) { + /* Blocks will jump here when they reach a branch that should + * be executed with the interpreter, passing the branch's PC + * in JIT_V0 and the address of the block in JIT_V1. */ + addr4 = jit_indirect(); + + update_cycle_counter_before_c(_jit); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V1); + jit_pushargr(JIT_V0); + jit_finishi(lightrec_emulate_block); + + jit_retval(JIT_V0); + + update_cycle_counter_after_c(_jit); + + if (OPT_HANDLE_LOAD_DELAYS) + jmp2 = jit_b(); - jit_retval(JIT_R0); - jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0); + } + + if (OPT_HANDLE_LOAD_DELAYS) { + /* Blocks will jump here when they reach a branch with a load + * opcode in its delay slot. The delay slot has already been + * executed; the load value is in (state->temp_reg), and the + * register number is in JIT_V1. + * Jump to a C function which will evaluate the branch target's + * first opcode, to make sure that it does not read the register + * in question; and if it does, handle it accordingly. */ + addr5 = jit_indirect(); + + update_cycle_counter_before_c(_jit); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V0); + jit_pushargr(JIT_V1); + jit_finishi(lightrec_check_load_delay); + + jit_retval(JIT_V0); + + update_cycle_counter_after_c(_jit); + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) + jit_patch(jmp2); + } + + if (OPT_REPLACE_MEMSET + && (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS)) { + jit_patch(jmp); } /* The block will jump here, with the number of cycles remaining in @@ -1018,61 +1200,64 @@ static struct block * generate_dispatcher(struct lightrec_state *state) to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); /* Convert next PC to KUNSEG and avoid mirrors */ - jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); - jit_rshi_u(JIT_R1, JIT_R0, 28); + jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); + jit_rshi_u(JIT_R1, JIT_V1, 28); jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1); jit_addi(JIT_R2, JIT_R2, RAM_SIZE); - jit_movnr(JIT_R0, JIT_R2, JIT_R1); + jit_movnr(JIT_V1, JIT_R2, JIT_R1); /* If possible, use the code LUT */ if (!lut_is_32bit(state)) - jit_lshi(JIT_R0, JIT_R0, 1); - jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE); + jit_lshi(JIT_V1, JIT_V1, 1); + jit_add_state(JIT_V1, JIT_V1); offset = offsetof(struct lightrec_state, code_lut); if (lut_is_32bit(state)) - jit_ldxi_ui(JIT_R0, JIT_R0, offset); + jit_ldxi_ui(JIT_V1, JIT_V1, offset); else - jit_ldxi(JIT_R0, JIT_R0, offset); + jit_ldxi(JIT_V1, JIT_V1, offset); /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); + + /* The code LUT will be set to this address when the block at the target + * PC has been preprocessed but not yet compiled by the threaded + * recompiler */ + addr = jit_indirect(); /* Slow path: call C function get_next_block_func() */ if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* We may call the interpreter - update state->current_cycle */ - jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); - jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); - jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, JIT_R1); + update_cycle_counter_before_c(_jit); } - /* The code LUT will be set to this address when the block at the target - * PC has been preprocessed but not yet compiled by the threaded - * recompiler */ - addr = jit_indirect(); - - /* Get the next block */ jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(JIT_V0); + + /* Save the cycles register if needed */ + if (!(ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES)) + jit_movr(JIT_V0, LIGHTREC_REG_CYCLE); + + /* Get the next block */ jit_finishi(&get_next_block_func); - jit_retval(JIT_R0); + jit_retval(JIT_V1); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* The interpreter may have updated state->current_cycle and * state->target_cycle - recalc the delta */ - jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, current_cycle)); - jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); - jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); + update_cycle_counter_after_c(_jit); + } else { + jit_movr(LIGHTREC_REG_CYCLE, JIT_V0); } + /* Reset JIT_V0 to the next PC */ + jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, next_pc)); + /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); /* When exiting, the recompiled code will jump to that address */ jit_note(__FILE__, __LINE__); @@ -1083,7 +1268,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) block->_jit = _jit; block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; block->function = lightrec_emit_code(state, block, _jit, @@ -1092,6 +1277,10 @@ static struct block * generate_dispatcher(struct lightrec_state *state) goto err_free_block; state->eob_wrapper_func = jit_address(addr2); + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) + state->interpreter_func = jit_address(addr4); + if (OPT_HANDLE_LOAD_DELAYS) + state->ds_check_func = jit_address(addr5); if (OPT_REPLACE_MEMSET) state->memset_func = jit_address(addr3); state->get_next_block = jit_address(addr); @@ -1122,16 +1311,18 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) return (union code) LE32TOH(*code); } -unsigned int lightrec_cycles_of_opcode(union code code) +__cnst unsigned int lightrec_cycles_of_opcode(union code code) { return 2; } -void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block) +void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops) { + struct opcode_list *list = container_of(ops, struct opcode_list, ops); + lightrec_free(state, MEM_FOR_IR, - sizeof(*block->opcode_list) * block->nb_ops, - block->opcode_list); + sizeof(*list) + list->nb_ops * sizeof(struct opcode), + list); } static unsigned int lightrec_get_mips_block_len(const u32 *src) @@ -1153,25 +1344,28 @@ static unsigned int lightrec_get_mips_block_len(const u32 *src) static struct opcode * lightrec_disassemble(struct lightrec_state *state, const u32 *src, unsigned int *len) { - struct opcode *list; + struct opcode_list *list; unsigned int i, length; length = lightrec_get_mips_block_len(src); - list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length); + list = lightrec_malloc(state, MEM_FOR_IR, + sizeof(*list) + sizeof(struct opcode) * length); if (!list) { pr_err("Unable to allocate memory\n"); return NULL; } + list->nb_ops = (u16) length; + for (i = 0; i < length; i++) { - list[i].opcode = LE32TOH(src[i]); - list[i].flags = 0; + list->ops[i].opcode = LE32TOH(src[i]); + list->ops[i].flags = 0; } *len = length * sizeof(u32); - return list; + return list->ops; } static struct block * lightrec_precompile_block(struct lightrec_state *state, @@ -1179,11 +1373,12 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, { struct opcode *list; struct block *block; - void *host; + void *host, *addr; const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *) host; unsigned int length; bool fully_tagged; + u8 block_flags = 0; if (!map) return NULL; @@ -1209,9 +1404,6 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, block->flags = 0; block->code_size = 0; block->precompile_date = state->current_cycle; -#if ENABLE_THREADED_COMPILER - block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT; -#endif block->nb_ops = length / sizeof(u32); lightrec_optimize(state, block); @@ -1227,21 +1419,22 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, pr_debug("Block size: %hu opcodes\n", block->nb_ops); - /* If the first opcode is an 'impossible' branch, never compile the - * block */ - if (should_emulate(block->opcode_list)) - block->flags |= BLOCK_NEVER_COMPILE; - fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) - block->flags |= BLOCK_FULLY_TAGGED; + block_flags |= BLOCK_FULLY_TAGGED; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) - lut_write(state, lut_offset(pc), state->memset_func); + if (block_flags) + block_set_flags(block, block_flags); block->hash = lightrec_calculate_block_hash(block); - pr_debug("Recompile count: %u\n", state->nb_precompile++); + if (OPT_REPLACE_MEMSET && block_has_flag(block, BLOCK_IS_MEMSET)) + addr = state->memset_func; + else + addr = state->get_next_block; + lut_write(state, lut_offset(pc), addr); + + pr_debug("Blocks created: %u\n", ++state->nb_precompile); return block; } @@ -1254,8 +1447,12 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) for (i = 0; i < block->nb_ops; i++) { op = &block->opcode_list[i]; - /* Verify that all load/stores of the opcode list - * Check all loads/stores of the opcode list and mark the + /* If we have one branch that must be emulated, we cannot trash + * the opcode list. */ + if (should_emulate(op)) + return false; + + /* Check all loads/stores of the opcode list and mark the * block as fully compiled if they all have been tagged. */ switch (op->c.i.op) { case OP_LB: @@ -1310,24 +1507,31 @@ static void lightrec_reap_function(struct lightrec_state *state, void *data) lightrec_free_function(state, data); } +static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data) +{ + lightrec_free_opcode_list(state, data); +} + int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block) { struct lightrec_state *state = cstate->state; struct lightrec_branch_target *target; - bool op_list_freed = false, fully_tagged = false; + bool fully_tagged = false; struct block *block2; struct opcode *elm; jit_state_t *_jit, *oldjit; jit_node_t *start_of_block; bool skip_next = false; void *old_fn, *new_fn; + size_t old_code_size; unsigned int i, j; + u8 old_flags; u32 offset; fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) - block->flags |= BLOCK_FULLY_TAGGED; + block_set_flags(block, BLOCK_FULLY_TAGGED); _jit = jit_new_state(); if (!_jit) @@ -1335,13 +1539,16 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, oldjit = block->_jit; old_fn = block->function; + old_code_size = block->code_size; block->_jit = _jit; lightrec_regcache_reset(cstate->reg_cache); + lightrec_preload_pc(cstate->reg_cache); + cstate->cycles = 0; - cstate->nb_branches = 0; cstate->nb_local_branches = 0; cstate->nb_targets = 0; + cstate->no_load_delay = false; jit_prolog(); jit_tramp(256); @@ -1360,7 +1567,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_debug("Branch at offset 0x%x will be emulated\n", i << 2); - lightrec_emit_eob(cstate, block, i, false); + lightrec_emit_jump_to_interpreter(cstate, block, i); skip_next = !op_flag_no_ds(elm->flags); } else { lightrec_rec_opcode(cstate, block, i); @@ -1377,9 +1584,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, cstate->cycles += lightrec_cycles_of_opcode(elm->c); } - for (i = 0; i < cstate->nb_branches; i++) - jit_patch(cstate->branches[i]); - for (i = 0; i < cstate->nb_local_branches; i++) { struct lightrec_branch *branch = &cstate->local_branches[i]; @@ -1403,7 +1607,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_err("Unable to find branch target\n"); } - jit_patch_abs(jit_jmpi(), state->eob_wrapper_func); jit_ret(); jit_epilog(); @@ -1412,22 +1615,24 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, if (!ENABLE_THREADED_COMPILER) pr_err("Unable to compile block!\n"); block->_jit = oldjit; + jit_clear_state(); _jit_destroy_state(_jit); return -ENOMEM; } + /* Pause the reaper, because lightrec_reset_lut_offset() may try to set + * the old block->function pointer to the code LUT. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); + block->function = new_fn; - block->flags &= ~BLOCK_SHOULD_RECOMPILE; + block_clear_flags(block, BLOCK_SHOULD_RECOMPILE); /* Add compiled function to the LUT */ lut_write(state, lut_offset(block->pc), block->function); - if (ENABLE_THREADED_COMPILER) { - /* Since we might try to reap the same block multiple times, - * we need the reaper to wait until everything has been - * submitted, so that the duplicate entries can be dropped. */ - lightrec_reaper_pause(state->reaper); - } + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_continue(state->reaper); /* Detect old blocks that have been covered by the new one */ for (i = 0; i < cstate->nb_targets; i++) { @@ -1437,6 +1642,13 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, continue; offset = block->pc + target->offset * sizeof(u32); + + /* Pause the reaper while we search for the block until we set + * the BLOCK_IS_DEAD flag, otherwise the block may be removed + * under our feet. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); + block2 = lightrec_find_block(state->block_cache, offset); if (block2) { /* No need to check if block2 is compilable - it must @@ -1444,12 +1656,16 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, /* Set the "block dead" flag to prevent the dynarec from * recompiling this block */ - block2->flags |= BLOCK_IS_DEAD; + old_flags = block_set_flags(block2, BLOCK_IS_DEAD); + } + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_continue(state->reaper); /* If block2 was pending for compilation, cancel it. * If it's being compiled right now, wait until it * finishes. */ - if (ENABLE_THREADED_COMPILER) + if (block2) lightrec_recompiler_remove(state->rec, block2); } @@ -1464,20 +1680,17 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, "0x%08x\n", block2->pc, block->pc); /* Finally, reap the block. */ - if (ENABLE_THREADED_COMPILER) { + if (!ENABLE_THREADED_COMPILER) { + lightrec_unregister_block(state->block_cache, block2); + lightrec_free_block(state, block2); + } else if (!(old_flags & BLOCK_IS_DEAD)) { lightrec_reaper_add(state->reaper, lightrec_reap_block, block2); - } else { - lightrec_unregister_block(state->block_cache, block2); - lightrec_free_block(state, block2); } } } - if (ENABLE_THREADED_COMPILER) - lightrec_reaper_continue(state->reaper); - if (ENABLE_DISASSEMBLER) { pr_debug("Compiling block at PC: 0x%08x\n", block->pc); jit_disassemble(); @@ -1485,15 +1698,20 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, jit_clear_state(); -#if ENABLE_THREADED_COMPILER if (fully_tagged) - op_list_freed = atomic_flag_test_and_set(&block->op_list_freed); -#endif - if (fully_tagged && !op_list_freed) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) { pr_debug("Block PC 0x%08x is fully tagged" " - free opcode list\n", block->pc); - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_add(state->reaper, + lightrec_reap_opcode_list, + block->opcode_list); + } else { + lightrec_free_opcode_list(state, block->opcode_list); + } } if (oldjit) { @@ -1509,8 +1727,12 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, _jit_destroy_state(oldjit); lightrec_free_function(state, old_fn); } + + lightrec_unregister(MEM_FOR_CODE, old_code_size); } + pr_debug("Blocks compiled: %u\n", ++state->nb_compile); + return 0; } @@ -1530,7 +1752,7 @@ static void lightrec_print_info(struct lightrec_state *state) u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) { - s32 (*func)(void *, s32) = (void *)state->dispatcher->function; + s32 (*func)(struct lightrec_state *, u32, void *, s32) = (void *)state->dispatcher->function; void *block_trace; s32 cycles_delta; @@ -1547,7 +1769,8 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) if (block_trace) { cycles_delta = state->target_cycle - state->current_cycle; - cycles_delta = (*func)(block_trace, cycles_delta); + cycles_delta = (*func)(state, state->next_pc, + block_trace, cycles_delta); state->current_cycle = state->target_cycle - cycles_delta; } @@ -1561,20 +1784,24 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) return state->next_pc; } -u32 lightrec_execute_one(struct lightrec_state *state, u32 pc) +u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc, + u32 target_cycle) { - return lightrec_execute(state, pc, state->current_cycle); -} - -u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) -{ - struct block *block = lightrec_get_block(state, pc); - if (!block) - return 0; + struct block *block; state->exit_flags = LIGHTREC_EXIT_NORMAL; + state->target_cycle = target_cycle; - pc = lightrec_emulate_block(state, block, pc); + do { + block = lightrec_get_block(state, pc); + if (!block) + break; + + pc = lightrec_emulate_block(state, block, pc); + + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_reap(state->reaper); + } while (state->current_cycle < state->target_cycle); if (LOG_LEVEL >= INFO_L) lightrec_print_info(state); @@ -1584,9 +1811,13 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) void lightrec_free_block(struct lightrec_state *state, struct block *block) { + u8 old_flags; + lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32)); - if (block->opcode_list) - lightrec_free_opcode_list(state, block); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + lightrec_free_opcode_list(state, block->opcode_list); if (block->_jit) _jit_destroy_state(block->_jit); if (block->function) { @@ -1639,6 +1870,11 @@ struct lightrec_state * lightrec_init(char *argv0, return NULL; } + if (ops->cop2_notify) + pr_debug("Optional cop2_notify callback in lightrec_ops\n"); + else + pr_debug("No optional cop2_notify callback in lightrec_ops\n"); + if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER && codebuf_map->address) { tlsf = tlsf_create_with_pool(codebuf_map->address, @@ -1669,6 +1905,7 @@ struct lightrec_state * lightrec_init(char *argv0, state->tlsf = tlsf; state->with_32bit_lut = with_32bit_lut; + state->in_delay_slot_n = 0xff; state->block_cache = lightrec_blockcache_init(state); if (!state->block_cache) @@ -1703,10 +1940,9 @@ struct lightrec_state * lightrec_init(char *argv0, state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb; state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb; + state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb; state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb; - state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb; - state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb; map = &state->maps[PSX_MAP_BIOS]; state->offset_bios = (uintptr_t)map->address - map->pc; @@ -1714,6 +1950,9 @@ struct lightrec_state * lightrec_init(char *argv0, map = &state->maps[PSX_MAP_SCRATCH_PAD]; state->offset_scratch = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_HW_REGISTERS]; + state->offset_io = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_KERNEL_USER_RAM]; state->offset_ram = (uintptr_t)map->address - map->pc; @@ -1725,6 +1964,7 @@ struct lightrec_state * lightrec_init(char *argv0, if (state->offset_bios == 0 && state->offset_scratch == 0 && state->offset_ram == 0 && + state->offset_io == 0 && state->mirrors_mapped) { pr_info("Memory map is perfect. Emitted code will be best.\n"); } else {