X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightrec%2Flightrec.c;h=1cfc4274902b8c3d93d1f1cf6106db8d6cbf3fc9;hb=e2fb1389dc12376acb84e4993ed3b08760257252;hp=fa08a800d721df81d090e5d8329eb73efe7c49f7;hpb=6ce0b00a79f4fa7c4a1735be9f48bfbc644b080c;p=pcsx_rearmed.git diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index fa08a800..1cfc4274 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -28,9 +28,6 @@ #include #include -#define GENMASK(h, l) \ - (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h)))) - static struct block * lightrec_precompile_block(struct lightrec_state *state, u32 pc); static bool lightrec_block_is_fully_tagged(const struct block *block); @@ -38,21 +35,23 @@ static bool lightrec_block_is_fully_tagged(const struct block *block); static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data); static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg); +static void lightrec_reap_block(struct lightrec_state *state, void *data); + static void lightrec_default_sb(struct lightrec_state *state, u32 opcode, - void *host, u32 addr, u8 data) + void *host, u32 addr, u32 data) { - *(u8 *)host = data; + *(u8 *)host = (u8)data; - if (!state->invalidate_from_dma_only) + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate(state, addr, 1); } static void lightrec_default_sh(struct lightrec_state *state, u32 opcode, - void *host, u32 addr, u16 data) + void *host, u32 addr, u32 data) { - *(u16 *)host = HTOLE16(data); + *(u16 *)host = HTOLE16((u16)data); - if (!state->invalidate_from_dma_only) + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate(state, addr, 2); } @@ -61,7 +60,7 @@ static void lightrec_default_sw(struct lightrec_state *state, u32 opcode, { *(u32 *)host = HTOLE32(data); - if (!state->invalidate_from_dma_only) + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate(state, addr, 4); } @@ -83,6 +82,27 @@ static u32 lightrec_default_lw(struct lightrec_state *state, return LE32TOH(*(u32 *)host); } +static u32 lightrec_default_lwu(struct lightrec_state *state, + u32 opcode, void *host, u32 addr) +{ + u32 val; + + memcpy(&val, host, 4); + + return LE32TOH(val); +} + +static void lightrec_default_swu(struct lightrec_state *state, u32 opcode, + void *host, u32 addr, u32 data) +{ + data = HTOLE32(data); + + memcpy(host, &data, 4); + + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) + lightrec_invalidate(state, addr & ~0x3, 8); +} + static const struct lightrec_mem_map_ops lightrec_default_ops = { .sb = lightrec_default_sb, .sh = lightrec_default_sh, @@ -90,6 +110,8 @@ static const struct lightrec_mem_map_ops lightrec_default_ops = { .lb = lightrec_default_lb, .lh = lightrec_default_lh, .lw = lightrec_default_lw, + .lwu = lightrec_default_lwu, + .swu = lightrec_default_swu, }; static void __segfault_cb(struct lightrec_state *state, u32 addr, @@ -97,9 +119,9 @@ static void __segfault_cb(struct lightrec_state *state, u32 addr, { lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); pr_err("Segmentation fault in recompiled code: invalid " - "load/store at address 0x%08x\n", addr); + "load/store at address "PC_FMT"\n", addr); if (block) - pr_err("Was executing block PC 0x%08x\n", block->pc); + pr_err("Was executing block "PC_FMT"\n", block->pc); } static void lightrec_swl(struct lightrec_state *state, @@ -107,7 +129,7 @@ static void lightrec_swl(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u32 data) { unsigned 
int shift = addr & 0x3; - unsigned int mask = GENMASK(31, (shift + 1) * 8); + unsigned int mask = shift < 3 ? GENMASK(31, (shift + 1) * 8) : 0; u32 old_data; /* Align to 32 bits */ @@ -171,7 +193,7 @@ static u32 lightrec_lwr(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u32 data) { unsigned int shift = addr & 0x3; - unsigned int mask = GENMASK(31, 32 - shift * 8); + unsigned int mask = shift ? GENMASK(31, 32 - shift * 8) : 0; u32 old_data; /* Align to 32 bits */ @@ -201,7 +223,7 @@ static void lightrec_invalidate_map(struct lightrec_state *state, } } -enum psx_map +static enum psx_map lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr) { const struct lightrec_mem_map *map; @@ -240,26 +262,43 @@ lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) return map; } -u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u32 *flags, struct block *block) +u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base, + u32 data, u32 *flags, struct block *block, u16 offset) { const struct lightrec_mem_map *map; const struct lightrec_mem_map_ops *ops; u32 opcode = op.opcode; + bool was_tagged = true; + u16 old_flags; + u32 addr; void *host; - addr += (s16) op.i.imm; + addr = kunseg(base + (s16) op.i.imm); - map = lightrec_get_map(state, &host, kunseg(addr)); + map = lightrec_get_map(state, &host, addr); if (!map) { __segfault_cb(state, addr, block); return 0; } + if (flags) + was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(*flags); if (likely(!map->ops)) { - if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) - *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) { + /* Force parallel port accesses as HW accesses, because + * the direct-I/O emitters can't differenciate it. */ + if (unlikely(map == &state->maps[PSX_MAP_PARALLEL_PORT])) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + /* If the base register is 0x0, be extra suspicious. + * Some games (e.g. Sled Storm) actually do segmentation + * faults by using uninitialized pointers, which are + * later initialized to point to hardware registers. 
*/ + else if (op.i.rs && base == 0x0) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + else + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + } ops = &lightrec_default_ops; } else if (flags && @@ -272,12 +311,23 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, ops = map->ops; } + if (!was_tagged) { + old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); + + if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { + pr_debug("Opcode of block at "PC_FMT" has been tagged" + " - flag for recompilation\n", block->pc); + + lut_write(state, lut_offset(block->pc), NULL); + } + } + switch (op.i.op) { case OP_SB: - ops->sb(state, opcode, host, addr, (u8) data); + ops->sb(state, opcode, host, addr, data); return 0; case OP_SH: - ops->sh(state, opcode, host, addr, (u16) data); + ops->sh(state, opcode, host, addr, data); return 0; case OP_SWL: lightrec_swl(state, ops, opcode, host, addr, data); @@ -306,6 +356,11 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, return lightrec_lwl(state, ops, opcode, host, addr, data); case OP_LWR: return lightrec_lwr(state, ops, opcode, host, addr, data); + case OP_META_LWU: + return ops->lwu(state, opcode, host, addr); + case OP_META_SWU: + ops->swu(state, opcode, host, addr, data); + return 0; case OP_LW: default: return ops->lw(state, opcode, host, addr); @@ -314,10 +369,10 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, static void lightrec_rw_helper(struct lightrec_state *state, union code op, u32 *flags, - struct block *block) + struct block *block, u16 offset) { u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs], - state->regs.gpr[op.i.rt], flags, block); + state->regs.gpr[op.i.rt], flags, block, offset); switch (op.i.op) { case OP_LB: @@ -327,8 +382,13 @@ static void lightrec_rw_helper(struct lightrec_state *state, case OP_LWL: case OP_LWR: case OP_LW: - if (op.i.rt) + case OP_META_LWU: + if (OPT_HANDLE_LOAD_DELAYS && unlikely(!state->in_delay_slot_n)) { + state->temp_reg = ret; + state->in_delay_slot_n = 0xff; + } else if (op.i.rt) { state->regs.gpr[op.i.rt] = ret; + } fallthrough; default: break; @@ -337,41 +397,26 @@ static void lightrec_rw_helper(struct lightrec_state *state, static void lightrec_rw_cb(struct lightrec_state *state, u32 arg) { - lightrec_rw_helper(state, (union code) arg, NULL, NULL); + lightrec_rw_helper(state, (union code) arg, NULL, NULL, 0); } static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) { struct block *block; struct opcode *op; - bool was_tagged; u16 offset = (u16)arg; - u16 old_flags; block = lightrec_find_block_from_lut(state->block_cache, - arg >> 16, state->next_pc); + arg >> 16, state->curr_pc); if (unlikely(!block)) { - pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n", - state->next_pc, offset); + pr_err("rw_generic: No block found in LUT for "PC_FMT" offset 0x%"PRIx16"\n", + state->curr_pc, offset); lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return; } op = &block->opcode_list[offset]; - was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags); - - lightrec_rw_helper(state, op->c, &op->flags, block); - - if (!was_tagged) { - old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); - - if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { - pr_debug("Opcode of block at PC 0x%08x has been tagged" - " - flag for recompilation\n", block->pc); - - lut_write(state, lut_offset(block->pc), NULL); - } - } + lightrec_rw_helper(state, op->c, &op->flags, block, offset); } static u32 clamp_s32(s32 val, s32 min, s32 max) @@ -431,23 +476,43 @@ u32 
lightrec_mfc(struct lightrec_state *state, union code op) if (op.i.op == OP_CP0) return state->regs.cp0[op.r.rd]; - else if (op.r.rs == OP_CP2_BASIC_MFC2) - return lightrec_mfc2(state, op.r.rd); - val = state->regs.cp2c[op.r.rd]; - - switch (op.r.rd) { - case 4: - case 12: - case 20: - case 26: - case 27: - case 29: - case 30: - return (u32)(s16)val; - default: - return val; + if (op.i.op == OP_SWC2) { + val = lightrec_mfc2(state, op.i.rt); + } else if (op.r.rs == OP_CP2_BASIC_MFC2) + val = lightrec_mfc2(state, op.r.rd); + else { + val = state->regs.cp2c[op.r.rd]; + + switch (op.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + val = (u32)(s16)val; + fallthrough; + default: + break; + } } + + if (state->ops.cop2_notify) + (*state->ops.cop2_notify)(state, op.opcode, val); + + return val; +} + +static void lightrec_mfc_cb(struct lightrec_state *state, union code op) +{ + u32 rt = lightrec_mfc(state, op); + + if (op.i.op == OP_SWC2) + state->temp_reg = rt; + else if (op.r.rt) + state->regs.gpr[op.r.rt] = rt; } static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) @@ -490,7 +555,7 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) status = state->regs.cp0[12]; /* Handle software interrupts */ - if (!!(status & cause & 0x300) & status) + if ((!!(status & cause & 0x300)) & status) lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); /* Handle hardware interrupts */ @@ -564,21 +629,36 @@ static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data) } } -void lightrec_mtc(struct lightrec_state *state, union code op, u32 data) +void lightrec_mtc(struct lightrec_state *state, union code op, u8 reg, u32 data) { - if (op.i.op == OP_CP0) - lightrec_mtc0(state, op.r.rd, data); - else if (op.r.rs == OP_CP2_BASIC_CTC2) - lightrec_ctc2(state, op.r.rd, data); - else - lightrec_mtc2(state, op.r.rd, data); + if (op.i.op == OP_CP0) { + lightrec_mtc0(state, reg, data); + } else { + if (op.i.op == OP_LWC2 || op.r.rs != OP_CP2_BASIC_CTC2) + lightrec_mtc2(state, reg, data); + else + lightrec_ctc2(state, reg, data); + + if (state->ops.cop2_notify) + (*state->ops.cop2_notify)(state, op.opcode, data); + } } static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg) { union code op = (union code) arg; + u32 data; + u8 reg; - lightrec_mtc(state, op, state->regs.gpr[op.r.rt]); + if (op.i.op == OP_LWC2) { + data = state->temp_reg; + reg = op.i.rt; + } else { + data = state->regs.gpr[op.r.rt]; + reg = op.r.rd; + } + + lightrec_mtc(state, op, reg, data); } void lightrec_rfe(struct lightrec_state *state) @@ -616,7 +696,7 @@ static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) u8 old_flags; if (block && lightrec_block_is_outdated(state, block)) { - pr_debug("Block at PC 0x%08x is outdated!\n", block->pc); + pr_debug("Block at "PC_FMT" is outdated!\n", block->pc); old_flags = block_set_flags(block, BLOCK_IS_DEAD); if (!(old_flags & BLOCK_IS_DEAD)) { @@ -625,9 +705,15 @@ static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) if (ENABLE_THREADED_COMPILER) lightrec_recompiler_remove(state->rec, block); - lightrec_unregister_block(state->block_cache, block); remove_from_code_lut(state->block_cache, block); - lightrec_free_block(state, block); + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_add(state->reaper, + lightrec_reap_block, block); + } else { + lightrec_unregister_block(state->block_cache, block); + lightrec_free_block(state, block); + } } block = NULL; @@ 
-636,7 +722,7 @@ static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) if (!block) { block = lightrec_precompile_block(state, pc); if (!block) { - pr_err("Unable to recompile block at PC 0x%x\n", pc); + pr_err("Unable to recompile block at "PC_FMT"\n", pc); lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return NULL; } @@ -654,7 +740,7 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) void *func; int err; - for (;;) { + do { func = lut_read(state, lut_offset(pc)); if (func && func != state->get_next_block) break; @@ -671,10 +757,11 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) } should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) && + !block_has_flag(block, BLOCK_NEVER_COMPILE) && !block_has_flag(block, BLOCK_IS_DEAD); if (unlikely(should_recompile)) { - pr_debug("Block at PC 0x%08x should recompile\n", pc); + pr_debug("Block at "PC_FMT" should recompile\n", pc); if (ENABLE_THREADED_COMPILER) { lightrec_recompiler_add(state->rec, block); @@ -723,13 +810,10 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) } else { lightrec_recompiler_add(state->rec, block); } + } while (state->exit_flags == LIGHTREC_EXIT_NORMAL + && state->current_cycle < state->target_cycle); - if (state->exit_flags != LIGHTREC_EXIT_NORMAL || - state->current_cycle >= state->target_cycle) - break; - } - - state->next_pc = pc; + state->curr_pc = pc; return func; } @@ -774,6 +858,8 @@ static void lightrec_free_code(struct lightrec_state *state, void *ptr) lightrec_code_alloc_unlock(state); } +static char lightning_code_data[0x80000]; + static void * lightrec_emit_code(struct lightrec_state *state, const struct block *block, jit_state_t *_jit, unsigned int *size) @@ -784,7 +870,9 @@ static void * lightrec_emit_code(struct lightrec_state *state, jit_realize(); - if (!ENABLE_DISASSEMBLER) + if (ENABLE_DISASSEMBLER) + jit_set_data(lightning_code_data, sizeof(lightning_code_data), 0); + else jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); if (has_code_buffer) { @@ -830,6 +918,9 @@ static void * lightrec_emit_code(struct lightrec_state *state, *size = (unsigned int) new_code_size; + if (state->ops.code_inv) + state->ops.code_inv(code, new_code_size); + return code; } @@ -840,6 +931,15 @@ static struct block * generate_wrapper(struct lightrec_state *state) unsigned int i; jit_node_t *addr[C_WRAPPERS_COUNT - 1]; jit_node_t *to_end[C_WRAPPERS_COUNT - 1]; + u8 tmp = JIT_R1; + +#ifdef __sh__ + /* On SH, GBR-relative loads target the r0 register. + * Use it as the temporary register to factorize the move to + * JIT_R1. 
*/ + if (LIGHTREC_REG_STATE == _GBR) + tmp = _R0; +#endif block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -858,17 +958,18 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* Add entry points */ for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { - jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + jit_ldxi(tmp, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, c_wrappers[i])); to_end[i - 1] = jit_b(); addr[i - 1] = jit_indirect(); } - jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + jit_ldxi(tmp, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, c_wrappers[0])); for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) jit_patch(to_end[i]); + jit_movr(JIT_R1, tmp); jit_epilog(); jit_prolog(); @@ -954,27 +1055,78 @@ static u32 lightrec_memset(struct lightrec_state *state) u32 length = state->regs.gpr[5] * 4; if (!map) { - pr_err("Unable to find memory map for memset target address " - "0x%x\n", kunseg_pc); + pr_err("Unable to find memory map for memset target address "PC_FMT"\n", + kunseg_pc); return 0; } - pr_debug("Calling host memset, PC 0x%x (host address 0x%" PRIxPTR ") for %u bytes\n", + pr_debug("Calling host memset, "PC_FMT" (host address 0x%"PRIxPTR") for %u bytes\n", kunseg_pc, (uintptr_t)host, length); memset(host, 0, length); - if (!state->invalidate_from_dma_only) + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) lightrec_invalidate_map(state, map, kunseg_pc, length); /* Rough estimation of the number of cycles consumed */ return 8 + 5 * (length + 3 / 4); } +static u32 lightrec_check_load_delay(struct lightrec_state *state, u32 pc, u8 reg) +{ + struct block *block; + union code first_op; + + first_op = lightrec_read_opcode(state, pc); + + if (likely(!opcode_reads_register(first_op, reg))) { + state->regs.gpr[reg] = state->temp_reg; + } else { + block = lightrec_get_block(state, pc); + if (unlikely(!block)) { + pr_err("Unable to get block at "PC_FMT"\n", pc); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); + pc = 0; + } else { + pc = lightrec_handle_load_delay(state, block, pc, reg); + } + } + + return pc; +} + +static void update_cycle_counter_before_c(jit_state_t *_jit) +{ + /* update state->current_cycle */ + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, JIT_R1); +} + +static void update_cycle_counter_after_c(jit_state_t *_jit) +{ + /* Recalc the delta */ + jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); +} + +static void sync_next_pc(jit_state_t *_jit) +{ + if (lightrec_store_next_pc()) { + jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, next_pc)); + } +} + static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *loop, *addr, *addr2, *addr3; + jit_node_t *to_end, *loop, *addr, *addr2, *addr3, *addr4, *addr5, *jmp, *jmp2; unsigned int i; u32 offset; @@ -992,6 +1144,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_prolog(); jit_frame(256); + jit_getarg(LIGHTREC_REG_STATE, jit_arg()); + jit_getarg(JIT_V0, jit_arg()); jit_getarg(JIT_V1, jit_arg()); jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg()); @@ -999,10 +1153,6 @@ static struct block * generate_dispatcher(struct 
lightrec_state *state) for (i = 0; i < NUM_REGS; i++) jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG)); - /* Pass lightrec_state structure to blocks, using the last callee-saved - * register that Lightning provides */ - jit_movi(LIGHTREC_REG_STATE, (intptr_t) state); - loop = jit_label(); /* Call the block's code */ @@ -1017,21 +1167,82 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); + jit_finishi(lightrec_memset); + jit_retval(LIGHTREC_REG_CYCLE); jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, regs.gpr[31])); - - jit_retval(LIGHTREC_REG_CYCLE); jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS) + jmp = jit_b(); + } + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) { + /* Blocks will jump here when they reach a branch that should + * be executed with the interpreter, passing the branch's PC + * in JIT_V0 and the address of the block in JIT_V1. */ + addr4 = jit_indirect(); + + sync_next_pc(_jit); + update_cycle_counter_before_c(_jit); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V1); + jit_pushargr(JIT_V0); + jit_finishi(lightrec_emulate_block); + + jit_retval(JIT_V0); + + update_cycle_counter_after_c(_jit); + + if (OPT_HANDLE_LOAD_DELAYS) + jmp2 = jit_b(); + + } + + if (OPT_HANDLE_LOAD_DELAYS) { + /* Blocks will jump here when they reach a branch with a load + * opcode in its delay slot. The delay slot has already been + * executed; the load value is in (state->temp_reg), and the + * register number is in JIT_V1. + * Jump to a C function which will evaluate the branch target's + * first opcode, to make sure that it does not read the register + * in question; and if it does, handle it accordingly. 
*/ + addr5 = jit_indirect(); + + sync_next_pc(_jit); + update_cycle_counter_before_c(_jit); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V0); + jit_pushargr(JIT_V1); + jit_finishi(lightrec_check_load_delay); + + jit_retval(JIT_V0); + + update_cycle_counter_after_c(_jit); } /* The block will jump here, with the number of cycles remaining in * LIGHTREC_REG_CYCLE */ addr2 = jit_indirect(); - /* Store back the next_pc to the lightrec_state structure */ - offset = offsetof(struct lightrec_state, next_pc); + sync_next_pc(_jit); + + if (OPT_HANDLE_LOAD_DELAYS && OPT_DETECT_IMPOSSIBLE_BRANCHES) + jit_patch(jmp2); + + if (OPT_REPLACE_MEMSET + && (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS)) { + jit_patch(jmp); + } + + /* Store back the next PC to the lightrec_state structure */ + offset = offsetof(struct lightrec_state, curr_pc); jit_stxi_i(offset, LIGHTREC_REG_STATE, JIT_V0); /* Jump to end if state->target_cycle < state->current_cycle */ @@ -1047,7 +1258,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) /* If possible, use the code LUT */ if (!lut_is_32bit(state)) jit_lshi(JIT_V1, JIT_V1, 1); - jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE); + jit_add_state(JIT_V1, JIT_V1); offset = offsetof(struct lightrec_state, code_lut); if (lut_is_32bit(state)) @@ -1067,11 +1278,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* We may call the interpreter - update state->current_cycle */ - jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); - jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE); - jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, JIT_V1); + update_cycle_counter_before_c(_jit); } jit_prepare(); @@ -1089,15 +1296,15 @@ static struct block * generate_dispatcher(struct lightrec_state *state) if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* The interpreter may have updated state->current_cycle and * state->target_cycle - recalc the delta */ - jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, current_cycle)); - jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); - jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); + update_cycle_counter_after_c(_jit); } else { jit_movr(LIGHTREC_REG_CYCLE, JIT_V0); } + /* Reset JIT_V0 to the next PC */ + jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, curr_pc)); + /* If we get non-NULL, loop */ jit_patch_at(jit_bnei(JIT_V1, 0), loop); @@ -1119,6 +1326,10 @@ static struct block * generate_dispatcher(struct lightrec_state *state) goto err_free_block; state->eob_wrapper_func = jit_address(addr2); + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) + state->interpreter_func = jit_address(addr4); + if (OPT_HANDLE_LOAD_DELAYS) + state->ds_check_func = jit_address(addr5); if (OPT_REPLACE_MEMSET) state->memset_func = jit_address(addr3); state->get_next_block = jit_address(addr); @@ -1149,9 +1360,10 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) return (union code) LE32TOH(*code); } -unsigned int lightrec_cycles_of_opcode(union code code) +unsigned int lightrec_cycles_of_opcode(const struct lightrec_state *state, + union code code) { - return 2; + return state->cycles_per_op; } void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops) @@ -1257,11 +1469,6 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, 
pr_debug("Block size: %hu opcodes\n", block->nb_ops); - /* If the first opcode is an 'impossible' branch, never compile the - * block */ - if (should_emulate(block->opcode_list)) - block_flags |= BLOCK_NEVER_COMPILE; - fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) block_flags |= BLOCK_FULLY_TAGGED; @@ -1277,7 +1484,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, addr = state->get_next_block; lut_write(state, lut_offset(pc), addr); - pr_debug("Recompile count: %u\n", state->nb_precompile++); + pr_debug("Blocks created: %u\n", ++state->nb_precompile); return block; } @@ -1290,8 +1497,12 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) for (i = 0; i < block->nb_ops; i++) { op = &block->opcode_list[i]; - /* Verify that all load/stores of the opcode list - * Check all loads/stores of the opcode list and mark the + /* If we have one branch that must be emulated, we cannot trash + * the opcode list. */ + if (should_emulate(op)) + return false; + + /* Check all loads/stores of the opcode list and mark the * block as fully compiled if they all have been tagged. */ switch (op->c.i.op) { case OP_LB: @@ -1308,6 +1519,8 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) case OP_SWR: case OP_LWC2: case OP_SWC2: + case OP_META_LWU: + case OP_META_SWU: if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags)) return false; fallthrough; @@ -1323,7 +1536,7 @@ static void lightrec_reap_block(struct lightrec_state *state, void *data) { struct block *block = data; - pr_debug("Reap dead block at PC 0x%08x\n", block->pc); + pr_debug("Reap dead block at "PC_FMT"\n", block->pc); lightrec_unregister_block(state->block_cache, block); lightrec_free_block(state, block); } @@ -1354,6 +1567,8 @@ static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data) int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block) { + struct block *dead_blocks[ARRAY_SIZE(cstate->targets)]; + u32 was_dead[ARRAY_SIZE(cstate->targets) / 8]; struct lightrec_state *state = cstate->state; struct lightrec_branch_target *target; bool fully_tagged = false; @@ -1382,9 +1597,14 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, block->_jit = _jit; lightrec_regcache_reset(cstate->reg_cache); + + if (OPT_PRELOAD_PC && (block->flags & BLOCK_PRELOAD_PC)) + lightrec_preload_pc(cstate->reg_cache, _jit); + cstate->cycles = 0; cstate->nb_local_branches = 0; cstate->nb_targets = 0; + cstate->no_load_delay = false; jit_prolog(); jit_tramp(256); @@ -1403,7 +1623,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_debug("Branch at offset 0x%x will be emulated\n", i << 2); - lightrec_emit_eob(cstate, block, i, false); + lightrec_emit_jump_to_interpreter(cstate, block, i); skip_next = !op_flag_no_ds(elm->flags); } else { lightrec_rec_opcode(cstate, block, i); @@ -1417,7 +1637,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, #endif } - cstate->cycles += lightrec_cycles_of_opcode(elm->c); + cstate->cycles += lightrec_cycles_of_opcode(state, elm->c); } for (i = 0; i < cstate->nb_local_branches; i++) { @@ -1467,11 +1687,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, /* Add compiled function to the LUT */ lut_write(state, lut_offset(block->pc), block->function); - if (ENABLE_THREADED_COMPILER) - lightrec_reaper_continue(state->reaper); - /* Detect old blocks that have been covered by the new one */ - for (i = 0; i < cstate->nb_targets; i++) { + for (i = 0; 
ENABLE_THREADED_COMPILER && i < cstate->nb_targets; i++) { target = &cstate->targets[i]; if (!target->offset) @@ -1479,12 +1696,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, offset = block->pc + target->offset * sizeof(u32); - /* Pause the reaper while we search for the block until we set - * the BLOCK_IS_DEAD flag, otherwise the block may be removed - * under our feet. */ - if (ENABLE_THREADED_COMPILER) - lightrec_reaper_pause(state->reaper); - block2 = lightrec_find_block(state->block_cache, offset); if (block2) { /* No need to check if block2 is compilable - it must @@ -1493,17 +1704,26 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, /* Set the "block dead" flag to prevent the dynarec from * recompiling this block */ old_flags = block_set_flags(block2, BLOCK_IS_DEAD); + + if (old_flags & BLOCK_IS_DEAD) + was_dead[i / 32] |= BIT(i % 32); + else + was_dead[i / 32] &= ~BIT(i % 32); } - if (ENABLE_THREADED_COMPILER) { - lightrec_reaper_continue(state->reaper); + dead_blocks[i] = block2; - /* If block2 was pending for compilation, cancel it. - * If it's being compiled right now, wait until it - * finishes. */ - if (block2) - lightrec_recompiler_remove(state->rec, block2); - } + /* If block2 was pending for compilation, cancel it. + * If it's being compiled right now, wait until it finishes. */ + if (block2) + lightrec_recompiler_remove(state->rec, block2); + } + + for (i = 0; i < cstate->nb_targets; i++) { + target = &cstate->targets[i]; + + if (!target->offset) + continue; /* We know from now on that block2 (if present) isn't going to * be compiled. We can override the LUT entry with our new @@ -1511,6 +1731,12 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, offset = lut_offset(block->pc) + target->offset; lut_write(state, offset, jit_address(target->label)); + if (ENABLE_THREADED_COMPILER) { + block2 = dead_blocks[i]; + } else { + offset = block->pc + target->offset * sizeof(u32); + block2 = lightrec_find_block(state->block_cache, offset); + } if (block2) { pr_debug("Reap block 0x%08x as it's covered by block " "0x%08x\n", block2->pc, block->pc); @@ -1519,7 +1745,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, if (!ENABLE_THREADED_COMPILER) { lightrec_unregister_block(state->block_cache, block2); lightrec_free_block(state, block2); - } else if (!(old_flags & BLOCK_IS_DEAD)) { + } else if (!(was_dead[i / 32] & BIT(i % 32))) { lightrec_reaper_add(state->reaper, lightrec_reap_block, block2); @@ -1527,6 +1753,9 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, } } + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_continue(state->reaper); + if (ENABLE_DISASSEMBLER) { pr_debug("Compiling block at PC: 0x%08x\n", block->pc); jit_disassemble(); @@ -1538,7 +1767,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) { - pr_debug("Block PC 0x%08x is fully tagged" + pr_debug("Block "PC_FMT" is fully tagged" " - free opcode list\n", block->pc); if (ENABLE_THREADED_COMPILER) { @@ -1567,6 +1796,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, lightrec_unregister(MEM_FOR_CODE, old_code_size); } + pr_debug("Blocks compiled: %u\n", ++state->nb_compile); + return 0; } @@ -1586,7 +1817,7 @@ static void lightrec_print_info(struct lightrec_state *state) u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) { - s32 (*func)(void *, s32) = (void *)state->dispatcher->function; + s32 
(*func)(struct lightrec_state *, u32, void *, s32) = (void *)state->dispatcher->function; void *block_trace; s32 cycles_delta; @@ -1597,13 +1828,14 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) target_cycle = UINT_MAX; state->target_cycle = target_cycle; - state->next_pc = pc; + state->curr_pc = pc; block_trace = get_next_block_func(state, pc); if (block_trace) { cycles_delta = state->target_cycle - state->current_cycle; - cycles_delta = (*func)(block_trace, cycles_delta); + cycles_delta = (*func)(state, state->curr_pc, + block_trace, cycles_delta); state->current_cycle = state->target_cycle - cycles_delta; } @@ -1614,7 +1846,7 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) if (LOG_LEVEL >= INFO_L) lightrec_print_info(state); - return state->next_pc; + return state->curr_pc; } u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc, @@ -1703,6 +1935,11 @@ struct lightrec_state * lightrec_init(char *argv0, return NULL; } + if (ops->cop2_notify) + pr_debug("Optional cop2_notify callback in lightrec_ops\n"); + else + pr_debug("No optional cop2_notify callback in lightrec_ops\n"); + if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER && codebuf_map->address) { tlsf = tlsf_create_with_pool(codebuf_map->address, @@ -1723,7 +1960,7 @@ struct lightrec_state * lightrec_init(char *argv0, else lut_size = CODE_LUT_SIZE * sizeof(void *); - init_jit(argv0); + init_jit_with_debug(argv0, stdout); state = calloc(1, sizeof(*state) + lut_size); if (!state) @@ -1733,6 +1970,8 @@ struct lightrec_state * lightrec_init(char *argv0, state->tlsf = tlsf; state->with_32bit_lut = with_32bit_lut; + state->in_delay_slot_n = 0xff; + state->cycles_per_op = 2; state->block_cache = lightrec_blockcache_init(state); if (!state->block_cache) @@ -1767,6 +2006,7 @@ struct lightrec_state * lightrec_init(char *argv0, state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb; state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb; + state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb; state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb; @@ -1878,12 +2118,12 @@ void lightrec_invalidate_all(struct lightrec_state *state) memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE); } -void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only) +void lightrec_set_unsafe_opt_flags(struct lightrec_state *state, u32 flags) { - if (state->invalidate_from_dma_only != dma_only) + if ((flags ^ state->opt_flags) & LIGHTREC_OPT_INV_DMA_ONLY) lightrec_invalidate_all(state); - state->invalidate_from_dma_only = dma_only; + state->opt_flags = flags; } void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags) @@ -1926,3 +2166,8 @@ struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state) { return &state->regs; } + +void lightrec_set_cycles_per_opcode(struct lightrec_state *state, u32 cycles) +{ + state->cycles_per_op = cycles; +}
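
Usage note (illustrative, not part of the patch): this change replaces lightrec_set_invalidate_mode() with lightrec_set_unsafe_opt_flags(), moving the DMA-only invalidation switch onto the LIGHTREC_OPT_INV_DMA_ONLY bit, and adds lightrec_set_cycles_per_opcode() behind lightrec_cycles_of_opcode(), which previously returned a fixed cost of 2. A minimal frontend-side sketch, assuming the public lightrec.h header and a state obtained from lightrec_init(); the helper name configure_lightrec is hypothetical:

	#include <stdbool.h>
	#include <stdint.h>
	#include <lightrec.h>	/* public Lightrec API; header name assumed */

	/* Illustrative frontend helper, not part of this patch. */
	static void configure_lightrec(struct lightrec_state *state, bool inv_dma_only)
	{
		uint32_t flags = 0;

		if (inv_dma_only)
			flags |= LIGHTREC_OPT_INV_DMA_ONLY;

		/* Replaces the old lightrec_set_invalidate_mode(state, dma_only).
		 * Flipping the DMA-only bit still forces a full invalidation,
		 * since lightrec_set_unsafe_opt_flags() compares the new flags
		 * against the previous opt_flags value. */
		lightrec_set_unsafe_opt_flags(state, flags);

		/* New per-opcode cycle cost used by lightrec_cycles_of_opcode();
		 * 2 matches the default installed by lightrec_init(). */
		lightrec_set_cycles_per_opcode(state, 2);
	}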