From 5459088bf71ee4da726a70f191d301da2a121910 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 6 Oct 2023 23:33:00 +0200 Subject: [PATCH] git subrepo pull --force deps/lightrec subrepo: subdir: "deps/lightrec" merged: "83b50408ae" upstream: origin: "https://github.com/pcercuei/lightrec.git" branch: "master" commit: "83b50408ae" git-subrepo: version: "0.4.6" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "110b9eb" --- deps/lightrec/.gitrepo | 4 +- deps/lightrec/constprop.c | 1 + deps/lightrec/disassembler.c | 6 ++ deps/lightrec/disassembler.h | 2 + deps/lightrec/emitter.c | 108 +++++++++++++++++++++++++++++------ deps/lightrec/interpreter.c | 2 + deps/lightrec/lightrec.c | 31 ++++++++++ deps/lightrec/lightrec.h | 3 + deps/lightrec/optimizer.c | 43 ++++++++++++++ 9 files changed, 179 insertions(+), 21 deletions(-) diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 6696c606..94e769a8 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = b53e0e808d1425d93d3430f526b9f739b1a9c42e - parent = fb865ffe3d7e066905271b7b9e678d63dc7b780e + commit = 83b50408ae3eeeca408d3cc7ce05393e27dbe83b + parent = 537a9a8cd4c69a081dc1790099153a9a17da6e31 method = merge cmdver = 0.4.6 diff --git a/deps/lightrec/constprop.c b/deps/lightrec/constprop.c index bdae0e2f..19403a69 100644 --- a/deps/lightrec/constprop.c +++ b/deps/lightrec/constprop.c @@ -662,6 +662,7 @@ void lightrec_consts_propagate(const struct block *block, } fallthrough; case OP_LW: + case OP_META_LWU: v[c.i.rt].known = 0; v[c.i.rt].sign = 0; break; diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c index 8bfaf4d0..5111d173 100644 --- a/deps/lightrec/disassembler.c +++ b/deps/lightrec/disassembler.c @@ -40,6 +40,10 @@ static const char * const std_opcodes[] = { [OP_SWR] = "swr ", [OP_LWC2] = "lwc2 ", [OP_SWC2] = "swc2 ", + [OP_META_MULT2] = "mult2 ", + [OP_META_MULTU2] = "multu2 ", + [OP_META_LWU] = "lwu ", + [OP_META_SWU] = "swu ", }; static const char * const special_opcodes[] = { @@ -444,6 +448,8 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, case OP_SWL: case OP_SW: case OP_SWR: + case OP_META_LWU: + case OP_META_SWU: *flags_ptr = opcode_io_flags; *nb_flags = ARRAY_SIZE(opcode_io_flags); *is_io = true; diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h index e05a093c..a19588a1 100644 --- a/deps/lightrec/disassembler.h +++ b/deps/lightrec/disassembler.h @@ -115,6 +115,8 @@ enum standard_opcodes { OP_META_MULT2 = 0x19, OP_META_MULTU2 = 0x1a, + OP_META_LWU = 0x1b, + OP_META_SWU = 0x1c, }; enum special_opcodes { diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 6f482cc4..2bd4dcad 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -14,6 +14,8 @@ #include #include +#define LIGHTNING_UNALIGNED_32BIT 4 + typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16); /* Forward declarations */ @@ -942,6 +944,8 @@ static void rec_alu_mult(struct lightrec_cstate *state, u8 reg_hi = get_mult_div_hi(c); jit_state_t *_jit = block->_jit; u8 lo, hi, rs, rt, rflags = 0; + bool no_lo = op_flag_no_lo(flags); + bool no_hi = op_flag_no_hi(flags); jit_note(__FILE__, __LINE__); @@ -953,44 +957,46 @@ static void rec_alu_mult(struct lightrec_cstate *state, rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags); - if (!op_flag_no_lo(flags)) + if (!no_lo) lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0); - else if (__WORDSIZE == 32) - lo = lightrec_alloc_reg_temp(reg_cache, _jit); - if (!op_flag_no_hi(flags)) + if (!no_hi) hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT); if (__WORDSIZE == 32) { /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit * operation if the MULT was detected a 32-bit only. */ - if (!op_flag_no_hi(flags)) { + if (no_lo) { if (is_signed) - jit_qmulr(lo, hi, rs, rt); + jit_hmulr(hi, rs, rt); else - jit_qmulr_u(lo, hi, rs, rt); - } else { + jit_hmulr_u(hi, rs, rt); + } else if (no_hi) { jit_mulr(lo, rs, rt); + } else if (is_signed) { + jit_qmulr(lo, hi, rs, rt); + } else { + jit_qmulr_u(lo, hi, rs, rt); } } else { /* On 64-bit systems, do a 64*64->64 bit operation. */ - if (op_flag_no_lo(flags)) { + if (no_lo) { jit_mulr(hi, rs, rt); jit_rshi(hi, hi, 32); } else { jit_mulr(lo, rs, rt); /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */ - if (!op_flag_no_hi(flags)) + if (!no_hi) jit_rshi(hi, lo, 32); } } lightrec_free_reg(reg_cache, rs); lightrec_free_reg(reg_cache, rt); - if (!op_flag_no_lo(flags) || __WORDSIZE == 32) + if (!no_lo) lightrec_free_reg(reg_cache, lo); - if (!op_flag_no_hi(flags)) + if (!no_hi) lightrec_free_reg(reg_cache, hi); } @@ -1270,7 +1276,8 @@ static void rec_store_memory(struct lightrec_cstate *cstate, s32 lut_offt = offsetof(struct lightrec_state, code_lut); bool no_mask = op_flag_no_mask(op->flags); bool add_imm = c.i.imm && - ((!state->mirrors_mapped && !no_mask) || (invalidate && + (c.i.op == OP_META_SWU + || (!state->mirrors_mapped && !no_mask) || (invalidate && ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); bool need_tmp = !no_mask || add_imm || invalidate; bool swc2 = c.i.op == OP_SWC2; @@ -1320,9 +1327,15 @@ static void rec_store_memory(struct lightrec_cstate *cstate, tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); jit_new_node_ww(swap_code, tmp3, rt); - jit_new_node_www(code, imm, addr_reg2, tmp3); + + if (c.i.op == OP_META_SWU) + jit_unstr(addr_reg2, tmp3, LIGHTNING_UNALIGNED_32BIT); + else + jit_new_node_www(code, imm, addr_reg2, tmp3); lightrec_free_reg(reg_cache, tmp3); + } else if (c.i.op == OP_META_SWU) { + jit_unstr(addr_reg2, rt, LIGHTNING_UNALIGNED_32BIT); } else { jit_new_node_www(code, imm, addr_reg2, rt); } @@ -1428,7 +1441,7 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, addr_mask); /* Convert to KUNSEG and avoid RAM mirrors */ - if (!state->mirrors_mapped && c.i.imm) { + if ((c.i.op == OP_META_SWU || !state->mirrors_mapped) && c.i.imm) { imm = 0; jit_addi(tmp, rs, (s16)c.i.imm); jit_andr(tmp, tmp, reg_imm); @@ -1468,9 +1481,15 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); jit_new_node_ww(swap_code, tmp2, rt); - jit_new_node_www(code, imm, tmp, tmp2); + + if (c.i.op == OP_META_SWU) + jit_unstr(tmp, tmp2, LIGHTNING_UNALIGNED_32BIT); + else + jit_new_node_www(code, imm, tmp, tmp2); lightrec_free_reg(reg_cache, tmp2); + } else if (c.i.op == OP_META_SWU) { + jit_unstr(tmp, rt, LIGHTNING_UNALIGNED_32BIT); } else { jit_new_node_www(code, imm, tmp, rt); } @@ -1540,6 +1559,18 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block else jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + if (c.i.op == OP_META_SWU) { + /* With a SWU opcode, we might have touched the following 32-bit + * word, so invalidate it as well */ + if (lut_is_32bit(state)) { + jit_stxi_i(offsetof(struct lightrec_state, code_lut) + 4, + tmp, tmp3); + } else { + jit_stxi(offsetof(struct lightrec_state, code_lut) + + sizeof(uintptr_t), tmp, tmp3); + } + } + if (different_offsets) { jit_movi(tmp, state->offset_ram); @@ -1565,9 +1596,15 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block tmp = lightrec_alloc_reg_temp(reg_cache, _jit); jit_new_node_ww(swap_code, tmp, rt); - jit_new_node_www(code, 0, tmp2, tmp); + + if (c.i.op == OP_META_SWU) + jit_unstr(tmp2, tmp, LIGHTNING_UNALIGNED_32BIT); + else + jit_new_node_www(code, 0, tmp2, tmp); lightrec_free_reg(reg_cache, tmp); + } else if (c.i.op == OP_META_SWU) { + jit_unstr(tmp2, rt, LIGHTNING_UNALIGNED_32BIT); } else { jit_new_node_www(code, 0, tmp2, rt); } @@ -1696,7 +1733,8 @@ static void rec_load_memory(struct lightrec_cstate *cstate, rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags); - if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) { + if ((op->i.op == OP_META_LWU && c.i.imm) + || (!cstate->state->mirrors_mapped && c.i.imm && !no_mask)) { jit_addi(rt, rs, (s16)c.i.imm); addr_reg = rt; imm = 0; @@ -1705,6 +1743,9 @@ static void rec_load_memory(struct lightrec_cstate *cstate, imm = (s16)c.i.imm; } + if (op->i.op == OP_META_LWU) + imm = LIGHTNING_UNALIGNED_32BIT; + if (!no_mask) { reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, addr_mask); @@ -1815,7 +1856,8 @@ static void rec_load_direct(struct lightrec_cstate *cstate, if ((state->offset_ram == state->offset_bios && state->offset_ram == state->offset_scratch && - state->mirrors_mapped) || !c.i.imm) { + state->mirrors_mapped && c.i.op != OP_META_LWU) + || !c.i.imm) { addr_reg = rs; imm = (s16)c.i.imm; } else { @@ -1827,6 +1869,9 @@ static void rec_load_direct(struct lightrec_cstate *cstate, lightrec_free_reg(reg_cache, rs); } + if (op->i.op == OP_META_LWU) + imm = LIGHTNING_UNALIGNED_32BIT; + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); if (state->offset_ram == state->offset_bios && @@ -2780,6 +2825,29 @@ static void rec_meta_COM(struct lightrec_cstate *state, lightrec_free_reg(reg_cache, rd); } +static void rec_meta_LWU(struct lightrec_cstate *state, + const struct block *block, + u16 offset) +{ + jit_code_t code; + + if (is_big_endian() && __WORDSIZE == 64) + code = jit_code_unldr_u; + else + code = jit_code_unldr; + + _jit_name(block->_jit, __func__); + rec_load(state, block, offset, code, jit_code_bswapr_ui, false); +} + +static void rec_meta_SWU(struct lightrec_cstate *state, + const struct block *block, + u16 offset) +{ + _jit_name(block->_jit, __func__); + rec_store(state, block, offset, jit_code_unstr, jit_code_bswapr_ui); +} + static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset) { @@ -2825,6 +2893,8 @@ static const lightrec_rec_func_t rec_standard[64] = { [OP_META] = rec_META, [OP_META_MULT2] = rec_meta_MULT2, [OP_META_MULTU2] = rec_meta_MULT2, + [OP_META_LWU] = rec_meta_LWU, + [OP_META_SWU] = rec_meta_SWU, }; static const lightrec_rec_func_t rec_special[64] = { diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c index 247fdb65..9078b693 100644 --- a/deps/lightrec/interpreter.c +++ b/deps/lightrec/interpreter.c @@ -1065,6 +1065,8 @@ static const lightrec_int_func_t int_standard[64] = { [OP_META] = int_META, [OP_META_MULT2] = int_META_MULT2, [OP_META_MULTU2] = int_META_MULT2, + [OP_META_LWU] = int_load, + [OP_META_SWU] = int_store, }; static const lightrec_int_func_t int_special[64] = { diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 5cf45983..696a5ddd 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -80,6 +80,27 @@ static u32 lightrec_default_lw(struct lightrec_state *state, return LE32TOH(*(u32 *)host); } +static u32 lightrec_default_lwu(struct lightrec_state *state, + u32 opcode, void *host, u32 addr) +{ + u32 val; + + memcpy(&val, host, 4); + + return LE32TOH(val); +} + +static void lightrec_default_swu(struct lightrec_state *state, u32 opcode, + void *host, u32 addr, u32 data) +{ + data = HTOLE32(data); + + memcpy(host, &data, 4); + + if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY)) + lightrec_invalidate(state, addr & ~0x3, 8); +} + static const struct lightrec_mem_map_ops lightrec_default_ops = { .sb = lightrec_default_sb, .sh = lightrec_default_sh, @@ -87,6 +108,8 @@ static const struct lightrec_mem_map_ops lightrec_default_ops = { .lb = lightrec_default_lb, .lh = lightrec_default_lh, .lw = lightrec_default_lw, + .lwu = lightrec_default_lwu, + .swu = lightrec_default_swu, }; static void __segfault_cb(struct lightrec_state *state, u32 addr, @@ -331,6 +354,11 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base, return lightrec_lwl(state, ops, opcode, host, addr, data); case OP_LWR: return lightrec_lwr(state, ops, opcode, host, addr, data); + case OP_META_LWU: + return ops->lwu(state, opcode, host, addr); + case OP_META_SWU: + ops->swu(state, opcode, host, addr, data); + return 0; case OP_LW: default: return ops->lw(state, opcode, host, addr); @@ -352,6 +380,7 @@ static void lightrec_rw_helper(struct lightrec_state *state, case OP_LWL: case OP_LWR: case OP_LW: + case OP_META_LWU: if (OPT_HANDLE_LOAD_DELAYS && unlikely(!state->in_delay_slot_n)) { state->temp_reg = ret; state->in_delay_slot_n = 0xff; @@ -1482,6 +1511,8 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) case OP_SWR: case OP_LWC2: case OP_SWC2: + case OP_META_LWU: + case OP_META_SWU: if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags)) return false; fallthrough; diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h index 5a66e73e..4cda1a0a 100644 --- a/deps/lightrec/lightrec.h +++ b/deps/lightrec/lightrec.h @@ -95,6 +95,9 @@ struct lightrec_mem_map_ops { u8 (*lb)(struct lightrec_state *, u32 opcode, void *host, u32 addr); u16 (*lh)(struct lightrec_state *, u32 opcode, void *host, u32 addr); u32 (*lw)(struct lightrec_state *, u32 opcode, void *host, u32 addr); + u32 (*lwu)(struct lightrec_state *, u32 opcode, void *host, u32 addr); + void (*swu)(struct lightrec_state *, u32 opcode, + void *host, u32 addr, u32 data); }; struct lightrec_mem_map { diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index c01e024b..90b21398 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -114,6 +114,8 @@ static u64 opcode_read_mask(union code op) case OP_SWL: case OP_SW: case OP_SWR: + case OP_META_LWU: + case OP_META_SWU: return BIT(op.i.rs) | BIT(op.i.rt); case OP_META: return BIT(op.m.rs); @@ -186,6 +188,7 @@ u64 opcode_write_mask(union code op) case OP_LBU: case OP_LHU: case OP_LWR: + case OP_META_LWU: return BIT(op.i.rt); case OP_JAL: return BIT(31); @@ -382,6 +385,7 @@ bool opcode_is_load(union code op) case OP_LHU: case OP_LWR: case OP_LWC2: + case OP_META_LWU: return true; default: return false; @@ -397,6 +401,7 @@ static bool opcode_is_store(union code op) case OP_SWL: case OP_SWR: case OP_SWC2: + case OP_META_SWU: return true; default: return false; @@ -438,6 +443,7 @@ static bool is_nop(union code op) case OP_LBU: case OP_LHU: case OP_LWR: + case OP_META_LWU: return false; default: return true; @@ -822,6 +828,7 @@ static void lightrec_patch_known_zero(struct opcode *op, case OP_SWL: case OP_SW: case OP_SWR: + case OP_META_SWU: if (is_known_zero(v, op->i.rt)) op->i.rt = 0; fallthrough; @@ -834,6 +841,7 @@ static void lightrec_patch_known_zero(struct opcode *op, case OP_LWR: case OP_LWC2: case OP_SWC2: + case OP_META_LWU: if (is_known(v, op->i.rs) && kunseg(v[op->i.rs].value) == 0) op->i.rs = 0; @@ -879,6 +887,7 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER; unsigned int i; bool local; + int idx; u8 tmp; for (i = 0; i < block->nb_ops; i++) { @@ -1016,6 +1025,40 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl } } break; + case OP_LWL: + case OP_LWR: + if (i == 0 || !has_delay_slot(list[i - 1].c)) { + idx = find_next_reader(list, i + 1, op->i.rt); + if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4) + && list[idx].i.rs == op->i.rs + && list[idx].i.rt == op->i.rt + && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) { + /* Replace a LWL/LWR combo with a META_LWU */ + if (op->i.op == OP_LWL) + op->i.imm -= 3; + op->i.op = OP_META_LWU; + list[idx].opcode = 0; + pr_debug("Convert LWL/LWR to LWU\n"); + } + } + break; + case OP_SWL: + case OP_SWR: + if (i == 0 || !has_delay_slot(list[i - 1].c)) { + idx = find_next_reader(list, i + 1, op->i.rt); + if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4) + && list[idx].i.rs == op->i.rs + && list[idx].i.rt == op->i.rt + && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) { + /* Replace a SWL/SWR combo with a META_SWU */ + if (op->i.op == OP_SWL) + op->i.imm -= 3; + op->i.op = OP_META_SWU; + list[idx].opcode = 0; + pr_debug("Convert SWL/SWR to SWU\n"); + } + } + break; case OP_REGIMM: switch (op->r.rt) { case OP_REGIMM_BLTZ: -- 2.39.5