X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightrec%2Foptimizer.c;h=199ca40bcfb2383e07176c10b828c08835ac791e;hb=2fb8465594a7ffb7ccbac39fdedf7c70085d7f63;hp=38d77d8e8cc9181263e9696e3a8cd9a2ffccfdf1;hpb=03b78a3bf48813202e01149ae0b3c5c1f01efb4c;p=pcsx_rearmed.git diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 38d77d8e..199ca40b 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -114,6 +114,8 @@ static u64 opcode_read_mask(union code op) case OP_SWL: case OP_SW: case OP_SWR: + case OP_META_LWU: + case OP_META_SWU: return BIT(op.i.rs) | BIT(op.i.rt); case OP_META: return BIT(op.m.rs); @@ -186,6 +188,7 @@ u64 opcode_write_mask(union code op) case OP_LBU: case OP_LHU: case OP_LWR: + case OP_META_LWU: return BIT(op.i.rt); case OP_JAL: return BIT(31); @@ -296,6 +299,9 @@ static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg) if (opcode_writes_register(list[i].c, reg)) return true; + if (is_syscall(list[i].c)) + return false; + if (has_delay_slot(list[i].c)) { if (op_flag_no_ds(list[i].flags) || opcode_reads_register(list[i + 1].c, reg)) @@ -339,7 +345,7 @@ static bool reg_is_read_or_written(const struct opcode *list, return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg); } -bool opcode_is_mfc(union code op) +static bool opcode_is_mfc(union code op) { switch (op.i.op) { case OP_CP0: @@ -371,7 +377,7 @@ bool opcode_is_mfc(union code op) return false; } -bool opcode_is_load(union code op) +static bool opcode_is_load(union code op) { switch (op.i.op) { case OP_LB: @@ -382,6 +388,7 @@ bool opcode_is_load(union code op) case OP_LHU: case OP_LWR: case OP_LWC2: + case OP_META_LWU: return true; default: return false; @@ -397,12 +404,19 @@ static bool opcode_is_store(union code op) case OP_SWL: case OP_SWR: case OP_SWC2: + case OP_META_SWU: return true; default: return false; } } +bool opcode_has_load_delay(union code op) +{ + return (opcode_is_load(op) && op.i.rt && op.i.op != OP_LWC2) + || opcode_is_mfc(op); +} + static u8 opcode_get_io_size(union code op) { switch (op.i.op) { @@ -438,6 +452,7 @@ static bool is_nop(union code op) case OP_LBU: case OP_LHU: case OP_LWR: + case OP_META_LWU: return false; default: return true; @@ -822,6 +837,7 @@ static void lightrec_patch_known_zero(struct opcode *op, case OP_SWL: case OP_SW: case OP_SWR: + case OP_META_SWU: if (is_known_zero(v, op->i.rt)) op->i.rt = 0; fallthrough; @@ -834,6 +850,7 @@ static void lightrec_patch_known_zero(struct opcode *op, case OP_LWR: case OP_LWC2: case OP_SWC2: + case OP_META_LWU: if (is_known(v, op->i.rs) && kunseg(v[op->i.rs].value) == 0) op->i.rs = 0; @@ -867,12 +884,19 @@ static void lightrec_reset_syncs(struct block *block) } } +static void maybe_remove_load_delay(struct opcode *op) +{ + if (op_flag_load_delay(op->flags) && opcode_is_load(op->c)) + op->flags &= ~LIGHTREC_LOAD_DELAY; +} + static int lightrec_transform_ops(struct lightrec_state *state, struct block *block) { struct opcode *op, *list = block->opcode_list; struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER; unsigned int i; bool local; + int idx; u8 tmp; for (i = 0; i < block->nb_ops; i++) { @@ -907,6 +931,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl (v[op->i.rs].value ^ v[op->i.rt].value)) { pr_debug("Found never-taken BEQ\n"); + if (!op_flag_no_ds(op->flags)) + maybe_remove_load_delay(&list[i + 1]); + local = op_flag_local_branch(op->flags); op->opcode = 0; op->flags = 0; @@ -931,6 +958,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl v[op->i.rs].value == v[op->i.rt].value) { pr_debug("Found never-taken BNE\n"); + if (!op_flag_no_ds(op->flags)) + maybe_remove_load_delay(&list[i + 1]); + local = op_flag_local_branch(op->flags); op->opcode = 0; op->flags = 0; @@ -959,6 +989,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl v[op->i.rs].value & BIT(31)) { pr_debug("Found never-taken BGTZ\n"); + if (!op_flag_no_ds(op->flags)) + maybe_remove_load_delay(&list[i + 1]); + local = op_flag_local_branch(op->flags); op->opcode = 0; op->flags = 0; @@ -1001,6 +1034,40 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl } } break; + case OP_LWL: + case OP_LWR: + if (i == 0 || !has_delay_slot(list[i - 1].c)) { + idx = find_next_reader(list, i + 1, op->i.rt); + if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4) + && list[idx].i.rs == op->i.rs + && list[idx].i.rt == op->i.rt + && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) { + /* Replace a LWL/LWR combo with a META_LWU */ + if (op->i.op == OP_LWL) + op->i.imm -= 3; + op->i.op = OP_META_LWU; + list[idx].opcode = 0; + pr_debug("Convert LWL/LWR to LWU\n"); + } + } + break; + case OP_SWL: + case OP_SWR: + if (i == 0 || !has_delay_slot(list[i - 1].c)) { + idx = find_next_reader(list, i + 1, op->i.rt); + if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4) + && list[idx].i.rs == op->i.rs + && list[idx].i.rt == op->i.rt + && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) { + /* Replace a SWL/SWR combo with a META_SWU */ + if (op->i.op == OP_SWL) + op->i.imm -= 3; + op->i.op = OP_META_SWU; + list[idx].opcode = 0; + pr_debug("Convert SWL/SWR to SWU\n"); + } + } + break; case OP_REGIMM: switch (op->r.rt) { case OP_REGIMM_BLTZ: @@ -1017,6 +1084,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl } else { pr_debug("Found never-taken BLTZ/BGEZ\n"); + if (!op_flag_no_ds(op->flags)) + maybe_remove_load_delay(&list[i + 1]); + local = op_flag_local_branch(op->flags); op->opcode = 0; op->flags = 0; @@ -1321,7 +1391,7 @@ static int lightrec_handle_load_delays(struct lightrec_state *state, for (i = 0; i < block->nb_ops; i++) { op = &list[i]; - if (!opcode_is_load(op->c) || !op->c.i.rt || op->c.i.op == OP_LWC2) + if (!opcode_has_load_delay(op->c)) continue; if (!is_delay_slot(list, i)) {