X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightrec%2Foptimizer.c;h=90b21398c519ced986d59c7802ea94366e2321da;hb=2b1b10dd333f8b525b90a15468824aae0ff4eb2f;hp=5ce58adaec772429fbaeaa41fde12568cc4e0310;hpb=cb72ea130a5ef1b2f47691ed586ad48bb0c39269;p=pcsx_rearmed.git

diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c
index 5ce58ada..90b21398 100644
--- a/deps/lightrec/optimizer.c
+++ b/deps/lightrec/optimizer.c
@@ -114,6 +114,8 @@ static u64 opcode_read_mask(union code op)
 	case OP_SWL:
 	case OP_SW:
 	case OP_SWR:
+	case OP_META_LWU:
+	case OP_META_SWU:
 		return BIT(op.i.rs) | BIT(op.i.rt);
 	case OP_META:
 		return BIT(op.m.rs);
@@ -186,6 +188,7 @@ u64 opcode_write_mask(union code op)
 	case OP_LBU:
 	case OP_LHU:
 	case OP_LWR:
+	case OP_META_LWU:
 		return BIT(op.i.rt);
 	case OP_JAL:
 		return BIT(31);
@@ -382,6 +385,7 @@ bool opcode_is_load(union code op)
 	case OP_LHU:
 	case OP_LWR:
 	case OP_LWC2:
+	case OP_META_LWU:
 		return true;
 	default:
 		return false;
@@ -397,6 +401,7 @@ static bool opcode_is_store(union code op)
 	case OP_SWL:
 	case OP_SWR:
 	case OP_SWC2:
+	case OP_META_SWU:
 		return true;
 	default:
 		return false;
@@ -438,6 +443,7 @@ static bool is_nop(union code op)
 		case OP_LBU:
 		case OP_LHU:
 		case OP_LWR:
+		case OP_META_LWU:
 			return false;
 		default:
 			return true;
@@ -596,7 +602,7 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset,
 			pr_debug("Convert LHU+SLL+SRA to LH\n");
 
 			v[ldop->i.rt].known = 0;
-			v[ldop->i.rt].sign = 0xffffff80 << 24 - curr->r.imm;
+			v[ldop->i.rt].sign = 0xffffff80 << (24 - curr->r.imm);
 		}
 	}
 
@@ -647,6 +653,36 @@ lightrec_remove_useless_lui(struct block *block, unsigned int offset,
 	}
 }
 
+/*
+ * If the opcode at "offset" is a LUI and the next opcode reading its target
+ * register is an ORI/ADDI/ADDIU that updates its source register in place
+ * with a non-zero immediate, flag both opcodes with LIGHTREC_MOVI.
+ */
+static void lightrec_lui_to_movi(struct block *block, unsigned int offset)
+{
+	struct opcode *ori, *lui = &block->opcode_list[offset];
+	int next;
+
+	if (lui->i.op != OP_LUI)
+		return;
+
+	next = find_next_reader(block->opcode_list, offset + 1, lui->i.rt);
+	if (next > 0) {
+		ori = &block->opcode_list[next];
+
+		switch (ori->i.op) {
+		case OP_ORI:
+		case OP_ADDI:
+		case OP_ADDIU:
+			if (ori->i.rs == ori->i.rt && ori->i.imm) {
+				ori->flags |= LIGHTREC_MOVI;
+				lui->flags |= LIGHTREC_MOVI;
+			}
+			break;
+		}
+	}
+}
+
 static void lightrec_modify_lui(struct block *block, unsigned int offset)
 {
 	union code c, *lui = &block->opcode_list[offset].c;
@@ -669,7 +705,7 @@ static void lightrec_modify_lui(struct block *block, unsigned int offset)
 			}
 
 			pr_debug("Convert LUI at offset 0x%x to kuseg\n",
-				 i - 1 << 2);
+				 (i - 1) << 2);
 			lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
 			break;
 		}
@@ -797,6 +833,7 @@ static void lightrec_patch_known_zero(struct opcode *op,
 	case OP_SWL:
 	case OP_SW:
 	case OP_SWR:
+	case OP_META_SWU:
 		if (is_known_zero(v, op->i.rt))
 			op->i.rt = 0;
 		fallthrough;
@@ -809,6 +846,7 @@ static void lightrec_patch_known_zero(struct opcode *op,
 	case OP_LWR:
 	case OP_LWC2:
 	case OP_SWC2:
+	case OP_META_LWU:
 		if (is_known(v, op->i.rs)
 		    && kunseg(v[op->i.rs].value) == 0)
 			op->i.rs = 0;
@@ -842,12 +880,20 @@ static void lightrec_reset_syncs(struct block *block)
 	}
 }
 
+/* Clear LIGHTREC_LOAD_DELAY on "op" if it is set and "op" is a load. */
+static void maybe_remove_load_delay(struct opcode *op)
+{
+	if (op_flag_load_delay(op->flags) && opcode_is_load(op->c))
+		op->flags &= ~LIGHTREC_LOAD_DELAY;
+}
+
 static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
 {
 	struct opcode *op, *list = block->opcode_list;
 	struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
 	unsigned int i;
 	bool local;
+	int idx;
 	u8 tmp;
 
 	for (i = 0; i < block->nb_ops; i++) {
@@ -882,6 +928,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
 				   (v[op->i.rs].value ^ v[op->i.rt].value)) {
 				pr_debug("Found never-taken BEQ\n");
 
+				if (!op_flag_no_ds(op->flags))
+					maybe_remove_load_delay(&list[i + 1]);
+
 				local = op_flag_local_branch(op->flags);
 				op->opcode = 0;
 				op->flags = 0;
@@ -906,6 +955,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
 				   v[op->i.rs].value == v[op->i.rt].value) {
 				pr_debug("Found never-taken BNE\n");
 
+				if (!op_flag_no_ds(op->flags))
+					maybe_remove_load_delay(&list[i + 1]);
+
 				local = op_flag_local_branch(op->flags);
 				op->opcode = 0;
 				op->flags = 0;
@@ -934,6 +986,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
 			    v[op->i.rs].value & BIT(31)) {
 				pr_debug("Found never-taken BGTZ\n");
 
+				if (!op_flag_no_ds(op->flags))
+					maybe_remove_load_delay(&list[i + 1]);
+
 				local = op_flag_local_branch(op->flags);
 				op->opcode = 0;
 				op->flags = 0;
@@ -947,6 +1002,8 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
 			if (i == 0 || !has_delay_slot(list[i - 1].c))
 				lightrec_modify_lui(block, i);
 			lightrec_remove_useless_lui(block, i, v);
+			if (i == 0 || !has_delay_slot(list[i - 1].c))
+				lightrec_lui_to_movi(block, i);
 			break;
 
 		/* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
@@ -974,6 +1031,40 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
 				}
 			}
 			break;
+		case OP_LWL:
+		case OP_LWR:
+			if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+				idx = find_next_reader(list, i + 1, op->i.rt);
+				if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+				    && list[idx].i.rs == op->i.rs
+				    && list[idx].i.rt == op->i.rt
+				    && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+					/* Replace a LWL/LWR combo with a META_LWU */
+					if (op->i.op == OP_LWL)
+						op->i.imm -= 3;
+					op->i.op = OP_META_LWU;
+					list[idx].opcode = 0;
+					pr_debug("Convert LWL/LWR to LWU\n");
+				}
+			}
+			break;
+		case OP_SWL:
+		case OP_SWR:
+			if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+				idx = find_next_reader(list, i + 1, op->i.rt);
+				if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+				    && list[idx].i.rs == op->i.rs
+				    && list[idx].i.rt == op->i.rt
+				    && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+					/* Replace a SWL/SWR combo with a META_SWU */
+					if (op->i.op == OP_SWL)
+						op->i.imm -= 3;
+					op->i.op = OP_META_SWU;
+					list[idx].opcode = 0;
+					pr_debug("Convert SWL/SWR to SWU\n");
+				}
+			}
+			break;
 		case OP_REGIMM:
 			switch (op->r.rt) {
 			case OP_REGIMM_BLTZ:
@@ -990,6 +1081,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
 				} else {
 					pr_debug("Found never-taken BLTZ/BGEZ\n");
 
+					if (!op_flag_no_ds(op->flags))
+						maybe_remove_load_delay(&list[i + 1]);
+
 					local = op_flag_local_branch(op->flags);
 					op->opcode = 0;
 					op->flags = 0;
@@ -1340,8 +1434,16 @@ static int lightrec_swap_load_delays(struct lightrec_state *state,
 		} else if (!in_ds && opcode_is_load(c) && c.i.op != OP_LWC2) {
 			next = block->opcode_list[i + 1].c;
 
-			if (c.i.op == OP_LWL && next.i.op == OP_LWR)
+			switch (next.i.op) {
+			case OP_LWL:
+			case OP_LWR:
+			case OP_REGIMM:
+			case OP_BEQ:
+			case OP_BNE:
+			case OP_BLEZ:
+			case OP_BGTZ:
 				continue;
+			}
 
 			if (opcode_reads_register(next, c.i.rt)
 			    && !opcode_writes_register(next, c.i.rs)) {
@@ -1735,7 +1837,10 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
 				/* Assume that all I/O operations that target
 				 * $sp or $gp will always only target a mapped
 				 * memory (RAM, BIOS, scratchpad). */
-				list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
+				if (state->opt_flags & LIGHTREC_OPT_SP_GP_HIT_RAM)
+					list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
+				else
+					list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
 			}
 
 			fallthrough;
@@ -2149,6 +2254,75 @@ static int lightrec_replace_memset(struct lightrec_state *state, struct block *b
 	return 0;
 }
 
+/*
+ * Walk the block's opcodes until one for which op_flag_sync() is true, and
+ * set BLOCK_PRELOAD_PC as soon as a J/JAL, a BLTZAL/BGEZAL, a conditional
+ * branch not flagged as local, a JALR with rd != 0, a SYSCALL or a BREAK is
+ * encountered. Always returns 0.
+ */
+static int lightrec_test_preload_pc(struct lightrec_state *state, struct block *block)
+{
+	unsigned int i;
+	union code c;
+	u32 flags;
+
+	for (i = 0; i < block->nb_ops; i++) {
+		c = block->opcode_list[i].c;
+		flags = block->opcode_list[i].flags;
+
+		if (op_flag_sync(flags))
+			break;
+
+		switch (c.i.op) {
+		case OP_J:
+		case OP_JAL:
+			block->flags |= BLOCK_PRELOAD_PC;
+			return 0;
+
+		case OP_REGIMM:
+			switch (c.r.rt) {
+			case OP_REGIMM_BLTZAL:
+			case OP_REGIMM_BGEZAL:
+				block->flags |= BLOCK_PRELOAD_PC;
+				return 0;
+			default:
+				break;
+			}
+			fallthrough;
+		case OP_BEQ:
+		case OP_BNE:
+		case OP_BLEZ:
+		case OP_BGTZ:
+			if (!op_flag_local_branch(flags)) {
+				block->flags |= BLOCK_PRELOAD_PC;
+				return 0;
+			}
+			/* Local branch: the immediate must not be decoded
+			 * as a SPECIAL function code below. */
+			break;
+
+		case OP_SPECIAL:
+			switch (c.r.op) {
+			case OP_SPECIAL_JALR:
+				if (c.r.rd) {
+					block->flags |= BLOCK_PRELOAD_PC;
+					return 0;
+				}
+				break;
+			case OP_SPECIAL_SYSCALL:
+			case OP_SPECIAL_BREAK:
+				block->flags |= BLOCK_PRELOAD_PC;
+				return 0;
+			default:
+				break;
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
 	IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
 	IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
@@ -2162,6 +2336,7 @@ static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *
 	IF_OPT(OPT_FLAG_IO, &lightrec_flag_io),
 	IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
 	IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
+	IF_OPT(OPT_PRELOAD_PC, &lightrec_test_preload_pc),
 };
 
 int lightrec_optimize(struct lightrec_state *state, struct block *block)