git subrepo pull --force deps/lightrec
[pcsx_rearmed.git] / deps / lightrec / optimizer.c
index 38d77d8..199ca40 100644 (file)
@@ -114,6 +114,8 @@ static u64 opcode_read_mask(union code op)
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
+       case OP_META_LWU:
+       case OP_META_SWU:
                return BIT(op.i.rs) | BIT(op.i.rt);
        case OP_META:
                return BIT(op.m.rs);
@@ -186,6 +188,7 @@ u64 opcode_write_mask(union code op)
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
+       case OP_META_LWU:
                return BIT(op.i.rt);
        case OP_JAL:
                return BIT(31);
@@ -296,6 +299,9 @@ static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
                if (opcode_writes_register(list[i].c, reg))
                        return true;
 
+               if (is_syscall(list[i].c))
+                       return false;
+
                if (has_delay_slot(list[i].c)) {
                        if (op_flag_no_ds(list[i].flags) ||
                            opcode_reads_register(list[i + 1].c, reg))
@@ -339,7 +345,7 @@ static bool reg_is_read_or_written(const struct opcode *list,
        return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
 }
 
-bool opcode_is_mfc(union code op)
+static bool opcode_is_mfc(union code op)
 {
        switch (op.i.op) {
        case OP_CP0:
@@ -371,7 +377,7 @@ bool opcode_is_mfc(union code op)
        return false;
 }
 
-bool opcode_is_load(union code op)
+static bool opcode_is_load(union code op)
 {
        switch (op.i.op) {
        case OP_LB:
@@ -382,6 +388,7 @@ bool opcode_is_load(union code op)
        case OP_LHU:
        case OP_LWR:
        case OP_LWC2:
+       case OP_META_LWU:
                return true;
        default:
                return false;
@@ -397,12 +404,19 @@ static bool opcode_is_store(union code op)
        case OP_SWL:
        case OP_SWR:
        case OP_SWC2:
+       case OP_META_SWU:
                return true;
        default:
                return false;
        }
 }
 
+bool opcode_has_load_delay(union code op)
+{
+       bool delayed_load = opcode_is_load(op) && op.i.rt && op.i.op != OP_LWC2;
+       return delayed_load || opcode_is_mfc(op); /* opcode_is_mfc() matches qualify too */
+}
+
 static u8 opcode_get_io_size(union code op)
 {
        switch (op.i.op) {
@@ -438,6 +452,7 @@ static bool is_nop(union code op)
                case OP_LBU:
                case OP_LHU:
                case OP_LWR:
+               case OP_META_LWU:
                        return false;
                default:
                        return true;
@@ -822,6 +837,7 @@ static void lightrec_patch_known_zero(struct opcode *op,
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
+       case OP_META_SWU:
                if (is_known_zero(v, op->i.rt))
                        op->i.rt = 0;
                fallthrough;
@@ -834,6 +850,7 @@ static void lightrec_patch_known_zero(struct opcode *op,
        case OP_LWR:
        case OP_LWC2:
        case OP_SWC2:
+       case OP_META_LWU:
                if (is_known(v, op->i.rs)
                    && kunseg(v[op->i.rs].value) == 0)
                        op->i.rs = 0;
@@ -867,12 +884,19 @@ static void lightrec_reset_syncs(struct block *block)
        }
 }
 
+static void maybe_remove_load_delay(struct opcode *op)
+{
+       if (op_flag_load_delay(op->flags) && opcode_is_load(op->c)) /* flagged, and a load per opcode_is_load() */
+               op->flags &= ~LIGHTREC_LOAD_DELAY; /* clear LIGHTREC_LOAD_DELAY; other flags untouched */
+}
+
 static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
 {
        struct opcode *op, *list = block->opcode_list;
        struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
        unsigned int i;
        bool local;
+       int idx;
        u8 tmp;
 
        for (i = 0; i < block->nb_ops; i++) {
@@ -907,6 +931,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                   (v[op->i.rs].value ^ v[op->i.rt].value)) {
                                pr_debug("Found never-taken BEQ\n");
 
+                               if (!op_flag_no_ds(op->flags))
+                                       maybe_remove_load_delay(&list[i + 1]);
+
                                local = op_flag_local_branch(op->flags);
                                op->opcode = 0;
                                op->flags = 0;
@@ -931,6 +958,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                   v[op->i.rs].value == v[op->i.rt].value) {
                                pr_debug("Found never-taken BNE\n");
 
+                               if (!op_flag_no_ds(op->flags))
+                                       maybe_remove_load_delay(&list[i + 1]);
+
                                local = op_flag_local_branch(op->flags);
                                op->opcode = 0;
                                op->flags = 0;
@@ -959,6 +989,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                            v[op->i.rs].value & BIT(31)) {
                                pr_debug("Found never-taken BGTZ\n");
 
+                               if (!op_flag_no_ds(op->flags))
+                                       maybe_remove_load_delay(&list[i + 1]);
+
                                local = op_flag_local_branch(op->flags);
                                op->opcode = 0;
                                op->flags = 0;
@@ -1001,6 +1034,40 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                }
                        }
                        break;
+               case OP_LWL:
+               case OP_LWR:
+                       if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+                               idx = find_next_reader(list, i + 1, op->i.rt);
+                               if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+                                   && list[idx].i.rs == op->i.rs
+                                   && list[idx].i.rt == op->i.rt
+                                   && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+                                       /* Replace a LWL/LWR combo with a META_LWU */
+                                       if (op->i.op == OP_LWL)
+                                               op->i.imm -= 3;
+                                       op->i.op = OP_META_LWU;
+                                       list[idx].opcode = 0;
+                                       pr_debug("Convert LWL/LWR to LWU\n");
+                               }
+                       }
+                       break;
+               case OP_SWL:
+               case OP_SWR:
+                       if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+                               idx = find_next_reader(list, i + 1, op->i.rt);
+                               if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+                                   && list[idx].i.rs == op->i.rs
+                                   && list[idx].i.rt == op->i.rt
+                                   && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+                                       /* Replace a SWL/SWR combo with a META_SWU */
+                                       if (op->i.op == OP_SWL)
+                                               op->i.imm -= 3;
+                                       op->i.op = OP_META_SWU;
+                                       list[idx].opcode = 0;
+                                       pr_debug("Convert SWL/SWR to SWU\n");
+                               }
+                       }
+                       break;
                case OP_REGIMM:
                        switch (op->r.rt) {
                        case OP_REGIMM_BLTZ:
@@ -1017,6 +1084,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                } else {
                                        pr_debug("Found never-taken BLTZ/BGEZ\n");
 
+                                       if (!op_flag_no_ds(op->flags))
+                                               maybe_remove_load_delay(&list[i + 1]);
+
                                        local = op_flag_local_branch(op->flags);
                                        op->opcode = 0;
                                        op->flags = 0;
@@ -1321,7 +1391,7 @@ static int lightrec_handle_load_delays(struct lightrec_state *state,
        for (i = 0; i < block->nb_ops; i++) {
                op = &list[i];
 
-               if (!opcode_is_load(op->c) || !op->c.i.rt || op->c.i.op == OP_LWC2)
+               if (!opcode_has_load_delay(op->c))
                        continue;
 
                if (!is_delay_slot(list, i)) {