libretro: adjust psxclock description
[pcsx_rearmed.git] / deps / lightrec / optimizer.c
index 58d9d56..199ca40 100644 (file)
@@ -114,6 +114,8 @@ static u64 opcode_read_mask(union code op)
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
+       case OP_META_LWU:
+       case OP_META_SWU:
                return BIT(op.i.rs) | BIT(op.i.rt);
        case OP_META:
                return BIT(op.m.rs);
@@ -186,6 +188,7 @@ u64 opcode_write_mask(union code op)
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
+       case OP_META_LWU:
                return BIT(op.i.rt);
        case OP_JAL:
                return BIT(31);
@@ -296,6 +299,9 @@ static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
                if (opcode_writes_register(list[i].c, reg))
                        return true;
 
+               if (is_syscall(list[i].c))
+                       return false;
+
                if (has_delay_slot(list[i].c)) {
                        if (op_flag_no_ds(list[i].flags) ||
                            opcode_reads_register(list[i + 1].c, reg))
@@ -339,7 +345,7 @@ static bool reg_is_read_or_written(const struct opcode *list,
        return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg);
 }
 
-bool opcode_is_mfc(union code op)
+static bool opcode_is_mfc(union code op)
 {
        switch (op.i.op) {
        case OP_CP0:
@@ -371,7 +377,7 @@ bool opcode_is_mfc(union code op)
        return false;
 }
 
-bool opcode_is_load(union code op)
+static bool opcode_is_load(union code op)
 {
        switch (op.i.op) {
        case OP_LB:
@@ -382,6 +388,7 @@ bool opcode_is_load(union code op)
        case OP_LHU:
        case OP_LWR:
        case OP_LWC2:
+       case OP_META_LWU:
                return true;
        default:
                return false;
@@ -397,12 +404,19 @@ static bool opcode_is_store(union code op)
        case OP_SWL:
        case OP_SWR:
        case OP_SWC2:
+       case OP_META_SWU:
                return true;
        default:
                return false;
        }
 }
 
+bool opcode_has_load_delay(union code op)
+{
+       /* opcode_is_mfc() ops qualify; so do loads with rt != 0, LWC2 excepted */
+       return opcode_is_mfc(op) || (opcode_is_load(op) && op.i.rt && op.i.op != OP_LWC2);
+}
+
 static u8 opcode_get_io_size(union code op)
 {
        switch (op.i.op) {
@@ -438,6 +452,7 @@ static bool is_nop(union code op)
                case OP_LBU:
                case OP_LHU:
                case OP_LWR:
+               case OP_META_LWU:
                        return false;
                default:
                        return true;
@@ -596,7 +611,7 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset,
                                pr_debug("Convert LHU+SLL+SRA to LH\n");
 
                        v[ldop->i.rt].known = 0;
-                       v[ldop->i.rt].sign = 0xffffff80 << 24 - curr->r.imm;
+                       v[ldop->i.rt].sign = 0xffffff80 << (24 - curr->r.imm);
                }
        }
 
@@ -647,6 +662,31 @@ lightrec_remove_useless_lui(struct block *block, unsigned int offset,
        }
 }
 
+/* Tag a LUI and the next reader of its rt with LIGHTREC_MOVI when that reader
+ * is an ORI/ADDI/ADDIU with rs == rt and a non-zero immediate. */
+static void lightrec_lui_to_movi(struct block *block, unsigned int offset)
+{
+       struct opcode *user, *list = block->opcode_list, *lui = &list[offset];
+       int idx;
+
+       if (lui->i.op != OP_LUI)
+               return;
+
+       idx = find_next_reader(list, offset + 1, lui->i.rt);
+       if (idx <= 0)
+               return;
+
+       user = &list[idx];
+       if (user->i.rs != user->i.rt || !user->i.imm)
+               return;
+
+       if (user->i.op == OP_ORI || user->i.op == OP_ADDI ||
+           user->i.op == OP_ADDIU) {
+               user->flags |= LIGHTREC_MOVI;
+               lui->flags |= LIGHTREC_MOVI;
+       }
+}
+
 static void lightrec_modify_lui(struct block *block, unsigned int offset)
 {
        union code c, *lui = &block->opcode_list[offset].c;
@@ -669,7 +709,7 @@ static void lightrec_modify_lui(struct block *block, unsigned int offset)
                        }
 
                        pr_debug("Convert LUI at offset 0x%x to kuseg\n",
-                                i - 1 << 2);
+                                (i - 1) << 2);
                        lui->i.imm = kunseg(lui->i.imm << 16) >> 16;
                        break;
                }
@@ -797,6 +837,7 @@ static void lightrec_patch_known_zero(struct opcode *op,
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
+       case OP_META_SWU:
                if (is_known_zero(v, op->i.rt))
                        op->i.rt = 0;
                fallthrough;
@@ -809,6 +850,7 @@ static void lightrec_patch_known_zero(struct opcode *op,
        case OP_LWR:
        case OP_LWC2:
        case OP_SWC2:
+       case OP_META_LWU:
                if (is_known(v, op->i.rs)
                    && kunseg(v[op->i.rs].value) == 0)
                        op->i.rs = 0;
@@ -842,12 +884,19 @@ static void lightrec_reset_syncs(struct block *block)
        }
 }
 
+static void maybe_remove_load_delay(struct opcode *op)
+{
+       if (opcode_is_load(op->c) && op_flag_load_delay(op->flags))
+               op->flags &= ~LIGHTREC_LOAD_DELAY; /* flagged loads only */
+}
+
 static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
 {
        struct opcode *op, *list = block->opcode_list;
        struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
        unsigned int i;
        bool local;
+       int idx;
        u8 tmp;
 
        for (i = 0; i < block->nb_ops; i++) {
@@ -882,6 +931,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                   (v[op->i.rs].value ^ v[op->i.rt].value)) {
                                pr_debug("Found never-taken BEQ\n");
 
+                               if (!op_flag_no_ds(op->flags))
+                                       maybe_remove_load_delay(&list[i + 1]);
+
                                local = op_flag_local_branch(op->flags);
                                op->opcode = 0;
                                op->flags = 0;
@@ -906,6 +958,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                   v[op->i.rs].value == v[op->i.rt].value) {
                                pr_debug("Found never-taken BNE\n");
 
+                               if (!op_flag_no_ds(op->flags))
+                                       maybe_remove_load_delay(&list[i + 1]);
+
                                local = op_flag_local_branch(op->flags);
                                op->opcode = 0;
                                op->flags = 0;
@@ -934,6 +989,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                            v[op->i.rs].value & BIT(31)) {
                                pr_debug("Found never-taken BGTZ\n");
 
+                               if (!op_flag_no_ds(op->flags))
+                                       maybe_remove_load_delay(&list[i + 1]);
+
                                local = op_flag_local_branch(op->flags);
                                op->opcode = 0;
                                op->flags = 0;
@@ -947,6 +1005,8 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                        if (i == 0 || !has_delay_slot(list[i - 1].c))
                                lightrec_modify_lui(block, i);
                        lightrec_remove_useless_lui(block, i, v);
+                       if (i == 0 || !has_delay_slot(list[i - 1].c))
+                               lightrec_lui_to_movi(block, i);
                        break;
 
                /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
@@ -974,6 +1034,40 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                }
                        }
                        break;
+               case OP_LWL:
+               case OP_LWR:
+                       if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+                               idx = find_next_reader(list, i + 1, op->i.rt);
+                               if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+                                   && list[idx].i.rs == op->i.rs
+                                   && list[idx].i.rt == op->i.rt
+                                   && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+                                       /* Replace a LWL/LWR combo with a META_LWU */
+                                       if (op->i.op == OP_LWL)
+                                               op->i.imm -= 3;
+                                       op->i.op = OP_META_LWU;
+                                       list[idx].opcode = 0;
+                                       pr_debug("Convert LWL/LWR to LWU\n");
+                               }
+                       }
+                       break;
+               case OP_SWL:
+               case OP_SWR:
+                       if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+                               idx = find_next_reader(list, i + 1, op->i.rt);
+                               if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+                                   && list[idx].i.rs == op->i.rs
+                                   && list[idx].i.rt == op->i.rt
+                                   && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+                                       /* Replace a SWL/SWR combo with a META_SWU */
+                                       if (op->i.op == OP_SWL)
+                                               op->i.imm -= 3;
+                                       op->i.op = OP_META_SWU;
+                                       list[idx].opcode = 0;
+                                       pr_debug("Convert SWL/SWR to SWU\n");
+                               }
+                       }
+                       break;
                case OP_REGIMM:
                        switch (op->r.rt) {
                        case OP_REGIMM_BLTZ:
@@ -990,6 +1084,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                } else {
                                        pr_debug("Found never-taken BLTZ/BGEZ\n");
 
+                                       if (!op_flag_no_ds(op->flags))
+                                               maybe_remove_load_delay(&list[i + 1]);
+
                                        local = op_flag_local_branch(op->flags);
                                        op->opcode = 0;
                                        op->flags = 0;
@@ -1294,7 +1391,7 @@ static int lightrec_handle_load_delays(struct lightrec_state *state,
        for (i = 0; i < block->nb_ops; i++) {
                op = &list[i];
 
-               if (!opcode_is_load(op->c) || !op->c.i.rt || op->c.i.op == OP_LWC2)
+               if (!opcode_has_load_delay(op->c))
                        continue;
 
                if (!is_delay_slot(list, i)) {
@@ -1743,7 +1840,10 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
                                /* Assume that all I/O operations that target
                                 * $sp or $gp will always only target a mapped
                                 * memory (RAM, BIOS, scratchpad). */
-                               list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
+                               if (state->opt_flags & LIGHTREC_OPT_SP_GP_HIT_RAM)
+                                       list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
+                               else
+                                       list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT);
                        }
 
                        fallthrough;
@@ -2157,6 +2257,66 @@ static int lightrec_replace_memset(struct lightrec_state *state, struct block *b
        return 0;
 }
 
+/* Set BLOCK_PRELOAD_PC if, before any sync-flagged opcode, the block has a J/JAL,
+ * BLTZAL/BGEZAL, non-local branch, JALR with rd != 0, SYSCALL or BREAK. */
+static int lightrec_test_preload_pc(struct lightrec_state *state, struct block *block)
+{
+       const struct opcode *op;
+       unsigned int i;
+       bool preload;
+
+       for (i = 0; i < block->nb_ops; i++) {
+               op = &block->opcode_list[i];
+
+               if (op_flag_sync(op->flags))
+                       break;
+
+               preload = false;
+               switch (op->c.i.op) {
+               case OP_J:
+               case OP_JAL:
+                       preload = true;
+                       break;
+               case OP_REGIMM:
+                       if (op->c.r.rt == OP_REGIMM_BLTZAL ||
+                           op->c.r.rt == OP_REGIMM_BGEZAL) {
+                               preload = true;
+                               break;
+                       }
+                       fallthrough;
+               case OP_BEQ:
+               case OP_BNE:
+               case OP_BLEZ:
+               case OP_BGTZ:
+                       preload = !op_flag_local_branch(op->flags);
+                       /* The OP_SPECIAL_* tests below must not see a branch */
+                       break;
+               case OP_SPECIAL:
+                       switch (op->c.r.op) {
+                       case OP_SPECIAL_JALR:
+                               preload = op->c.r.rd != 0;
+                               break;
+                       case OP_SPECIAL_SYSCALL:
+                       case OP_SPECIAL_BREAK:
+                               preload = true;
+                               break;
+                       default:
+                               break;
+                       }
+                       break;
+               default:
+                       break;
+               }
+
+               if (preload) {
+                       block->flags |= BLOCK_PRELOAD_PC;
+                       return 0;
+               }
+       }
+
+       return 0;
+}
+
 static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = {
        IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence),
        IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset),
@@ -2170,6 +2330,7 @@ static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *
        IF_OPT(OPT_FLAG_IO, &lightrec_flag_io),
        IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs),
        IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload),
+       IF_OPT(OPT_PRELOAD_PC, &lightrec_test_preload_pc),
 };
 
 int lightrec_optimize(struct lightrec_state *state, struct block *block)