git subrepo pull --force deps/lightrec
[pcsx_rearmed.git] / deps / lightrec / optimizer.c
index 98a26f6..7304abc 100644 (file)
@@ -277,7 +277,8 @@ static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg)
                        return true;
 
                if (has_delay_slot(list[i].c)) {
-                       if (list[i].flags & LIGHTREC_NO_DS)
+                       if (list[i].flags & LIGHTREC_NO_DS ||
+                           opcode_reads_register(list[i + 1].c, reg))
                                return false;
 
                        return opcode_writes_register(list[i + 1].c, reg);
@@ -459,12 +460,18 @@ bool load_in_delay_slot(union code op)
        return false;
 }
 
-static u32 lightrec_propagate_consts(const struct opcode *op, u32 known, u32 *v)
+static u32 lightrec_propagate_consts(const struct opcode *op,
+                                    const struct opcode *prev,
+                                    u32 known, u32 *v)
 {
-       union code c = op->c;
+       union code c = prev->c;
+
+       /* Register $zero is always, well, zero */
+       known |= BIT(0);
+       v[0] = 0;
 
        if (op->flags & LIGHTREC_SYNC)
-               return 0;
+               return BIT(0);
 
        switch (c.i.op) {
        case OP_SPECIAL:
@@ -813,15 +820,19 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset)
 static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
 {
        struct opcode *list = block->opcode_list;
-       struct opcode *op;
+       struct opcode *prev, *op = NULL;
        u32 known = BIT(0);
        u32 values[32] = { 0 };
        unsigned int i;
        int reader;
 
        for (i = 0; i < block->nb_ops; i++) {
+               prev = op;
                op = &list[i];
 
+               if (prev)
+                       known = lightrec_propagate_consts(op, prev, known, values);
+
                /* Transform all opcodes detected as useless to real NOPs
                 * (0x0: SLL r0, r0, #0) */
                if (op->opcode != 0 && is_nop(op->c)) {
@@ -833,10 +844,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                if (!op->opcode)
                        continue;
 
-               /* Register $zero is always, well, zero */
-               known |= BIT(0);
-               values[0] = 0;
-
                switch (op->i.op) {
                case OP_BEQ:
                        if (op->i.rs == op->i.rt) {
@@ -933,8 +940,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                default: /* fall-through */
                        break;
                }
-
-               known = lightrec_propagate_consts(op, known, values);
        }
 
        return 0;
@@ -1229,18 +1234,19 @@ static int lightrec_early_unload(struct lightrec_state *state, struct block *blo
 static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
 {
        const struct lightrec_mem_map *map;
-       struct opcode *list;
+       struct opcode *prev2, *prev = NULL, *list = NULL;
        u32 known = BIT(0);
        u32 values[32] = { 0 };
        unsigned int i;
        u32 val;
 
        for (i = 0; i < block->nb_ops; i++) {
+               prev2 = prev;
+               prev = list;
                list = &block->opcode_list[i];
 
-               /* Register $zero is always, well, zero */
-               known |= BIT(0);
-               values[0] = 0;
+               if (prev)
+                       known = lightrec_propagate_consts(list, prev, known, values);
 
                switch (list->i.op) {
                case OP_SB:
@@ -1283,24 +1289,47 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
                case OP_LWR:
                case OP_LWC2:
                        if (OPT_FLAG_IO && (known & BIT(list->i.rs))) {
-                               val = kunseg(values[list->i.rs] + (s16) list->i.imm);
-                               map = lightrec_get_map(state, NULL, val);
+                               if (prev && prev->i.op == OP_LUI &&
+                                   !(prev2 && has_delay_slot(prev2->c)) &&
+                                   prev->i.rt == list->i.rs &&
+                                   list->i.rt == list->i.rs &&
+                                   prev->i.imm & 0x8000) {
+                                       pr_debug("Convert LUI at offset 0x%x to kuseg\n",
+                                                i - 1 << 2);
+
+                                       val = kunseg(prev->i.imm << 16);
+                                       prev->i.imm = val >> 16;
+                                       values[list->i.rs] = val;
+                               }
+
+                               val = values[list->i.rs] + (s16) list->i.imm;
+                               map = lightrec_get_map(state, NULL, kunseg(val));
 
                                if (!map || map->ops ||
                                    map == &state->maps[PSX_MAP_PARALLEL_PORT]) {
-                                       pr_debug("Flagging opcode %u as accessing I/O registers\n",
+                                       pr_debug("Flagging opcode %u as I/O access\n",
                                                 i);
-                                       list->flags |= LIGHTREC_HW_IO;
-                               } else {
-                                       pr_debug("Flaging opcode %u as direct memory access\n", i);
-                                       list->flags |= LIGHTREC_DIRECT_IO;
+                                       list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW);
+                                       break;
+                               }
+
+                               if (val - map->pc < map->length)
+                                       list->flags |= LIGHTREC_NO_MASK;
+
+                               if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) {
+                                       pr_debug("Flaging opcode %u as RAM access\n", i);
+                                       list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
+                               } else if (map == &state->maps[PSX_MAP_BIOS]) {
+                                       pr_debug("Flaging opcode %u as BIOS access\n", i);
+                                       list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
+                               } else if (map == &state->maps[PSX_MAP_SCRATCH_PAD]) {
+                                       pr_debug("Flaging opcode %u as scratchpad access\n", i);
+                                       list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
                                }
                        }
                default: /* fall-through */
                        break;
                }
-
-               known = lightrec_propagate_consts(list, known, values);
        }
 
        return 0;
@@ -1476,23 +1505,43 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset,
        }
 }
 
+static bool lightrec_always_skip_div_check(void) /* true iff built with __mips__ defined */
+{
+#ifdef __mips__ /* NOTE(review): presumably predefined by the compiler on MIPS hosts - confirm */
+       return true; /* lightrec_flag_mults_divs() then flags every DIV/DIVU with LIGHTREC_NO_DIV_CHECK */
+#else
+       return false; /* the flag is then set only when the divisor is a known non-zero constant */
+#endif
+}
+
 static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
 {
-       struct opcode *list;
+       struct opcode *prev, *list = NULL;
        u8 reg_hi, reg_lo;
        unsigned int i;
+       u32 known = BIT(0);
+       u32 values[32] = { 0 };
 
        for (i = 0; i < block->nb_ops - 1; i++) {
+               prev = list;
                list = &block->opcode_list[i];
 
+               if (prev)
+                       known = lightrec_propagate_consts(list, prev, known, values);
+
                if (list->i.op != OP_SPECIAL)
                        continue;
 
                switch (list->r.op) {
-               case OP_SPECIAL_MULT:
-               case OP_SPECIAL_MULTU:
                case OP_SPECIAL_DIV:
                case OP_SPECIAL_DIVU:
+                       /* If we are dividing by a non-zero constant, don't
+                        * emit the div-by-zero check. */
+                       if (lightrec_always_skip_div_check() ||
+                           (known & BIT(list->c.r.rt) && values[list->c.r.rt]))
+                               list->flags |= LIGHTREC_NO_DIV_CHECK;
+               case OP_SPECIAL_MULT: /* fall-through */
+               case OP_SPECIAL_MULTU:
                        break;
                default:
                        continue;
@@ -1500,8 +1549,9 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *
 
                /* Don't support opcodes in delay slots */
                if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
-                   (list->flags & LIGHTREC_NO_DS))
+                   (list->flags & LIGHTREC_NO_DS)) {
                        continue;
+               }
 
                reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
                if (reg_lo == 0) {