git subrepo pull --force deps/lightrec
author Paul Cercueil <paul@crapouillou.net>
Fri, 6 Oct 2023 21:33:00 +0000 (23:33 +0200)
committer Paul Cercueil <paul@crapouillou.net>
Fri, 6 Oct 2023 21:34:32 +0000 (23:34 +0200)
subrepo:
  subdir:   "deps/lightrec"
  merged:   "83b50408ae"
upstream:
  origin:   "https://github.com/pcercuei/lightrec.git"
  branch:   "master"
  commit:   "83b50408ae"
git-subrepo:
  version:  "0.4.6"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "110b9eb"

deps/lightrec/.gitrepo
deps/lightrec/constprop.c
deps/lightrec/disassembler.c
deps/lightrec/disassembler.h
deps/lightrec/emitter.c
deps/lightrec/interpreter.c
deps/lightrec/lightrec.c
deps/lightrec/lightrec.h
deps/lightrec/optimizer.c

index 6696c60..94e769a 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/lightrec.git
        branch = master
-       commit = b53e0e808d1425d93d3430f526b9f739b1a9c42e
-       parent = fb865ffe3d7e066905271b7b9e678d63dc7b780e
+       commit = 83b50408ae3eeeca408d3cc7ce05393e27dbe83b
+       parent = 537a9a8cd4c69a081dc1790099153a9a17da6e31
        method = merge
        cmdver = 0.4.6
index bdae0e2..19403a6 100644 (file)
@@ -662,6 +662,7 @@ void lightrec_consts_propagate(const struct block *block,
                }
                fallthrough;
        case OP_LW:
+       case OP_META_LWU:
                v[c.i.rt].known = 0;
                v[c.i.rt].sign = 0;
                break;
index 8bfaf4d..5111d17 100644 (file)
@@ -40,6 +40,10 @@ static const char * const std_opcodes[] = {
        [OP_SWR]                = "swr     ",
        [OP_LWC2]               = "lwc2    ",
        [OP_SWC2]               = "swc2    ",
+       [OP_META_MULT2]         = "mult2   ",
+       [OP_META_MULTU2]        = "multu2  ",
+       [OP_META_LWU]           = "lwu     ",
+       [OP_META_SWU]           = "swu     ",
 };
 
 static const char * const special_opcodes[] = {
@@ -444,6 +448,8 @@ static int print_op(union code c, u32 pc, char *buf, size_t len,
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
+       case OP_META_LWU:
+       case OP_META_SWU:
                *flags_ptr = opcode_io_flags;
                *nb_flags = ARRAY_SIZE(opcode_io_flags);
                *is_io = true;
index e05a093..a19588a 100644 (file)
@@ -115,6 +115,8 @@ enum standard_opcodes {
 
        OP_META_MULT2           = 0x19,
        OP_META_MULTU2          = 0x1a,
+       OP_META_LWU             = 0x1b,
+       OP_META_SWU             = 0x1c,
 };
 
 enum special_opcodes {
index 6f482cc..2bd4dca 100644 (file)
@@ -14,6 +14,8 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+#define LIGHTNING_UNALIGNED_32BIT 4
+
 typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16);
 
 /* Forward declarations */
@@ -942,6 +944,8 @@ static void rec_alu_mult(struct lightrec_cstate *state,
        u8 reg_hi = get_mult_div_hi(c);
        jit_state_t *_jit = block->_jit;
        u8 lo, hi, rs, rt, rflags = 0;
+       bool no_lo = op_flag_no_lo(flags);
+       bool no_hi = op_flag_no_hi(flags);
 
        jit_note(__FILE__, __LINE__);
 
@@ -953,44 +957,46 @@ static void rec_alu_mult(struct lightrec_cstate *state,
        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
        rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
 
-       if (!op_flag_no_lo(flags))
+       if (!no_lo)
                lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
-       else if (__WORDSIZE == 32)
-               lo = lightrec_alloc_reg_temp(reg_cache, _jit);
 
-       if (!op_flag_no_hi(flags))
+       if (!no_hi)
                hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);
 
        if (__WORDSIZE == 32) {
                /* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
                 * operation if the MULT was detected a 32-bit only. */
-               if (!op_flag_no_hi(flags)) {
+               if (no_lo) {
                        if (is_signed)
-                               jit_qmulr(lo, hi, rs, rt);
+                               jit_hmulr(hi, rs, rt);
                        else
-                               jit_qmulr_u(lo, hi, rs, rt);
-               } else {
+                               jit_hmulr_u(hi, rs, rt);
+               } else if (no_hi) {
                        jit_mulr(lo, rs, rt);
+               } else if (is_signed) {
+                       jit_qmulr(lo, hi, rs, rt);
+               } else {
+                       jit_qmulr_u(lo, hi, rs, rt);
                }
        } else {
                /* On 64-bit systems, do a 64*64->64 bit operation. */
-               if (op_flag_no_lo(flags)) {
+               if (no_lo) {
                        jit_mulr(hi, rs, rt);
                        jit_rshi(hi, hi, 32);
                } else {
                        jit_mulr(lo, rs, rt);
 
                        /* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
-                       if (!op_flag_no_hi(flags))
+                       if (!no_hi)
                                jit_rshi(hi, lo, 32);
                }
        }
 
        lightrec_free_reg(reg_cache, rs);
        lightrec_free_reg(reg_cache, rt);
-       if (!op_flag_no_lo(flags) || __WORDSIZE == 32)
+       if (!no_lo)
                lightrec_free_reg(reg_cache, lo);
-       if (!op_flag_no_hi(flags))
+       if (!no_hi)
                lightrec_free_reg(reg_cache, hi);
 }
 
@@ -1270,7 +1276,8 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
        s32 lut_offt = offsetof(struct lightrec_state, code_lut);
        bool no_mask = op_flag_no_mask(op->flags);
        bool add_imm = c.i.imm &&
-               ((!state->mirrors_mapped && !no_mask) || (invalidate &&
+               (c.i.op == OP_META_SWU
+                || (!state->mirrors_mapped && !no_mask) || (invalidate &&
                ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
        bool need_tmp = !no_mask || add_imm || invalidate;
        bool swc2 = c.i.op == OP_SWC2;
@@ -1320,9 +1327,15 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
                tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
 
                jit_new_node_ww(swap_code, tmp3, rt);
-               jit_new_node_www(code, imm, addr_reg2, tmp3);
+
+               if (c.i.op == OP_META_SWU)
+                       jit_unstr(addr_reg2, tmp3, LIGHTNING_UNALIGNED_32BIT);
+               else
+                       jit_new_node_www(code, imm, addr_reg2, tmp3);
 
                lightrec_free_reg(reg_cache, tmp3);
+       } else if (c.i.op == OP_META_SWU) {
+               jit_unstr(addr_reg2, rt, LIGHTNING_UNALIGNED_32BIT);
        } else {
                jit_new_node_www(code, imm, addr_reg2, rt);
        }
@@ -1428,7 +1441,7 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
        reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit, addr_mask);
 
        /* Convert to KUNSEG and avoid RAM mirrors */
-       if (!state->mirrors_mapped && c.i.imm) {
+       if ((c.i.op == OP_META_SWU || !state->mirrors_mapped) && c.i.imm) {
                imm = 0;
                jit_addi(tmp, rs, (s16)c.i.imm);
                jit_andr(tmp, tmp, reg_imm);
@@ -1468,9 +1481,15 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
                tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
 
                jit_new_node_ww(swap_code, tmp2, rt);
-               jit_new_node_www(code, imm, tmp, tmp2);
+
+               if (c.i.op == OP_META_SWU)
+                       jit_unstr(tmp, tmp2, LIGHTNING_UNALIGNED_32BIT);
+               else
+                       jit_new_node_www(code, imm, tmp, tmp2);
 
                lightrec_free_reg(reg_cache, tmp2);
+       } else if (c.i.op == OP_META_SWU) {
+               jit_unstr(tmp, rt, LIGHTNING_UNALIGNED_32BIT);
        } else {
                jit_new_node_www(code, imm, tmp, rt);
        }
@@ -1540,6 +1559,18 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block
        else
                jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3);
 
+       if (c.i.op == OP_META_SWU) {
+               /* With a SWU opcode, we might have touched the following 32-bit
+                * word, so invalidate it as well */
+               if (lut_is_32bit(state)) {
+                       jit_stxi_i(offsetof(struct lightrec_state, code_lut) + 4,
+                                  tmp, tmp3);
+               } else {
+                       jit_stxi(offsetof(struct lightrec_state, code_lut)
+                                + sizeof(uintptr_t), tmp, tmp3);
+               }
+       }
+
        if (different_offsets) {
                jit_movi(tmp, state->offset_ram);
 
@@ -1565,9 +1596,15 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block
                tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
 
                jit_new_node_ww(swap_code, tmp, rt);
-               jit_new_node_www(code, 0, tmp2, tmp);
+
+               if (c.i.op == OP_META_SWU)
+                       jit_unstr(tmp2, tmp, LIGHTNING_UNALIGNED_32BIT);
+               else
+                       jit_new_node_www(code, 0, tmp2, tmp);
 
                lightrec_free_reg(reg_cache, tmp);
+       } else if (c.i.op == OP_META_SWU) {
+               jit_unstr(tmp2, rt, LIGHTNING_UNALIGNED_32BIT);
        } else {
                jit_new_node_www(code, 0, tmp2, rt);
        }
@@ -1696,7 +1733,8 @@ static void rec_load_memory(struct lightrec_cstate *cstate,
        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
        rt = lightrec_alloc_reg_out(reg_cache, _jit, out_reg, flags);
 
-       if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) {
+       if ((op->i.op == OP_META_LWU && c.i.imm)
+           || (!cstate->state->mirrors_mapped && c.i.imm && !no_mask)) {
                jit_addi(rt, rs, (s16)c.i.imm);
                addr_reg = rt;
                imm = 0;
@@ -1705,6 +1743,9 @@ static void rec_load_memory(struct lightrec_cstate *cstate,
                imm = (s16)c.i.imm;
        }
 
+       if (op->i.op == OP_META_LWU)
+               imm = LIGHTNING_UNALIGNED_32BIT;
+
        if (!no_mask) {
                reg_imm = lightrec_alloc_reg_temp_with_value(reg_cache, _jit,
                                                             addr_mask);
@@ -1815,7 +1856,8 @@ static void rec_load_direct(struct lightrec_cstate *cstate,
 
        if ((state->offset_ram == state->offset_bios &&
            state->offset_ram == state->offset_scratch &&
-           state->mirrors_mapped) || !c.i.imm) {
+           state->mirrors_mapped && c.i.op != OP_META_LWU)
+           || !c.i.imm) {
                addr_reg = rs;
                imm = (s16)c.i.imm;
        } else {
@@ -1827,6 +1869,9 @@ static void rec_load_direct(struct lightrec_cstate *cstate,
                        lightrec_free_reg(reg_cache, rs);
        }
 
+       if (op->i.op == OP_META_LWU)
+               imm = LIGHTNING_UNALIGNED_32BIT;
+
        tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
 
        if (state->offset_ram == state->offset_bios &&
@@ -2780,6 +2825,29 @@ static void rec_meta_COM(struct lightrec_cstate *state,
        lightrec_free_reg(reg_cache, rd);
 }
 
+static void rec_meta_LWU(struct lightrec_cstate *state, /* Recompiler handler registered for OP_META_LWU in rec_standard[]. */
+                        const struct block *block, /* block->_jit gets tagged with this function's name below */
+                        u16 offset) /* forwarded unchanged to rec_load() */
+{
+       jit_code_t code;
+       /* Select the unaligned-load node: the _u variant only on 64-bit big-endian hosts, plain unldr otherwise. */
+       if (is_big_endian() && __WORDSIZE == 64)
+               code = jit_code_unldr_u; /* NOTE(review): presumably the unsigned form is wanted here because the value is byte-swapped afterwards - confirm */
+       else
+               code = jit_code_unldr;
+       /* Label the generated code with __func__, then let the generic load path emit it. */
+       _jit_name(block->_jit, __func__);
+       rec_load(state, block, offset, code, jit_code_bswapr_ui, false); /* jit_code_bswapr_ui is passed as the byte-swap node */
+}
+
+static void rec_meta_SWU(struct lightrec_cstate *state, /* Recompiler handler registered for OP_META_SWU in rec_standard[]. */
+                        const struct block *block, /* block->_jit gets tagged with this function's name below */
+                        u16 offset) /* forwarded unchanged to rec_store() */
+{
+       _jit_name(block->_jit, __func__); /* label the generated code with __func__ */
+       rec_store(state, block, offset, jit_code_unstr, jit_code_bswapr_ui); /* generic store path, with unstr as the store node and bswapr_ui as the byte-swap node */
+}
+
 static void unknown_opcode(struct lightrec_cstate *state,
                           const struct block *block, u16 offset)
 {
@@ -2825,6 +2893,8 @@ static const lightrec_rec_func_t rec_standard[64] = {
        [OP_META]               = rec_META,
        [OP_META_MULT2]         = rec_meta_MULT2,
        [OP_META_MULTU2]        = rec_meta_MULT2,
+       [OP_META_LWU]           = rec_meta_LWU,
+       [OP_META_SWU]           = rec_meta_SWU,
 };
 
 static const lightrec_rec_func_t rec_special[64] = {
index 247fdb6..9078b69 100644 (file)
@@ -1065,6 +1065,8 @@ static const lightrec_int_func_t int_standard[64] = {
        [OP_META]               = int_META,
        [OP_META_MULT2]         = int_META_MULT2,
        [OP_META_MULTU2]        = int_META_MULT2,
+       [OP_META_LWU]           = int_load,
+       [OP_META_SWU]           = int_store,
 };
 
 static const lightrec_int_func_t int_special[64] = {
index 5cf4598..696a5dd 100644 (file)
@@ -80,6 +80,27 @@ static u32 lightrec_default_lw(struct lightrec_state *state,
        return LE32TOH(*(u32 *)host);
 }
 
+static u32 lightrec_default_lwu(struct lightrec_state *state, /* Default .lwu handler (see lightrec_default_ops): reads 32 bits from host. */
+                               u32 opcode, void *host, u32 addr) /* state, opcode and addr are not used here; only host is read */
+{
+       u32 val;
+       /* Copy with memcpy rather than dereferencing a u32 pointer, so host need not be 4-byte aligned. */
+       memcpy(&val, host, 4);
+       /* The raw bytes are converted with LE32TOH before being returned. */
+       return LE32TOH(val);
+}
+
+static void lightrec_default_swu(struct lightrec_state *state, u32 opcode, /* Default .swu handler (see lightrec_default_ops): writes 32 bits to host. */
+                                void *host, u32 addr, u32 data) /* opcode is not used here */
+{
+       data = HTOLE32(data); /* convert before the raw byte copy */
+       /* Copy with memcpy rather than storing through a u32 pointer, so host need not be 4-byte aligned. */
+       memcpy(host, &data, 4);
+       /* Unless LIGHTREC_OPT_INV_DMA_ONLY is set, invalidate 8 bytes from the word-aligned address: an unaligned 4-byte store can straddle two aligned 32-bit words. */
+       if (!(state->opt_flags & LIGHTREC_OPT_INV_DMA_ONLY))
+               lightrec_invalidate(state, addr & ~0x3, 8);
+}
+
 static const struct lightrec_mem_map_ops lightrec_default_ops = {
        .sb = lightrec_default_sb,
        .sh = lightrec_default_sh,
@@ -87,6 +108,8 @@ static const struct lightrec_mem_map_ops lightrec_default_ops = {
        .lb = lightrec_default_lb,
        .lh = lightrec_default_lh,
        .lw = lightrec_default_lw,
+       .lwu = lightrec_default_lwu,
+       .swu = lightrec_default_swu,
 };
 
 static void __segfault_cb(struct lightrec_state *state, u32 addr,
@@ -331,6 +354,11 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base,
                return lightrec_lwl(state, ops, opcode, host, addr, data);
        case OP_LWR:
                return lightrec_lwr(state, ops, opcode, host, addr, data);
+       case OP_META_LWU:
+               return ops->lwu(state, opcode, host, addr);
+       case OP_META_SWU:
+               ops->swu(state, opcode, host, addr, data);
+               return 0;
        case OP_LW:
        default:
                return ops->lw(state, opcode, host, addr);
@@ -352,6 +380,7 @@ static void lightrec_rw_helper(struct lightrec_state *state,
        case OP_LWL:
        case OP_LWR:
        case OP_LW:
+       case OP_META_LWU:
                if (OPT_HANDLE_LOAD_DELAYS && unlikely(!state->in_delay_slot_n)) {
                        state->temp_reg = ret;
                        state->in_delay_slot_n = 0xff;
@@ -1482,6 +1511,8 @@ static bool lightrec_block_is_fully_tagged(const struct block *block)
                case OP_SWR:
                case OP_LWC2:
                case OP_SWC2:
+               case OP_META_LWU:
+               case OP_META_SWU:
                        if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags))
                                return false;
                        fallthrough;
index 5a66e73..4cda1a0 100644 (file)
@@ -95,6 +95,9 @@ struct lightrec_mem_map_ops {
        u8 (*lb)(struct lightrec_state *, u32 opcode, void *host, u32 addr);
        u16 (*lh)(struct lightrec_state *, u32 opcode, void *host, u32 addr);
        u32 (*lw)(struct lightrec_state *, u32 opcode, void *host, u32 addr);
+       u32 (*lwu)(struct lightrec_state *, u32 opcode, void *host, u32 addr);
+       void (*swu)(struct lightrec_state *, u32 opcode,
+                   void *host, u32 addr, u32 data);
 };
 
 struct lightrec_mem_map {
index c01e024..90b2139 100644 (file)
@@ -114,6 +114,8 @@ static u64 opcode_read_mask(union code op)
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
+       case OP_META_LWU:
+       case OP_META_SWU:
                return BIT(op.i.rs) | BIT(op.i.rt);
        case OP_META:
                return BIT(op.m.rs);
@@ -186,6 +188,7 @@ u64 opcode_write_mask(union code op)
        case OP_LBU:
        case OP_LHU:
        case OP_LWR:
+       case OP_META_LWU:
                return BIT(op.i.rt);
        case OP_JAL:
                return BIT(31);
@@ -382,6 +385,7 @@ bool opcode_is_load(union code op)
        case OP_LHU:
        case OP_LWR:
        case OP_LWC2:
+       case OP_META_LWU:
                return true;
        default:
                return false;
@@ -397,6 +401,7 @@ static bool opcode_is_store(union code op)
        case OP_SWL:
        case OP_SWR:
        case OP_SWC2:
+       case OP_META_SWU:
                return true;
        default:
                return false;
@@ -438,6 +443,7 @@ static bool is_nop(union code op)
                case OP_LBU:
                case OP_LHU:
                case OP_LWR:
+               case OP_META_LWU:
                        return false;
                default:
                        return true;
@@ -822,6 +828,7 @@ static void lightrec_patch_known_zero(struct opcode *op,
        case OP_SWL:
        case OP_SW:
        case OP_SWR:
+       case OP_META_SWU:
                if (is_known_zero(v, op->i.rt))
                        op->i.rt = 0;
                fallthrough;
@@ -834,6 +841,7 @@ static void lightrec_patch_known_zero(struct opcode *op,
        case OP_LWR:
        case OP_LWC2:
        case OP_SWC2:
+       case OP_META_LWU:
                if (is_known(v, op->i.rs)
                    && kunseg(v[op->i.rs].value) == 0)
                        op->i.rs = 0;
@@ -879,6 +887,7 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
        struct constprop_data v[32] = LIGHTREC_CONSTPROP_INITIALIZER;
        unsigned int i;
        bool local;
+       int idx;
        u8 tmp;
 
        for (i = 0; i < block->nb_ops; i++) {
@@ -1016,6 +1025,40 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                }
                        }
                        break;
+               case OP_LWL:
+               case OP_LWR:
+                       if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+                               idx = find_next_reader(list, i + 1, op->i.rt);
+                               if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+                                   && list[idx].i.rs == op->i.rs
+                                   && list[idx].i.rt == op->i.rt
+                                   && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+                                       /* Replace a LWL/LWR combo with a META_LWU */
+                                       if (op->i.op == OP_LWL)
+                                               op->i.imm -= 3;
+                                       op->i.op = OP_META_LWU;
+                                       list[idx].opcode = 0;
+                                       pr_debug("Convert LWL/LWR to LWU\n");
+                               }
+                       }
+                       break;
+               case OP_SWL:
+               case OP_SWR:
+                       if (i == 0 || !has_delay_slot(list[i - 1].c)) {
+                               idx = find_next_reader(list, i + 1, op->i.rt);
+                               if (idx > 0 && list[idx].i.op == (op->i.op ^ 0x4)
+                                   && list[idx].i.rs == op->i.rs
+                                   && list[idx].i.rt == op->i.rt
+                                   && abs((s16)op->i.imm - (s16)list[idx].i.imm) == 3) {
+                                       /* Replace a SWL/SWR combo with a META_SWU */
+                                       if (op->i.op == OP_SWL)
+                                               op->i.imm -= 3;
+                                       op->i.op = OP_META_SWU;
+                                       list[idx].opcode = 0;
+                                       pr_debug("Convert SWL/SWR to SWU\n");
+                               }
+                       }
+                       break;
                case OP_REGIMM:
                        switch (op->r.rt) {
                        case OP_REGIMM_BLTZ: