Merge pull request #643 from pcercuei/update-lightrec-20220408
author Autechre <libretro@gmail.com>
Fri, 8 Apr 2022 19:46:31 +0000 (20:46 +0100)
committer GitHub <noreply@github.com>
Fri, 8 Apr 2022 19:46:31 +0000 (20:46 +0100)
Update lightrec 20220408

18 files changed:
deps/lightning/.gitrepo
deps/lightning/check/movzr.ok [new file with mode: 0644]
deps/lightning/check/movzr.tst [new file with mode: 0644]
deps/lightning/doc/body.texi
deps/lightning/include/lightning.h.in
deps/lightning/lib/jit_mips-cpu.c
deps/lightning/lib/jit_mips-sz.c
deps/lightning/lib/jit_mips.c
deps/lightning/lib/jit_x86-cpu.c
deps/lightning/lib/jit_x86-sz.c
deps/lightning/lib/jit_x86.c
deps/lightning/lib/lightning.c
deps/lightrec/.gitrepo
deps/lightrec/CMakeLists.txt
deps/lightrec/emitter.c
deps/lightrec/lightrec-private.h
deps/lightrec/lightrec.c
deps/lightrec/optimizer.c

diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo
index d49a4e8..f4cb3ca 100644 (file)
@@ -4,9 +4,9 @@
 ; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme
 ;
 [subrepo]
-       remote = https://git.savannah.gnu.org/git/lightning.git
-       branch = master
-       commit = 876c1043bec5bfd594482b40700c84693e40d0eb
-       parent = cef02748fe77c0d29b441447659262ce1da47c4b
+       remote = https://github.com/pcercuei/gnu_lightning.git
+       branch = pcsx_rearmed
+       commit = ac905ceb09ce479623377733c4b463f1aa3eb99e
+       parent = b74a54b1ac0fa605f56411704fb902d7cf17c71a
        method = merge
        cmdver = 0.4.3
diff --git a/deps/lightning/check/movzr.ok b/deps/lightning/check/movzr.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/movzr.tst b/deps/lightning/check/movzr.tst
new file mode 100644 (file)
index 0000000..baa3ff8
--- /dev/null
@@ -0,0 +1,62 @@
+.data  8
+ok:
+.c     "ok\n"
+
+#define CMOVR(N, T, OP, I0, I1, V, R0, R1, R2)         \
+       movi %R1 I0                                     \
+       movi %R2 I1                                     \
+       movi %R0 V                                      \
+       OP##r##T %R0 %R1 %R2                            \
+       beqi OP##T##N##r_##R0##R1##R2 %R0 V             \
+       calli @abort                                    \
+OP##T##N##r_##R0##R1##R2:
+
+/* reg0 = reg1 op reg0 */
+#define CMOVR1(N, T, OP, I0, I1, V, R0, R1, R2)                \
+       movi %R0 I1                                     \
+       movi %R1 I0                                     \
+       movi %R2 V                                      \
+       OP##r##T %R0 %R1 %R0                            \
+       beqr OP##T##N##r_1##R0##R1##R2 %R0 %R2          \
+       calli @abort                                    \
+OP##T##N##r_1##R0##R1##R2:
+
+#define TEST_CMOV1(N, OP, I0, I1, V, R0, R1, R2)       \
+       CMOVR(N, , OP, I0, I1, V, R0, R1, R2)           \
+       CMOVR1(N, , OP, I0, I1, V, R0, R1, R2)          \
+
+#define TEST_CMOV(N, OP, I0, I1, V)                    \
+       TEST_CMOV1(N, OP, I0, I1, V, v0, v1, v2)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r0)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r1)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r2)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v1, v2, r1)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v1, v2, r2)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v2, r0, r1)        \
+       TEST_CMOV1(N, OP, I0, I1, V, v2, r0, r2)        \
+       TEST_CMOV1(N, OP, I0, I1, V, r0, r1, r2)
+
+#define MOVZR(N, I0, I1, V)    TEST_CMOV(N, movz, I0, I1, V)
+#define MOVNR(N, I0, I1, V)    TEST_CMOV(N, movn, I0, I1, V)
+
+.code
+       prolog
+
+       MOVZR(0, 0x0, 0x0, 0x0)
+       MOVZR(1, 0xf7de, 0x0, 0xf7de)
+
+       MOVZR(2, 0x0, 0xdead, 0xdead)
+       MOVZR(3, 0xf7de, 0xdead, 0xdead)
+
+       MOVNR(0, 0x0, 0x0, 0x0)
+       MOVNR(1, 0xf7de, 0x0, 0x0)
+
+       MOVNR(2, 0x0, 0xdead, 0x0)
+       MOVNR(3, 0xf7de, 0xdead, 0xf7de)
+
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       ret
+       epilog
diff --git a/deps/lightning/doc/body.texi b/deps/lightning/doc/body.texi
index c14f635..51c08d3 100644 (file)
@@ -244,6 +244,8 @@ lshr                 O1 = O2 << O3
 lshi                 O1 = O2 << O3
 rshr     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
 rshi     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+movzr                O1 = O3 ? O1 : O2
+movnr                O1 = O3 ? O2 : O1
 @end example
 
 @item Four operand binary ALU operations
diff --git a/deps/lightning/include/lightning.h.in b/deps/lightning/include/lightning.h.in
index e1d8a0a..422fc13 100644 (file)
@@ -891,6 +891,10 @@ typedef enum {
 #define jit_movr_d_w(u, v)     jit_new_node_ww(jit_code_movr_d_w, u, v)
 #define jit_movi_d_w(u, v)     jit_new_node_wd(jit_code_movi_d_w, u, v)
 
+#define jit_movnr(u,v,w)       jit_new_node_www(jit_code_movnr,u,v,w)
+#define jit_movzr(u,v,w)       jit_new_node_www(jit_code_movzr,u,v,w)
+    jit_code_movnr,            jit_code_movzr,
+
     jit_code_last_code
 } jit_code_t;
 
diff --git a/deps/lightning/lib/jit_mips-cpu.c b/deps/lightning/lib/jit_mips-cpu.c
index b73f4b1..119547d 100644 (file)
@@ -391,6 +391,7 @@ static void _nop(jit_state_t*,jit_int32_t);
 #   define JR(r0)                      hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
 #  endif
 #  define J(i0)                                hi(MIPS_J,i0)
+#  define MOVN(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVN)
 #  define MOVZ(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVZ)
 #  define comr(r0,r1)                  xori(r0,r1,-1)
 #  define negr(r0,r1)                  subr(r0,_ZERO_REGNO,r1)
@@ -506,6 +507,8 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define movnr(r0,r1,r2)              MOVN(r0, r1, r2)
+#  define movzr(r0,r1,r2)              MOVZ(r0, r1, r2)
 #  define ldr_c(r0,r1)                 LB(r0,0,r1)
 #  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c
index 613aa00..b33fef2 100644 (file)
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
+    4, /* movnr */
+    4, /* movzr */
 #endif /* __WORDSIZE */
diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c
index dafade8..5ffad2b 100644 (file)
@@ -1428,6 +1428,8 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _i);
                case_rr(ext, _ui);
 #endif
+               case_rrr(movn,);
+               case_rrr(movz,);
                case_rr(mov,);
            case jit_code_movi:
                if (node->flag & jit_flag_node) {
diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c
index 547f36c..6dcf672 100644 (file)
@@ -369,6 +369,10 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movsr_u(r0, r1)              _movsr_u(_jit, r0, r1)
 static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movnr(r0, r1, r2)              _movnr(_jit, r0, r1, r2)
+static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define movzr(r0, r1, r2)              _movzr(_jit, r0, r1, r2)
+static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  if __X64 && !__X64_32
 #    define movir(r0, r1)              _movir(_jit, r0, r1)
 static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -698,6 +702,7 @@ static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
 #      define ffsl(l)                  __builtin_ffsl(l)
 #    endif
 #  endif
+#  define jit_cmov_p()                 jit_cpu.cmov
 #endif
 
 #if CODE
@@ -2213,6 +2218,32 @@ _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     mrm(0x03, r7(r0), r7(r1));
 }
 
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(jit_cmov_p());
+
+    testr(r2, r2);
+
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0x45);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(jit_cmov_p());
+
+    testr(r2, r2);
+
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0x44);
+    mrm(0x03, r7(r0), r7(r1));
+}
+
 #if __X64
 static void
 _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c
index 663b840..2cf8880 100644 (file)
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    7, /* movnr */
+    7, /* movzr */
 #endif
 
 #if __X64
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    7, /* movnr */
+    7, /* movzr */
 #else
 
 #  if __X64_32
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    7, /* movnr */
+    7, /* movzr */
 
 #  else
 #define JIT_INSTR_MAX 115
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    7, /* movnr */
+    7, /* movzr */
 #endif /* __CYGWIN__ || _WIN32 */
 #  endif /* __X64_32 */
 #endif /* __X64 */
diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c
index 7dd900e..133ee39 100644 (file)
@@ -1674,6 +1674,8 @@ _emit_code(jit_state_t *_jit)
                case_rrw(gt, _u);
                case_rrr(ne,);
                case_rrw(ne,);
+               case_rrr(movn,);
+               case_rrr(movz,);
                case_rr(mov,);
            case jit_code_movi:
                if (node->flag & jit_flag_node) {
diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c
index 22eca0c..3063293 100644 (file)
@@ -1435,6 +1435,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_unordi_d:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl;
            break;
+       case jit_code_movnr:    case jit_code_movzr:
        case jit_code_addr:     case jit_code_addxr:    case jit_code_addcr:
        case jit_code_subr:     case jit_code_subxr:    case jit_code_subcr:
        case jit_code_mulr:     case jit_code_divr:     case jit_code_divr_u:
diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo
index 38490c7..4b96823 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/lightrec.git
        branch = master
-       commit = 747da9c5d3e485f853b21bab3d158bd9b14d0500
-       parent = e8633a2e14027e4552940ef3e1c27c40b94c4870
+       commit = ce40f8388079945b60fd3f3dbef8ebaddf6f2685
+       parent = 1f22b268b62cf9a3fad39b9b642ded0890902f58
        method = merge
        cmdver = 0.4.3
diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt
index 6a139f4..9ff58d6 100644 (file)
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.0)
-project(lightrec LANGUAGES C VERSION 0.4)
+project(lightrec LANGUAGES C VERSION 0.5)
 
 set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries")
 if (NOT BUILD_SHARED_LIBS)
diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c
index 99f6756..578af87 100644 (file)
@@ -827,7 +827,7 @@ static void rec_alu_div(struct lightrec_cstate *state,
        u8 reg_hi = get_mult_div_hi(c);
        jit_state_t *_jit = block->_jit;
        jit_node_t *branch, *to_end;
-       u8 lo, hi, rs, rt, rflags = 0;
+       u8 lo = 0, hi = 0, rs, rt, rflags = 0;
 
        jit_note(__FILE__, __LINE__);
 
@@ -985,24 +985,20 @@ static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block
 {
        struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
-       u8 tmp, tmp2, tmp3;
+       u8 tmp, tmp3;
 
        if (with_arg)
                tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
-       tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R0);
        tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
 
        jit_ldxi(tmp, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, c_wrapper));
-       jit_ldxi(tmp2, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, c_wrappers[wrapper]));
+                offsetof(struct lightrec_state, wrappers_eps[wrapper]));
        if (with_arg)
                jit_movi(tmp3, arg);
 
        jit_callr(tmp);
 
        lightrec_free_reg(reg_cache, tmp);
-       lightrec_free_reg(reg_cache, tmp2);
        if (with_arg)
                lightrec_free_reg(reg_cache, tmp3);
        lightrec_regcache_mark_live(reg_cache, _jit);
@@ -1416,21 +1412,7 @@ static void rec_special_BREAK(struct lightrec_cstate *state,
        rec_break_syscall(state, block, offset, true);
 }
 
-static void rec_mfc(struct lightrec_cstate *state,
-                   const struct block *block, u16 offset)
-{
-       struct regcache *reg_cache = state->reg_cache;
-       union code c = block->opcode_list[offset].c;
-       jit_state_t *_jit = block->_jit;
-
-       jit_note(__FILE__, __LINE__);
-       lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true);
-
-       call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MFC);
-}
-
-static void rec_mtc(struct lightrec_cstate *state,
-                   const struct block *block, u16 offset)
+static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
        struct regcache *reg_cache = state->reg_cache;
        union code c = block->opcode_list[offset].c;
@@ -1483,7 +1465,7 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
        struct regcache *reg_cache = state->reg_cache;
        const union code c = block->opcode_list[offset].c;
        jit_state_t *_jit = block->_jit;
-       u8 rt, tmp, tmp2, status;
+       u8 rt, tmp = 0, tmp2, status;
 
        jit_note(__FILE__, __LINE__);
 
@@ -1518,13 +1500,13 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
                tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
                jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
                           offsetof(struct lightrec_state, regs.cp0[13]));
+
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
        }
 
        if (c.r.rd == 12) {
                status = rt;
        } else if (c.r.rd == 13) {
-               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
-
                /* Cause = (Cause & ~0x0300) | (value & 0x0300) */
                jit_andi(tmp2, rt, 0x0300);
                jit_ori(tmp, tmp, 0x0300);
@@ -1544,14 +1526,25 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset)
                jit_andi(tmp, tmp, 0x0300);
                jit_nei(tmp, tmp, 0);
                jit_andr(tmp, tmp, status);
+       }
+
+       if (c.r.rd == 12) {
+               /* Exit dynarec in case we unmask a hardware interrupt.
+                * exit_flags = !(~status & 0x401) */
+
+               jit_comr(tmp2, status);
+               jit_andi(tmp2, tmp2, 0x401);
+               jit_eqi(tmp2, tmp2, 0);
+               jit_orr(tmp, tmp, tmp2);
+       }
+
+       if (c.r.rd == 12 || c.r.rd == 13) {
                jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
                           LIGHTREC_REG_STATE, tmp);
 
                lightrec_free_reg(reg_cache, tmp);
-       }
-
-       if (c.r.rd == 13)
                lightrec_free_reg(reg_cache, tmp2);
+       }
 
        lightrec_free_reg(reg_cache, rt);
 
@@ -1591,29 +1584,253 @@ static void rec_cp0_CTC0(struct lightrec_cstate *state,
 static void rec_cp2_basic_MFC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       const u32 zext_regs = 0x300f0080;
+       u8 rt, tmp, tmp2, tmp3, out, flags;
+       u8 reg = c.r.rd == 15 ? 14 : c.r.rd;
+       unsigned int i;
+
        _jit_name(block->_jit, __func__);
-       rec_mfc(state, block, offset);
+
+       flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT;
+       rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, flags);
+
+       switch (reg) {
+       case 1:
+       case 3:
+       case 5:
+       case 8:
+       case 9:
+       case 10:
+       case 11:
+               jit_ldxi_s(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               break;
+       case 7:
+       case 16:
+       case 17:
+       case 18:
+       case 19:
+               jit_ldxi_us(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               break;
+       case 28:
+       case 29:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+               tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               for (i = 0; i < 3; i++) {
+                       out = i == 0 ? rt : tmp;
+
+                       jit_ldxi_s(tmp, LIGHTREC_REG_STATE,
+                                  offsetof(struct lightrec_state, regs.cp2d[9 + i]));
+                       jit_movi(tmp2, 0x1f);
+                       jit_rshi(out, tmp, 7);
+
+                       jit_ltr(tmp3, tmp2, out);
+                       jit_movnr(out, tmp2, tmp3);
+
+                       jit_gei(tmp2, out, 0);
+                       jit_movzr(out, tmp2, tmp2);
+
+                       if (i > 0) {
+                               jit_lshi(tmp, tmp, 5 * i);
+                               jit_orr(rt, rt, tmp);
+                       }
+               }
+
+
+               lightrec_free_reg(reg_cache, tmp);
+               lightrec_free_reg(reg_cache, tmp2);
+               lightrec_free_reg(reg_cache, tmp3);
+               break;
+       default:
+               jit_ldxi_i(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[reg]));
+               break;
+       }
+
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_cp2_basic_CFC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 rt;
+
        _jit_name(block->_jit, __func__);
-       rec_mfc(state, block, offset);
+
+       switch (c.r.rd) {
+       case 4:
+       case 12:
+       case 20:
+       case 26:
+       case 27:
+       case 29:
+       case 30:
+               rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT);
+               jit_ldxi_s(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
+               break;
+       default:
+               rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT);
+               jit_ldxi_i(rt, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2c[c.r.rd]));
+               break;
+       }
+
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_cp2_basic_MTC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       jit_node_t *loop, *to_loop;
+       u8 rt, tmp, tmp2, flags = 0;
+
        _jit_name(block->_jit, __func__);
-       rec_mtc(state, block, offset);
+
+       if (c.r.rd == 31)
+               return;
+
+       if (c.r.rd == 30)
+               flags |= REG_EXT;
+
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags);
+
+       switch (c.r.rd) {
+       case 15:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               jit_ldxi_i(tmp, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[13]));
+
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+               jit_ldxi_i(tmp2, LIGHTREC_REG_STATE,
+                          offsetof(struct lightrec_state, regs.cp2d[14]));
+
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]),
+                          LIGHTREC_REG_STATE, tmp);
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]),
+                          LIGHTREC_REG_STATE, tmp2);
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]),
+                          LIGHTREC_REG_STATE, rt);
+
+               lightrec_free_reg(reg_cache, tmp);
+               lightrec_free_reg(reg_cache, tmp2);
+               break;
+       case 28:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               jit_lshi(tmp, rt, 7);
+               jit_andi(tmp, tmp, 0xf80);
+               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]),
+                          LIGHTREC_REG_STATE, tmp);
+
+               jit_lshi(tmp, rt, 2);
+               jit_andi(tmp, tmp, 0xf80);
+               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]),
+                          LIGHTREC_REG_STATE, tmp);
+
+               jit_rshi(tmp, rt, 3);
+               jit_andi(tmp, tmp, 0xf80);
+               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]),
+                          LIGHTREC_REG_STATE, tmp);
+
+               lightrec_free_reg(reg_cache, tmp);
+               break;
+       case 30:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               /* if (rt < 0) rt = ~rt; */
+               jit_rshi(tmp, rt, 31);
+               jit_xorr(tmp, rt, tmp);
+
+               /* We know the sign bit is 0. Left-shift by 1 to start the algorithm */
+               jit_lshi(tmp, tmp, 1);
+               jit_movi(tmp2, 33);
+
+               /* Decrement tmp2 and right-shift the value by 1 until it equals zero */
+               loop = jit_label();
+               jit_subi(tmp2, tmp2, 1);
+               jit_rshi_u(tmp, tmp, 1);
+               to_loop = jit_bnei(tmp, 0);
+
+               jit_patch_at(to_loop, loop);
+
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]),
+                          LIGHTREC_REG_STATE, tmp2);
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]),
+                          LIGHTREC_REG_STATE, rt);
+
+               lightrec_free_reg(reg_cache, tmp);
+               lightrec_free_reg(reg_cache, tmp2);
+               break;
+       default:
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]),
+                          LIGHTREC_REG_STATE, rt);
+               break;
+       }
+
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_cp2_basic_CTC2(struct lightrec_cstate *state,
                               const struct block *block, u16 offset)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       const union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 rt, tmp, tmp2;
+
        _jit_name(block->_jit, __func__);
-       rec_mtc(state, block, offset);
+
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0);
+
+       switch (c.r.rd) {
+       case 4:
+       case 12:
+       case 20:
+       case 26:
+       case 27:
+       case 29:
+       case 30:
+               jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
+                          LIGHTREC_REG_STATE, rt);
+               break;
+       case 31:
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+               jit_andi(tmp, rt, 0x7f87e000);
+               jit_nei(tmp, tmp, 0);
+               jit_lshi(tmp, tmp, 31);
+
+               jit_andi(tmp2, rt, 0x7ffff000);
+               jit_orr(tmp, tmp2, tmp);
+
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]),
+                          LIGHTREC_REG_STATE, tmp);
+
+               lightrec_free_reg(reg_cache, tmp);
+               lightrec_free_reg(reg_cache, tmp2);
+               break;
+
+       default:
+               jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]),
+                          LIGHTREC_REG_STATE, rt);
+       }
+
+       lightrec_free_reg(reg_cache, rt);
 }
 
 static void rec_cp0_RFE(struct lightrec_cstate *state,
diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h
index e9efcb5..4b797a1 100644 (file)
@@ -98,7 +98,6 @@ struct lightrec_branch_target {
 enum c_wrappers {
        C_WRAPPER_RW,
        C_WRAPPER_RW_GENERIC,
-       C_WRAPPER_MFC,
        C_WRAPPER_MTC,
        C_WRAPPER_CP,
        C_WRAPPER_SYSCALL,
@@ -128,7 +127,8 @@ struct lightrec_state {
        u32 exit_flags;
        u32 old_cycle_counter;
        struct block *dispatcher, *c_wrapper_block;
-       void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT];
+       void *c_wrappers[C_WRAPPERS_COUNT];
+       void *wrappers_eps[C_WRAPPERS_COUNT];
        struct tinymm *tinymm;
        struct blockcache *block_cache;
        struct recompiler *rec;
diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c
index 3a6e112..d4ab419 100644 (file)
@@ -406,17 +406,9 @@ u32 lightrec_mfc(struct lightrec_state *state, union code op)
                return state->regs.cp2c[op.r.rd];
 }
 
-static void lightrec_mfc_cb(struct lightrec_state *state, union code op)
-{
-       u32 rt = lightrec_mfc(state, op);
-
-       if (op.r.rt)
-               state->regs.gpr[op.r.rt] = rt;
-}
-
 static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
 {
-       u32 status, cause;
+       u32 status, oldstatus, cause;
 
        switch (reg) {
        case 1:
@@ -426,12 +418,13 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
        case 15:
                /* Those registers are read-only */
                return;
-       default: /* fall-through */
+       default:
                break;
        }
 
        if (reg == 12) {
                status = state->regs.cp0[12];
+               oldstatus = status;
 
                if (status & ~data & BIT(16)) {
                        state->ops.enable_ram(state, true);
@@ -441,14 +434,24 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
                }
        }
 
-       state->regs.cp0[reg] = data;
+       if (reg == 13) {
+               state->regs.cp0[13] &= ~0x300;
+               state->regs.cp0[13] |= data & 0x300;
+       } else {
+               state->regs.cp0[reg] = data;
+       }
 
        if (reg == 12 || reg == 13) {
                cause = state->regs.cp0[13];
                status = state->regs.cp0[12];
 
+               /* Handle software interrupts */
                if (!!(status & cause & 0x300) & status)
                        lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
+
+               /* Handle hardware interrupts */
+               if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401))
+                       lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
        }
 }
 
@@ -684,6 +687,7 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        int stack_ptr;
        jit_word_t code_size;
        jit_node_t *to_tramp, *to_fn_epilog;
+       jit_node_t *addr[C_WRAPPERS_COUNT - 1];
 
        block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
        if (!block)
@@ -698,9 +702,22 @@ static struct block * generate_wrapper(struct lightrec_state *state)
 
        /* Wrapper entry point */
        jit_prolog();
+       jit_tramp(256);
+
+       /* Add entry points; separate them by opcodes that increment
+        * LIGHTREC_REG_STATE (since we cannot touch other registers).
+        * The difference will then tell us which C function to call. */
+       for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) {
+               jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8);
+               addr[i - 1] = jit_indirect();
+       }
+
+       jit_epilog();
+       jit_prolog();
 
        stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);
 
+       /* Save all temporaries on stack */
        for (i = 0; i < NUM_TEMPS; i++)
                jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
 
@@ -710,6 +727,7 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        /* The trampoline will jump back here */
        to_fn_epilog = jit_label();
 
+       /* Restore temporaries from stack */
        for (i = 0; i < NUM_TEMPS; i++)
                jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));
 
@@ -724,6 +742,13 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        jit_tramp(256);
        jit_patch(to_tramp);
 
+       /* Retrieve the wrapper function */
+       jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, c_wrappers));
+
+       /* Restore LIGHTREC_REG_STATE to its correct value */
+       jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state);
+
        jit_prepare();
        jit_pushargr(LIGHTREC_REG_STATE);
        jit_pushargr(LIGHTREC_REG_CYCLE);
@@ -741,6 +766,11 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        block->flags = 0;
        block->nb_ops = 0;
 
+       state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function;
+
+       for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
+               state->wrappers_eps[i] = jit_address(addr[i]);
+
        jit_get_code(&code_size);
        lightrec_register(MEM_FOR_CODE, code_size);
 
@@ -943,7 +973,7 @@ err_no_mem:
 
 union code lightrec_read_opcode(struct lightrec_state *state, u32 pc)
 {
-       void *host;
+       void *host = NULL;
 
        lightrec_get_map(state, &host, kunseg(pc));
 
@@ -1261,13 +1291,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                         * finishes. */
                        if (ENABLE_THREADED_COMPILER)
                                lightrec_recompiler_remove(state->rec, block2);
+               }
 
-                       /* We know from now on that block2 isn't going to be
-                        * compiled. We can override the LUT entry with our
-                        * new block's entry point. */
-                       offset = lut_offset(block->pc) + target->offset;
-                       state->code_lut[offset] = jit_address(target->label);
+               /* We know from now on that block2 (if present) isn't going to
+                * be compiled. We can override the LUT entry with our new
+                * block's entry point. */
+               offset = lut_offset(block->pc) + target->offset;
+               state->code_lut[offset] = jit_address(target->label);
 
+               if (block2) {
                        pr_debug("Reap block 0x%08x as it's covered by block "
                                 "0x%08x\n", block2->pc, block->pc);
 
@@ -1487,11 +1519,8 @@ struct lightrec_state * lightrec_init(char *argv0,
        if (!state->c_wrapper_block)
                goto err_free_dispatcher;
 
-       state->c_wrapper = state->c_wrapper_block->function;
-
        state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb;
        state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
-       state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb;
        state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
        state->c_wrappers[C_WRAPPER_CP] = lightrec_cp;
        state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c
index 98a26f6..f719192 100644 (file)
@@ -463,6 +463,10 @@ static u32 lightrec_propagate_consts(const struct opcode *op, u32 known, u32 *v)
 {
        union code c = op->c;
 
+       /* Register $zero is always, well, zero */
+       known |= BIT(0);
+       v[0] = 0;
+
        if (op->flags & LIGHTREC_SYNC)
                return 0;
 
@@ -833,10 +837,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                if (!op->opcode)
                        continue;
 
-               /* Register $zero is always, well, zero */
-               known |= BIT(0);
-               values[0] = 0;
-
                switch (op->i.op) {
                case OP_BEQ:
                        if (op->i.rs == op->i.rt) {
@@ -1238,10 +1238,6 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
        for (i = 0; i < block->nb_ops; i++) {
                list = &block->opcode_list[i];
 
-               /* Register $zero is always, well, zero */
-               known |= BIT(0);
-               values[0] = 0;
-
                switch (list->i.op) {
                case OP_SB:
                case OP_SH:
@@ -1476,11 +1472,22 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset,
        }
 }
 
+static bool lightrec_always_skip_div_check(void)
+{
+#ifdef __mips__
+       return true;
+#else
+       return false;
+#endif
+}
+
 static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block)
 {
        struct opcode *list;
        u8 reg_hi, reg_lo;
        unsigned int i;
+       u32 known = BIT(0);
+       u32 values[32] = { 0 };
 
        for (i = 0; i < block->nb_ops - 1; i++) {
                list = &block->opcode_list[i];
@@ -1489,19 +1496,27 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *
                        continue;
 
                switch (list->r.op) {
-               case OP_SPECIAL_MULT:
-               case OP_SPECIAL_MULTU:
                case OP_SPECIAL_DIV:
                case OP_SPECIAL_DIVU:
+                       /* If we are dividing by a non-zero constant, don't
+                        * emit the div-by-zero check. */
+                       if (lightrec_always_skip_div_check() ||
+                           (known & BIT(list->c.r.rt) && values[list->c.r.rt]))
+                               list->flags |= LIGHTREC_NO_DIV_CHECK;
+               case OP_SPECIAL_MULT: /* fall-through */
+               case OP_SPECIAL_MULTU:
                        break;
                default:
+                       known = lightrec_propagate_consts(list, known, values);
                        continue;
                }
 
                /* Don't support opcodes in delay slots */
                if ((i && has_delay_slot(block->opcode_list[i - 1].c)) ||
-                   (list->flags & LIGHTREC_NO_DS))
+                   (list->flags & LIGHTREC_NO_DS)) {
+                       known = lightrec_propagate_consts(list, known, values);
                        continue;
+               }
 
                reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false);
                if (reg_lo == 0) {
@@ -1543,6 +1558,8 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *
                } else {
                        list->r.imm = 0;
                }
+
+               known = lightrec_propagate_consts(list, known, values);
        }
 
        return 0;