From: Autechre Date: Fri, 8 Apr 2022 19:46:31 +0000 (+0100) Subject: Merge pull request #643 from pcercuei/update-lightrec-20220408 X-Git-Tag: r24l~481 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e557df126d39c05f5bda1f3845522762dd8da2ef;hp=46a38bdab1a4d9f578a368705a9e3e144fd81189;p=pcsx_rearmed.git Merge pull request #643 from pcercuei/update-lightrec-20220408 Update lightrec 20220408 --- diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo index d49a4e87..f4cb3ca0 100644 --- a/deps/lightning/.gitrepo +++ b/deps/lightning/.gitrepo @@ -4,9 +4,9 @@ ; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme ; [subrepo] - remote = https://git.savannah.gnu.org/git/lightning.git - branch = master - commit = 876c1043bec5bfd594482b40700c84693e40d0eb - parent = cef02748fe77c0d29b441447659262ce1da47c4b + remote = https://github.com/pcercuei/gnu_lightning.git + branch = pcsx_rearmed + commit = ac905ceb09ce479623377733c4b463f1aa3eb99e + parent = b74a54b1ac0fa605f56411704fb902d7cf17c71a method = merge cmdver = 0.4.3 diff --git a/deps/lightning/check/movzr.ok b/deps/lightning/check/movzr.ok new file mode 100644 index 00000000..9766475a --- /dev/null +++ b/deps/lightning/check/movzr.ok @@ -0,0 +1 @@ +ok diff --git a/deps/lightning/check/movzr.tst b/deps/lightning/check/movzr.tst new file mode 100644 index 00000000..baa3ff85 --- /dev/null +++ b/deps/lightning/check/movzr.tst @@ -0,0 +1,62 @@ +.data 8 +ok: +.c "ok\n" + +#define CMOVR(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R1 I0 \ + movi %R2 I1 \ + movi %R0 V \ + OP##r##T %R0 %R1 %R2 \ + beqi OP##T##N##r_##R0##R1##R2 %R0 V \ + calli @abort \ +OP##T##N##r_##R0##R1##R2: + +/* reg0 = reg1 op reg0 */ +#define CMOVR1(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I1 \ + movi %R1 I0 \ + movi %R2 V \ + OP##r##T %R0 %R1 %R0 \ + beqr OP##T##N##r_1##R0##R1##R2 %R0 %R2 \ + calli @abort \ +OP##T##N##r_1##R0##R1##R2: + +#define TEST_CMOV1(N, OP, I0, I1, V, R0, R1, R2) \ + CMOVR(N, , OP, I0, I1, V, R0, R1, R2) \ + CMOVR1(N, , OP, I0, I1, V, R0, R1, R2) \ + +#define TEST_CMOV(N, OP, I0, I1, V) \ + TEST_CMOV1(N, OP, I0, I1, V, v0, v1, v2) \ + TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r0) \ + TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r1) \ + TEST_CMOV1(N, OP, I0, I1, V, v0, v1, r2) \ + TEST_CMOV1(N, OP, I0, I1, V, v1, v2, r1) \ + TEST_CMOV1(N, OP, I0, I1, V, v1, v2, r2) \ + TEST_CMOV1(N, OP, I0, I1, V, v2, r0, r1) \ + TEST_CMOV1(N, OP, I0, I1, V, v2, r0, r2) \ + TEST_CMOV1(N, OP, I0, I1, V, r0, r1, r2) + +#define MOVZR(N, I0, I1, V) TEST_CMOV(N, movz, I0, I1, V) +#define MOVNR(N, I0, I1, V) TEST_CMOV(N, movn, I0, I1, V) + +.code + prolog + + MOVZR(0, 0x0, 0x0, 0x0) + MOVZR(1, 0xf7de, 0x0, 0xf7de) + + MOVZR(2, 0x0, 0xdead, 0xdead) + MOVZR(3, 0xf7de, 0xdead, 0xdead) + + MOVNR(0, 0x0, 0x0, 0x0) + MOVNR(1, 0xf7de, 0x0, 0x0) + + MOVNR(2, 0x0, 0xdead, 0x0) + MOVNR(3, 0xf7de, 0xdead, 0xf7de) + + prepare + pushargi ok + ellipsis + finishi @printf + ret + epilog diff --git a/deps/lightning/doc/body.texi b/deps/lightning/doc/body.texi index c14f6358..51c08d33 100644 --- a/deps/lightning/doc/body.texi +++ b/deps/lightning/doc/body.texi @@ -244,6 +244,8 @@ lshr O1 = O2 << O3 lshi O1 = O2 << O3 rshr _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} rshi _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} +movzr O1 = O3 ? O1 : O2 +movnr O1 = O3 ? O2 : O1 @end example @item Four operand binary ALU operations diff --git a/deps/lightning/include/lightning.h.in b/deps/lightning/include/lightning.h.in index e1d8a0a1..422fc138 100644 --- a/deps/lightning/include/lightning.h.in +++ b/deps/lightning/include/lightning.h.in @@ -891,6 +891,10 @@ typedef enum { #define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v) #define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) +#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w) +#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) + jit_code_movnr, jit_code_movzr, + jit_code_last_code } jit_code_t; diff --git a/deps/lightning/lib/jit_mips-cpu.c b/deps/lightning/lib/jit_mips-cpu.c index b73f4b18..119547d0 100644 --- a/deps/lightning/lib/jit_mips-cpu.c +++ b/deps/lightning/lib/jit_mips-cpu.c @@ -391,6 +391,7 @@ static void _nop(jit_state_t*,jit_int32_t); # define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR) # endif # define J(i0) hi(MIPS_J,i0) +# define MOVN(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVN) # define MOVZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVZ) # define comr(r0,r1) xori(r0,r1,-1) # define negr(r0,r1) subr(r0,_ZERO_REGNO,r1) @@ -506,6 +507,8 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define movnr(r0,r1,r2) MOVN(r0, r1, r2) +# define movzr(r0,r1,r2) MOVZ(r0, r1, r2) # define ldr_c(r0,r1) LB(r0,0,r1) # define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c index 613aa009..b33fef2f 100644 --- a/deps/lightning/lib/jit_mips-sz.c +++ b/deps/lightning/lib/jit_mips-sz.c @@ -1207,4 +1207,6 @@ 0, /* movi_d_ww */ 4, /* movr_d_w */ 12, /* movi_d_w */ + 4, /* movnr */ + 4, /* movzr */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c index dafade85..5ffad2b5 100644 --- a/deps/lightning/lib/jit_mips.c +++ b/deps/lightning/lib/jit_mips.c @@ -1428,6 +1428,8 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c index 547f36c3..6dcf6727 100644 --- a/deps/lightning/lib/jit_x86-cpu.c +++ b/deps/lightning/lib/jit_x86-cpu.c @@ -369,6 +369,10 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t); static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t); # define movsr_u(r0, r1) _movsr_u(_jit, r0, r1) static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t); +#define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2) +static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +#define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2) +static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # if __X64 && !__X64_32 # define movir(r0, r1) _movir(_jit, r0, r1) static void _movir(jit_state_t*,jit_int32_t,jit_int32_t); @@ -698,6 +702,7 @@ static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t); # define ffsl(l) __builtin_ffsl(l) # endif # endif +# define jit_cmov_p() jit_cpu.cmov #endif #if CODE @@ -2213,6 +2218,32 @@ _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) mrm(0x03, r7(r0), r7(r1)); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(jit_cmov_p()); + + testr(r2, r2); + + rex(0, WIDE, r0, _NOREG, r1); + ic(0x0f); + ic(0x45); + mrm(0x03, r7(r0), r7(r1)); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + assert(jit_cmov_p()); + + testr(r2, r2); + + rex(0, WIDE, r0, _NOREG, r1); + ic(0x0f); + ic(0x44); + mrm(0x03, r7(r0), r7(r1)); +} + #if __X64 static void _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c index 663b840f..2cf88808 100644 --- a/deps/lightning/lib/jit_x86-sz.c +++ b/deps/lightning/lib/jit_x86-sz.c @@ -399,6 +399,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* movnr */ + 7, /* movzr */ #endif #if __X64 @@ -802,6 +804,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* movnr */ + 7, /* movzr */ #else # if __X64_32 @@ -1204,6 +1208,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* movnr */ + 7, /* movzr */ # else #define JIT_INSTR_MAX 115 @@ -1605,6 +1611,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 7, /* movnr */ + 7, /* movzr */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c index 7dd900e9..133ee39d 100644 --- a/deps/lightning/lib/jit_x86.c +++ b/deps/lightning/lib/jit_x86.c @@ -1674,6 +1674,8 @@ _emit_code(jit_state_t *_jit) case_rrw(gt, _u); case_rrr(ne,); case_rrw(ne,); + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c index 22eca0cb..30632939 100644 --- a/deps/lightning/lib/lightning.c +++ b/deps/lightning/lib/lightning.c @@ -1435,6 +1435,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_unordi_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl; break; + case jit_code_movnr: case jit_code_movzr: case jit_code_addr: case jit_code_addxr: case jit_code_addcr: case jit_code_subr: case jit_code_subxr: case jit_code_subcr: case jit_code_mulr: case jit_code_divr: case jit_code_divr_u: diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 38490c7f..4b96823a 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = 747da9c5d3e485f853b21bab3d158bd9b14d0500 - parent = e8633a2e14027e4552940ef3e1c27c40b94c4870 + commit = ce40f8388079945b60fd3f3dbef8ebaddf6f2685 + parent = 1f22b268b62cf9a3fad39b9b642ded0890902f58 method = merge cmdver = 0.4.3 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt index 6a139f4d..9ff58d62 100644 --- a/deps/lightrec/CMakeLists.txt +++ b/deps/lightrec/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.0) -project(lightrec LANGUAGES C VERSION 0.4) +project(lightrec LANGUAGES C VERSION 0.5) set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries") if (NOT BUILD_SHARED_LIBS) diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 99f6756d..578af874 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -827,7 +827,7 @@ static void rec_alu_div(struct lightrec_cstate *state, u8 reg_hi = get_mult_div_hi(c); jit_state_t *_jit = block->_jit; jit_node_t *branch, *to_end; - u8 lo, hi, rs, rt, rflags = 0; + u8 lo = 0, hi = 0, rs, rt, rflags = 0; jit_note(__FILE__, __LINE__); @@ -985,24 +985,20 @@ static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - u8 tmp, tmp2, tmp3; + u8 tmp, tmp3; if (with_arg) tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1); - tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R0); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); jit_ldxi(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrapper)); - jit_ldxi(tmp2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrappers[wrapper])); + offsetof(struct lightrec_state, wrappers_eps[wrapper])); if (with_arg) jit_movi(tmp3, arg); jit_callr(tmp); lightrec_free_reg(reg_cache, tmp); - lightrec_free_reg(reg_cache, tmp2); if (with_arg) lightrec_free_reg(reg_cache, tmp3); lightrec_regcache_mark_live(reg_cache, _jit); @@ -1416,21 +1412,7 @@ static void rec_special_BREAK(struct lightrec_cstate *state, rec_break_syscall(state, block, offset, true); } -static void rec_mfc(struct lightrec_cstate *state, - const struct block *block, u16 offset) -{ - struct regcache *reg_cache = state->reg_cache; - union code c = block->opcode_list[offset].c; - jit_state_t *_jit = block->_jit; - - jit_note(__FILE__, __LINE__); - lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true); - - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MFC); -} - -static void rec_mtc(struct lightrec_cstate *state, - const struct block *block, u16 offset) +static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -1483,7 +1465,7 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) struct regcache *reg_cache = state->reg_cache; const union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; - u8 rt, tmp, tmp2, status; + u8 rt, tmp = 0, tmp2, status; jit_note(__FILE__, __LINE__); @@ -1518,13 +1500,13 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) tmp = lightrec_alloc_reg_temp(reg_cache, _jit); jit_ldxi_i(tmp, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, regs.cp0[13])); + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); } if (c.r.rd == 12) { status = rt; } else if (c.r.rd == 13) { - tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); - /* Cause = (Cause & ~0x0300) | (value & 0x0300) */ jit_andi(tmp2, rt, 0x0300); jit_ori(tmp, tmp, 0x0300); @@ -1544,14 +1526,25 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) jit_andi(tmp, tmp, 0x0300); jit_nei(tmp, tmp, 0); jit_andr(tmp, tmp, status); + } + + if (c.r.rd == 12) { + /* Exit dynarec in case we unmask a hardware interrupt. + * exit_flags = !(~status & 0x401) */ + + jit_comr(tmp2, status); + jit_andi(tmp2, tmp2, 0x401); + jit_eqi(tmp2, tmp2, 0); + jit_orr(tmp, tmp, tmp2); + } + + if (c.r.rd == 12 || c.r.rd == 13) { jit_stxi_i(offsetof(struct lightrec_state, exit_flags), LIGHTREC_REG_STATE, tmp); lightrec_free_reg(reg_cache, tmp); - } - - if (c.r.rd == 13) lightrec_free_reg(reg_cache, tmp2); + } lightrec_free_reg(reg_cache, rt); @@ -1591,29 +1584,253 @@ static void rec_cp0_CTC0(struct lightrec_cstate *state, static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + const u32 zext_regs = 0x300f0080; + u8 rt, tmp, tmp2, tmp3, out, flags; + u8 reg = c.r.rd == 15 ? 14 : c.r.rd; + unsigned int i; + _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + + flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT; + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, flags); + + switch (reg) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + jit_ldxi_s(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[reg])); + break; + case 7: + case 16: + case 17: + case 18: + case 19: + jit_ldxi_us(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[reg])); + break; + case 28: + case 29: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); + + for (i = 0; i < 3; i++) { + out = i == 0 ? rt : tmp; + + jit_ldxi_s(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[9 + i])); + jit_movi(tmp2, 0x1f); + jit_rshi(out, tmp, 7); + + jit_ltr(tmp3, tmp2, out); + jit_movnr(out, tmp2, tmp3); + + jit_gei(tmp2, out, 0); + jit_movzr(out, tmp2, tmp2); + + if (i > 0) { + jit_lshi(tmp, tmp, 5 * i); + jit_orr(rt, rt, tmp); + } + } + + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + lightrec_free_reg(reg_cache, tmp3); + break; + default: + jit_ldxi_i(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[reg])); + break; + } + + lightrec_free_reg(reg_cache, rt); } static void rec_cp2_basic_CFC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt; + _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + + switch (c.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT); + jit_ldxi_s(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2c[c.r.rd])); + break; + default: + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT); + jit_ldxi_i(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2c[c.r.rd])); + break; + } + + lightrec_free_reg(reg_cache, rt); } static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + jit_node_t *loop, *to_loop; + u8 rt, tmp, tmp2, flags = 0; + _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + + if (c.r.rd == 31) + return; + + if (c.r.rd == 30) + flags |= REG_EXT; + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags); + + switch (c.r.rd) { + case 15: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[13])); + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp2d[14])); + + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[12]), + LIGHTREC_REG_STATE, tmp); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[13]), + LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[14]), + LIGHTREC_REG_STATE, rt); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + case 28: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_lshi(tmp, rt, 7); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[9]), + LIGHTREC_REG_STATE, tmp); + + jit_lshi(tmp, rt, 2); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[10]), + LIGHTREC_REG_STATE, tmp); + + jit_rshi(tmp, rt, 3); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(offsetof(struct lightrec_state, regs.cp2d[11]), + LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); + break; + case 30: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + /* if (rt < 0) rt = ~rt; */ + jit_rshi(tmp, rt, 31); + jit_xorr(tmp, rt, tmp); + + /* We know the sign bit is 0. Left-shift by 1 to start the algorithm */ + jit_lshi(tmp, tmp, 1); + jit_movi(tmp2, 33); + + /* Decrement tmp2 and right-shift the value by 1 until it equals zero */ + loop = jit_label(); + jit_subi(tmp2, tmp2, 1); + jit_rshi_u(tmp, tmp, 1); + to_loop = jit_bnei(tmp, 0); + + jit_patch_at(to_loop, loop); + + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[31]), + LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[30]), + LIGHTREC_REG_STATE, rt); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + default: + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2d[c.r.rd]), + LIGHTREC_REG_STATE, rt); + break; + } + + lightrec_free_reg(reg_cache, rt); } static void rec_cp2_basic_CTC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt, tmp, tmp2; + _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0); + + switch (c.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + jit_stxi_s(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]), + LIGHTREC_REG_STATE, rt); + break; + case 31: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_andi(tmp, rt, 0x7f87e000); + jit_nei(tmp, tmp, 0); + jit_lshi(tmp, tmp, 31); + + jit_andi(tmp2, rt, 0x7ffff000); + jit_orr(tmp, tmp2, tmp); + + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[31]), + LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + + default: + jit_stxi_i(offsetof(struct lightrec_state, regs.cp2c[c.r.rd]), + LIGHTREC_REG_STATE, rt); + } + + lightrec_free_reg(reg_cache, rt); } static void rec_cp0_RFE(struct lightrec_cstate *state, diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index e9efcb5e..4b797a1d 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -98,7 +98,6 @@ struct lightrec_branch_target { enum c_wrappers { C_WRAPPER_RW, C_WRAPPER_RW_GENERIC, - C_WRAPPER_MFC, C_WRAPPER_MTC, C_WRAPPER_CP, C_WRAPPER_SYSCALL, @@ -128,7 +127,8 @@ struct lightrec_state { u32 exit_flags; u32 old_cycle_counter; struct block *dispatcher, *c_wrapper_block; - void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT]; + void *c_wrappers[C_WRAPPERS_COUNT]; + void *wrappers_eps[C_WRAPPERS_COUNT]; struct tinymm *tinymm; struct blockcache *block_cache; struct recompiler *rec; diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 3a6e1129..d4ab419f 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -406,17 +406,9 @@ u32 lightrec_mfc(struct lightrec_state *state, union code op) return state->regs.cp2c[op.r.rd]; } -static void lightrec_mfc_cb(struct lightrec_state *state, union code op) -{ - u32 rt = lightrec_mfc(state, op); - - if (op.r.rt) - state->regs.gpr[op.r.rt] = rt; -} - static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) { - u32 status, cause; + u32 status, oldstatus, cause; switch (reg) { case 1: @@ -426,12 +418,13 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) case 15: /* Those registers are read-only */ return; - default: /* fall-through */ + default: break; } if (reg == 12) { status = state->regs.cp0[12]; + oldstatus = status; if (status & ~data & BIT(16)) { state->ops.enable_ram(state, true); @@ -441,14 +434,24 @@ static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) } } - state->regs.cp0[reg] = data; + if (reg == 13) { + state->regs.cp0[13] &= ~0x300; + state->regs.cp0[13] |= data & 0x300; + } else { + state->regs.cp0[reg] = data; + } if (reg == 12 || reg == 13) { cause = state->regs.cp0[13]; status = state->regs.cp0[12]; + /* Handle software interrupts */ if (!!(status & cause & 0x300) & status) lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); + + /* Handle hardware interrupts */ + if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401)) + lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); } } @@ -684,6 +687,7 @@ static struct block * generate_wrapper(struct lightrec_state *state) int stack_ptr; jit_word_t code_size; jit_node_t *to_tramp, *to_fn_epilog; + jit_node_t *addr[C_WRAPPERS_COUNT - 1]; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -698,9 +702,22 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* Wrapper entry point */ jit_prolog(); + jit_tramp(256); + + /* Add entry points; separate them by opcodes that increment + * LIGHTREC_REG_STATE (since we cannot touch other registers). + * The difference will then tell us which C function to call. */ + for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { + jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8); + addr[i - 1] = jit_indirect(); + } + + jit_epilog(); + jit_prolog(); stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS); + /* Save all temporaries on stack */ for (i = 0; i < NUM_TEMPS; i++) jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); @@ -710,6 +727,7 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* The trampoline will jump back here */ to_fn_epilog = jit_label(); + /* Restore temporaries from stack */ for (i = 0; i < NUM_TEMPS; i++) jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t)); @@ -724,6 +742,13 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_tramp(256); jit_patch(to_tramp); + /* Retrieve the wrapper function */ + jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers)); + + /* Restore LIGHTREC_REG_STATE to its correct value */ + jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state); + jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(LIGHTREC_REG_CYCLE); @@ -741,6 +766,11 @@ static struct block * generate_wrapper(struct lightrec_state *state) block->flags = 0; block->nb_ops = 0; + state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function; + + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + state->wrappers_eps[i] = jit_address(addr[i]); + jit_get_code(&code_size); lightrec_register(MEM_FOR_CODE, code_size); @@ -943,7 +973,7 @@ err_no_mem: union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) { - void *host; + void *host = NULL; lightrec_get_map(state, &host, kunseg(pc)); @@ -1261,13 +1291,15 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, * finishes. */ if (ENABLE_THREADED_COMPILER) lightrec_recompiler_remove(state->rec, block2); + } - /* We know from now on that block2 isn't going to be - * compiled. We can override the LUT entry with our - * new block's entry point. */ - offset = lut_offset(block->pc) + target->offset; - state->code_lut[offset] = jit_address(target->label); + /* We know from now on that block2 (if present) isn't going to + * be compiled. We can override the LUT entry with our new + * block's entry point. */ + offset = lut_offset(block->pc) + target->offset; + state->code_lut[offset] = jit_address(target->label); + if (block2) { pr_debug("Reap block 0x%08x as it's covered by block " "0x%08x\n", block2->pc, block->pc); @@ -1487,11 +1519,8 @@ struct lightrec_state * lightrec_init(char *argv0, if (!state->c_wrapper_block) goto err_free_dispatcher; - state->c_wrapper = state->c_wrapper_block->function; - state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb; state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb; - state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb; state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; state->c_wrappers[C_WRAPPER_CP] = lightrec_cp; state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb; diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 98a26f60..f719192b 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -463,6 +463,10 @@ static u32 lightrec_propagate_consts(const struct opcode *op, u32 known, u32 *v) { union code c = op->c; + /* Register $zero is always, well, zero */ + known |= BIT(0); + v[0] = 0; + if (op->flags & LIGHTREC_SYNC) return 0; @@ -833,10 +837,6 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl if (!op->opcode) continue; - /* Register $zero is always, well, zero */ - known |= BIT(0); - values[0] = 0; - switch (op->i.op) { case OP_BEQ: if (op->i.rs == op->i.rt) { @@ -1238,10 +1238,6 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) for (i = 0; i < block->nb_ops; i++) { list = &block->opcode_list[i]; - /* Register $zero is always, well, zero */ - known |= BIT(0); - values[0] = 0; - switch (list->i.op) { case OP_SB: case OP_SH: @@ -1476,11 +1472,22 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset, } } +static bool lightrec_always_skip_div_check(void) +{ +#ifdef __mips__ + return true; +#else + return false; +#endif +} + static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block) { struct opcode *list; u8 reg_hi, reg_lo; unsigned int i; + u32 known = BIT(0); + u32 values[32] = { 0 }; for (i = 0; i < block->nb_ops - 1; i++) { list = &block->opcode_list[i]; @@ -1489,19 +1496,27 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * continue; switch (list->r.op) { - case OP_SPECIAL_MULT: - case OP_SPECIAL_MULTU: case OP_SPECIAL_DIV: case OP_SPECIAL_DIVU: + /* If we are dividing by a non-zero constant, don't + * emit the div-by-zero check. */ + if (lightrec_always_skip_div_check() || + (known & BIT(list->c.r.rt) && values[list->c.r.rt])) + list->flags |= LIGHTREC_NO_DIV_CHECK; + case OP_SPECIAL_MULT: /* fall-through */ + case OP_SPECIAL_MULTU: break; default: + known = lightrec_propagate_consts(list, known, values); continue; } /* Don't support opcodes in delay slots */ if ((i && has_delay_slot(block->opcode_list[i - 1].c)) || - (list->flags & LIGHTREC_NO_DS)) + (list->flags & LIGHTREC_NO_DS)) { + known = lightrec_propagate_consts(list, known, values); continue; + } reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false); if (reg_lo == 0) { @@ -1543,6 +1558,8 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * } else { list->r.imm = 0; } + + known = lightrec_propagate_consts(list, known, values); } return 0;