X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_arm.c;h=25aa7cbb628f9a21515c7d84b86e073a33c63772;hb=d481fb64f2aac7a36532142cda11fa43f5ca792f;hp=64a70f96614d55f52f3db097759d5e3ac33c6420;hpb=02a5662c31c401081716623cc80bb1c4ab1dbb19;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/jit_arm.c b/deps/lightning/lib/jit_arm.c index 64a70f96..25aa7cbb 100644 --- a/deps/lightning/lib/jit_arm.c +++ b/deps/lightning/lib/jit_arm.c @@ -240,6 +240,11 @@ jit_get_cpu(void) /* do not generate hardware integer division by default */ if (jit_cpu.version == 7) jit_cpu.extend = 0; + + /* By default generate extra instructions for unaligned load/store. */ + jit_cpu.unaligned = 1; + /* Linux should only not handle unaligned vfp load/store */ + jit_cpu.vfp_unaligned = 1; } void @@ -1337,6 +1342,16 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + if (jit_swf_p()) \ + swf_##name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w));\ + else \ + vfp_##name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w));\ + case jit_code_##name##i##type: \ + break #define case_rrrr(name, type) \ case jit_code_##name##r##type: \ name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ @@ -1558,6 +1573,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); @@ -1576,16 +1595,38 @@ _emit_code(jit_state_t *_jit) case_rrw(rem, _u); case_rrr(lsh,); case_rrw(lsh,); +#define qlshr(r0, r1, r2, r3) fallback_qlshr(r0, r1, r2, r3) +#define qlshi(r0, r1, r2, i0) fallback_qlshi(r0, r1, r2, i0) +#define qlshr_u(r0, r1, r2, r3) fallback_qlshr_u(r0, r1, r2, r3) +#define qlshi_u(r0, r1, r2, i0) fallback_qlshi_u(r0, r1, r2, i0) + case_rrrr(qlsh,); + case_rrrw(qlsh,); + case_rrrr(qlsh, _u); + case_rrrw(qlsh, _u); case_rrr(rsh,); case_rrw(rsh,); case_rrr(rsh, _u); case_rrw(rsh, _u); +#define qrshr(r0, r1, r2, r3) fallback_qrshr(r0, r1, r2, r3) +#define qrshi(r0, r1, r2, i0) fallback_qrshi(r0, r1, r2, i0) +#define qrshr_u(r0, r1, r2, r3) fallback_qrshr_u(r0, r1, r2, r3) +#define qrshi_u(r0, r1, r2, i0) fallback_qrshi_u(r0, r1, r2, i0) + case_rrrr(qrsh,); + case_rrrw(qrsh,); + case_rrrr(qrsh, _u); + case_rrrw(qrsh, _u); + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(neg,); case_rr(com,); case_rr(clo,); case_rr(clz,); case_rr(cto,); case_rr(ctz,); + case_rr(rbit,); + case_rr(popcnt,); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -1614,6 +1655,18 @@ _emit_code(jit_state_t *_jit) case_rrw(ldx, _us); case_rrr(ldx, _i); case_rrw(ldx, _i); + case jit_code_unldr: + unldr(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi: + unldi(rn(node->u.w), node->v.w, node->w.w); + break; + case jit_code_unldr_u: + unldr_u(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi_u: + unldi_u(rn(node->u.w), node->v.w, node->w.w); + break; case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1626,10 +1679,28 @@ _emit_code(jit_state_t *_jit) case_wrr(stx, _s); case_rrr(stx, _i); case_wrr(stx, _i); + case jit_code_unstr: + unstr(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unsti: + unsti(node->u.w, rn(node->v.w), node->w.w); + break; case_rr(hton, _us); case_rr(hton, _ui); case_rr(bswap, _us); case_rr(bswap, _ui); + case jit_code_extr: + extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_extr_u: + extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_depr: + depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_depi: + depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); @@ -1734,15 +1805,43 @@ _emit_code(jit_state_t *_jit) case_vv(abs, _f); case_vv(neg, _f); case_vv(sqrt, _f); + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_vv(ext, _f); case_vv(ld, _f); case_vw(ld, _f); case_vvv(ldx, _f); case_vvw(ldx, _f); + case jit_code_unldr_x: + if (jit_swf_p()) + swf_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w); + else + vfp_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi_x: + if (jit_swf_p()) + swf_unldi_x(rn(node->u.w), node->v.w, node->w.w); + else + vfp_unldi_x(rn(node->u.w), node->v.w, node->w.w); + break; case_vv(st, _f); case_wv(st, _f); case_vvv(stx, _f); case_wvv(stx, _f); + case jit_code_unstr_x: + if (jit_swf_p()) + swf_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w); + else + vfp_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unsti_x: + if (jit_swf_p()) + swf_unsti_x(node->u.w, rn(node->v.w), node->w.w); + else + vfp_unsti_x(node->u.w, rn(node->v.w), node->w.w); + break; case_vv(mov, _f); case jit_code_movi_f: assert_data(node); @@ -1820,6 +1919,10 @@ _emit_code(jit_state_t *_jit) case_vv(abs, _d); case_vv(neg, _d); case_vv(sqrt, _d); + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_vv(ext, _d); case_vv(ld, _d); case_vw(ld, _d); @@ -2028,41 +2131,46 @@ _emit_code(jit_state_t *_jit) break; case jit_code_movr_w_f: if (jit_swf_p()) - swf_movr_f(rn(node->u.w), rn(node->v.w)); + swf_movr_w_f(rn(node->u.w), rn(node->v.w)); else - vfp_movr_f(rn(node->u.w), rn(node->v.w)); + vfp_movr_w_f(rn(node->u.w), rn(node->v.w)); break; case jit_code_movr_f_w: if (jit_swf_p()) - swf_movr_f(rn(node->u.w), rn(node->v.w)); + swf_movr_f_w(rn(node->u.w), rn(node->v.w)); else - vfp_movr_f(rn(node->u.w), rn(node->v.w)); + vfp_movr_f_w(rn(node->u.w), rn(node->v.w)); break; case jit_code_movi_f_w: assert_data(node); + movi_f_w(rn(node->u.w), node->v.f); + break; + case jit_code_movi_w_f: if (jit_swf_p()) - swf_movi_f(rn(node->u.w), node->v.f); + swf_movi_w_f(rn(node->u.w), node->v.w); else - vfp_movi_f(rn(node->u.w), node->v.f); + vfp_movi_w_f(rn(node->u.w), node->v.w); break; case jit_code_movr_ww_d: if (jit_swf_p()) - swf_movr_d(rn(node->u.w), rn(node->v.w)); + swf_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w)); else - vfp_movr_d(rn(node->u.w), rn(node->v.w)); + vfp_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w)); break; case jit_code_movr_d_ww: if (jit_swf_p()) - swf_movr_d(rn(node->u.w), rn(node->w.w)); + swf_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w)); else - vfp_movr_d(rn(node->u.w), rn(node->w.w)); + vfp_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w)); break; case jit_code_movi_d_ww: - assert_data(node); + movi_d_ww(rn(node->u.w), rn(node->v.w), node->w.d); + break; + case jit_code_movi_ww_d: if (jit_swf_p()) - swf_movi_d(rn(node->u.w), node->w.d); + swf_movi_ww_d(rn(node->u.w), node->v.w, node->w.w); else - vfp_movi_d(rn(node->u.w), node->w.d); + vfp_movi_ww_d(rn(node->u.w), node->v.w, node->w.w); break; case jit_code_va_start: vastart(rn(node->u.w)); @@ -2115,6 +2223,63 @@ _emit_code(jit_state_t *_jit) case jit_code_retval_f: case jit_code_retval_d: case jit_code_prepare: case jit_code_finishr: case jit_code_finishi: + case jit_code_negi_f: case jit_code_absi_f: + case jit_code_sqrti_f: case jit_code_negi_d: + case jit_code_absi_d: case jit_code_sqrti_d: + break; + case jit_code_negi: + negi(rn(node->u.w), node->v.w); + break; + case jit_code_comi: + comi(rn(node->u.w), node->v.w); + break; + case jit_code_exti_c: + exti_c(rn(node->u.w), node->v.w); + break; + case jit_code_exti_uc: + exti_uc(rn(node->u.w), node->v.w); + break; + case jit_code_exti_s: + exti_s(rn(node->u.w), node->v.w); + break; + case jit_code_exti_us: + exti_us(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_us: + bswapi_us(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_ui: + bswapi_ui(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_us: + htoni_us(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_ui: + htoni_ui(rn(node->u.w), node->v.w); + break; + case jit_code_cloi: + cloi(rn(node->u.w), node->v.w); + break; + case jit_code_clzi: + clzi(rn(node->u.w), node->v.w); + break; + case jit_code_ctoi: + ctoi(rn(node->u.w), node->v.w); + break; + case jit_code_ctzi: + ctzi(rn(node->u.w), node->v.w); + break; + case jit_code_rbiti: + rbiti(rn(node->u.w), node->v.w); + break; + case jit_code_popcnti: + popcnti(rn(node->u.w), node->v.w); + break; + case jit_code_exti: + exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; + case jit_code_exti_u: + exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); break; default: abort(); @@ -2124,12 +2289,19 @@ _emit_code(jit_state_t *_jit) /* update register live state */ jit_reglive(node); +#if defined JIT_INSTR_MAX + word = 4096 - JIT_INSTR_MAX; +#else + word = 3968; +#endif + /* longest sequence should be 64 bytes, but preventively + * do not let it go past 256 remaining bytes before a flush */ + if (word > 3968) + word = 3968; if (_jitc->consts.length && - (_jit->pc.uc - _jitc->consts.data >= 3968 || + (_jit->pc.uc - _jitc->consts.data >= word || (jit_uword_t)_jit->pc.uc - - (jit_uword_t)_jitc->consts.patches[0] >= 3968)) { - /* longest sequence should be 64 bytes, but preventively - * do not let it go past 128 remaining bytes before a flush */ + (jit_uword_t)_jitc->consts.patches[0] >= word)) { if (node->next && node->next->code != jit_code_jmpi && node->next->code != jit_code_jmpr &&