X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_fallback.c;h=3a471eb2b0e1c64581f4695cdb665e8796589576;hb=d481fb64f2aac7a36532142cda11fa43f5ca792f;hp=2f7f214ef5f02703945888609991d2cc6dfd90a7;hpb=384af87540d751ef274e5956d58f4bbc153a34a9;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/jit_fallback.c b/deps/lightning/lib/jit_fallback.c index 2f7f214e..3a471eb2 100644 --- a/deps/lightning/lib/jit_fallback.c +++ b/deps/lightning/lib/jit_fallback.c @@ -1,4 +1,7 @@ #if PROTO +#define USE_BIT_TABLES 1 +#define USE_BITSWAP_UNROLLED 0 +#define USE_BITSWAP_LOOP 0 #define fallback_save(r0) _fallback_save(_jit, r0) static void _fallback_save(jit_state_t*, jit_int32_t); #define fallback_load(r0) _fallback_load(_jit, r0) @@ -12,6 +15,55 @@ static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t); #define fallback_casx(r0,r1,r2,r3,im) _fallback_casx(_jit,r0,r1,r2,r3,im) static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t, jit_word_t); +#if !defined(__arm__) +# ifndef movr_w_f +# define movr_w_f(r0, r1) fallback_movr_w_f(r0, r1) +# define fallback_movr_w_f(r0,r1) _fallback_movr_w_f(_jit,r0,r1) +static void _fallback_movr_w_f(jit_state_t*, jit_int32_t, jit_int32_t); +# endif +# ifndef movr_f_w +# define movr_f_w(r0, r1) fallback_movr_f_w(r0, r1) +# define fallback_movr_f_w(r0,r1) _fallback_movr_f_w(_jit,r0,r1) +static void _fallback_movr_f_w(jit_state_t*, jit_int32_t, jit_int32_t); +# endif +# if __WORDSIZE == 32 +# ifndef movr_ww_d +# define movr_ww_d(r0, r1, r2) fallback_movr_ww_d(r0, r1, r2) +# define fallback_movr_ww_d(r0,r1,r2) _fallback_movr_ww_d(_jit,r0,r1,r2) +static void _fallback_movr_ww_d(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t); +# endif +# ifndef movr_d_ww +# define fallback_movr_d_ww(r0,r1,r2) _fallback_movr_d_ww(_jit,r0,r1,r2) +static void _fallback_movr_d_ww(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t); +# define movr_d_ww(r0, r1, r2) fallback_movr_d_ww(r0, r1, r2) +# endif +# else +# ifndef movr_w_d +# define movr_w_d(r0, r1) fallback_movr_w_d(r0, r1); +# define fallback_movr_w_d(r0,r1) _fallback_movr_w_d(_jit,r0,r1) +static void _fallback_movr_w_d(jit_state_t*, jit_int32_t, jit_int32_t); +# endif +# ifndef movr_d_w +# define movr_d_w(r0, r1) fallback_movr_d_w(r0, r1); +# define fallback_movr_d_w(r0,r1) _fallback_movr_d_w(_jit,r0,r1) +static void _fallback_movr_d_w(jit_state_t*, jit_int32_t, jit_int32_t); +# endif +# endif +#endif +#ifndef unldr +# define unldr(r0, r1, i0) fallback_unldr(r0, r1, i0) +#endif +#ifndef unldi +# define unldi(r0, i0, i1) fallback_unldi(r0, i0, i1) +#endif +#ifndef unstr +# define unstr(r0, r1, i0) fallback_unstr(r0, r1, i0) +#endif +#ifndef unsti +# define unsti(i0, r0, i1) fallback_unsti(i0, r0, i1) +#endif #define fallback_clo(r0,r1) _fallback_clo(_jit,r0,r1) static void _fallback_clo(jit_state_t*, jit_int32_t, jit_int32_t); #define fallback_clz(r0,r1) _fallback_clz(_jit,r0,r1) @@ -20,46 +72,235 @@ static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t); static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t); #define fallback_ctz(r0,r1) _fallback_ctz(_jit,r0,r1) static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t); -# if defined(__ia64__) -# define fallback_patch_jmpi(inst,lbl) \ - do { \ - sync(); \ - patch_at(jit_code_jmpi, inst, lbl); \ - } while (0) +#define fallback_rbit(r0,r1) _fallback_rbit(_jit, r0, r1) +static void _fallback_rbit(jit_state_t*, jit_int32_t, jit_int32_t); +#define 
fallback_popcnt(r0,r1) _fallback_popcnt(_jit, r0, r1) +static void _fallback_popcnt(jit_state_t*, jit_int32_t, jit_int32_t); +#define fallback_lrotr(r0, r1, r2) _fallback_lrotr(_jit, r0, r1, r2) +static void _fallback_lrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t); +#define fallback_lroti(r0, r1, i0) _fallback_lroti(_jit, r0, r1, i0) +static void _fallback_lroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_rrotr(r0, r1, r2) _fallback_rrotr(_jit, r0, r1, r2) +static void _fallback_rrotr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t); +#define fallback_rroti(r0, r1, i0) _fallback_rroti(_jit, r0, r1, i0) +static void _fallback_rroti(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_ext(r0,r1,i0,i1) _fallback_ext(_jit,r0,r1,i0,i1) +static void _fallback_ext(jit_state_t*, + jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +#define fallback_ext_u(r0,r1,i0,i1) _fallback_ext_u(_jit,r0,r1,i0,i1) +static void _fallback_ext_u(jit_state_t*, + jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +#define fallback_dep(r0,r1,i0,i1) _fallback_dep(_jit,r0,r1,i0,i1) +static void _fallback_dep(jit_state_t*, + jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +#define fallback_qlshr(r0,r1,r2,r3) _fallback_qlshr(_jit,r0,r1,r2,r3) +static void _fallback_qlshr(jit_state_t *_jit, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +#define fallback_qlshi(r0,r1,r2,i0) _fallback_qlshi(_jit,r0,r1,r2,i0) +static void _fallback_qlshi(jit_state_t *_jit, + jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_qlshr_u(r0,r1,r2,r3) _fallback_qlshr_u(_jit,r0,r1,r2,r3) +static void _fallback_qlshr_u(jit_state_t *_jit, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +#define fallback_qlshi_u(r0,r1,r2,i0) _fallback_qlshi_u(_jit,r0,r1,r2,i0) +static void _fallback_qlshi_u(jit_state_t *_jit, + jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_qrshr(r0,r1,r2,r3) _fallback_qrshr(_jit,r0,r1,r2,r3) +static void _fallback_qrshr(jit_state_t *_jit, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +#define fallback_qrshi(r0,r1,r2,i0) _fallback_qrshi(_jit,r0,r1,r2,i0) +static void _fallback_qrshi(jit_state_t *_jit, + jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_qrshr_u(r0,r1,r2,r3) _fallback_qrshr_u(_jit,r0,r1,r2,r3) +static void _fallback_qrshr_u(jit_state_t *_jit, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +#define fallback_qrshi_u(r0,r1,r2,i0) _fallback_qrshi_u(_jit,r0,r1,r2,i0) +static void _fallback_qrshi_u(jit_state_t *_jit, + jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +#define unldr2(r0, r1, sign) _unldr2(_jit, r0, r1, sign) +static void _unldr2(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +#define unldi2(r0, i1, sign) _unldi2(_jit, r0, i1, sign) +static void _unldi2(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t); +#define unldr3(r0, r1, sign) _unldr3(_jit, r0, r1, sign) +static void _unldr3(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +#define unldi3(r0, i1, sign) _unldi3(_jit, r0, i1, sign) +static void _unldi3(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t); +#if __WORDSIZE == 32 +# define unldr4(r0, r1) _unldr4(_jit, r0, r1) +static void _unldr4(jit_state_t*,jit_int32_t,jit_int32_t); +# define unldi4(r0, i1) _unldi4(_jit, r0, i1) +static void _unldi4(jit_state_t*,jit_int32_t,jit_word_t); +#else +# define unldr4(r0, r1, sign) _unldr4(_jit, r0, r1, sign) +static void _unldr4(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# define unldi4(r0, i1, sign) _unldi4(_jit, r0, i1, sign) +static void 
_unldi4(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t); +# define unldr5(r0, r1, sign) _unldr5(_jit, r0, r1, sign) +static void _unldr5(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# define unldi5(r0, i1, sign) _unldi5(_jit, r0, i1, sign) +static void _unldi5(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t); +# define unldr6(r0, r1, sign) _unldr6(_jit, r0, r1, sign) +static void _unldr6(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# define unldi6(r0, i1, sign) _unldi6(_jit, r0, i1, sign) +static void _unldi6(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t); +# define unldr7(r0, r1, sign) _unldr7(_jit, r0, r1, sign) +static void _unldr7(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# define unldi7(r0, i1, sign) _unldi7(_jit, r0, i1, sign) +static void _unldi7(jit_state_t*,jit_int32_t,jit_word_t,jit_bool_t); +# define unldr8(r0, r1) _unldr8(_jit, r0, r1) +static void _unldr8(jit_state_t*,jit_int32_t,jit_int32_t); +# define unldi8(r0, i1) _unldi8(_jit, r0, i1) +static void _unldi8(jit_state_t*,jit_int32_t,jit_word_t); +#endif +#define unstr2(r0, r1) _unstr2(_jit, r0, r1) +static void _unstr2(jit_state_t*,jit_int32_t,jit_int32_t); +#define unsti2(r0, i0) _unsti2(_jit, r0, i0) +static void _unsti2(jit_state_t*,jit_int32_t,jit_word_t); +#define unstr3(r0, r1) _unstr3(_jit, r0, r1) +static void _unstr3(jit_state_t*,jit_int32_t,jit_int32_t); +#define unsti3(r0, i0) _unsti3(_jit, r0, i0) +static void _unsti3(jit_state_t*,jit_int32_t,jit_word_t); +#define unstr4(r0, r1) _unstr4(_jit, r0, r1) +static void _unstr4(jit_state_t*,jit_int32_t,jit_int32_t); +#define unsti4(r0, i0) _unsti4(_jit, r0, i0) +static void _unsti4(jit_state_t*,jit_int32_t,jit_word_t); +#if __WORDSIZE == 64 +# define unstr5(r0, r1) _unstr5(_jit, r0, r1) +static void _unstr5(jit_state_t*,jit_int32_t,jit_int32_t); +# define unsti5(r0, i0) _unsti5(_jit, r0, i0) +static void _unsti5(jit_state_t*,jit_int32_t,jit_word_t); +# define unstr6(r0, r1) _unstr6(_jit, r0, r1) +static void _unstr6(jit_state_t*,jit_int32_t,jit_int32_t); +# define unsti6(r0, i0) _unsti6(_jit, r0, i0) +static void _unsti6(jit_state_t*,jit_int32_t,jit_word_t); +# define unstr7(r0, r1) _unstr7(_jit, r0, r1) +static void _unstr7(jit_state_t*,jit_int32_t,jit_int32_t); +# define unsti7(r0, i0) _unsti7(_jit, r0, i0) +static void _unsti7(jit_state_t*,jit_int32_t,jit_word_t); +# define unstr8(r0, r1) _unstr8(_jit, r0, r1) +static void _unstr8(jit_state_t*,jit_int32_t,jit_int32_t); +# define unsti8(r0, i0) _unsti8(_jit, r0, i0) +static void _unsti8(jit_state_t*,jit_int32_t,jit_word_t); +#endif +#define unldrw(r0, r1, i0) _unldrw(_jit, r0, r1, i0) +static void _unldrw(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t); +#define unldiw(r0, i0, i1) _unldiw(_jit, r0, i0, i1) +static void _unldiw(jit_state_t*,jit_int32_t,jit_word_t, jit_word_t); +#define unldx(r0, i0) _unldx(_jit, r0, i0) +static void _unldx(jit_state_t*,jit_int32_t,jit_word_t); +#define unldx_u(r0, i0) _unldx_u(_jit, r0, i0) +static void _unldx_u(jit_state_t*,jit_int32_t,jit_word_t); +#define fallback_unldr(r0, r1, i0) _fallback_unldr(_jit, r0, r1, i0) +static void _fallback_unldr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_unldi(r0, i0, i1) _fallback_unldi(_jit, r0, i0, i1) +static void _fallback_unldi(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t); +#define fallback_unldr_u(r0, r1, i0) _fallback_unldr_u(_jit, r0, r1, i0) +static void _fallback_unldr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_unldi_u(r0, i0, i1) _fallback_unldi_u(_jit, r0, i0, 
i1) +static void _fallback_unldi_u(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t); +#define fallback_unstr(r0, r1, i0) _fallback_unstr(_jit, r0, r1, i0) +static void _fallback_unstr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define fallback_unsti(i0, r0, i1) _fallback_unsti(_jit, i0, r0, i1) +static void _fallback_unsti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); +#if !defined(__arm__) +# define fallback_unldr_x(r0, r1, i0) _fallback_unldr_x(_jit, r0, r1, i0) +static void _fallback_unldr_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define fallback_unldi_x(r0, i0, i1) _fallback_unldi_x(_jit, r0, i0, i1) +static void _fallback_unldi_x(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t); +# define fallback_unstr_x(r0, r1, i0) _fallback_unstr_x(_jit, r0, r1, i0) +static void _fallback_unstr_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define fallback_unsti_x(i0, r0, i1) _fallback_unsti_x(_jit, i0, r0, i1) +static void _fallback_unsti_x(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); +#endif +# if defined(__s390__) || defined(__s390x__) +# define fallback_jit_get_reg(flags) jit_get_reg_but_zero(flags) # else -# define fallback_patch_jmpi(inst,lbl) fallback_patch_at(inst,lbl) +# define fallback_jit_get_reg(flags) jit_get_reg(flags) # endif -# if defined(__arm__) -# define fallback_patch_at(inst,lbl) patch_at(arm_patch_jump,inst,lbl) -# elif defined(__ia64__) -# define fallback_patch_at(inst,lbl) \ - do { \ - sync(); \ - patch_at(jit_code_bnei, inst, lbl); \ - } while (0); +# if defined(__ia64__) +# define fallback_flush() sync() +# elif defined(__mips__) +# define fallback_flush() flush() # else -# define fallback_patch_at(inst,lbl) patch_at(inst,lbl) +# define fallback_flush() /**/ # endif # if defined(__mips__) -# define fallback_jmpi(i0) jmpi(i0,1) +# define fallback_jmpi(i0) jmpi(i0, 1) # elif defined(__arm__) -# define fallback_jmpi(i0) jmpi_p(i0,1) +# define fallback_jmpi(i0) jmpi_p(i0, 1) # elif defined(__s390__) || defined(__s390x__) -# define fallback_jmpi(i0) jmpi(i0,1) +# define fallback_jmpi(i0) jmpi(i0, 1) # else # define fallback_jmpi(i0) jmpi(i0) # endif -# if defined(__mips__) -# define fallback_bnei(i0,r0,i1) bnei(i0,r0,i1) -# elif defined(__s390__) || defined(__s390x__) -# define fallback_bnei(i0,r0,i1) bnei_p(i0,r0,i1) -# else -# define fallback_bnei(i0,r0,i1) bnei(i0,r0,i1) -# endif # if defined(__s390__) || defined(__s390x__) +# define fallback_beqr(i0,r0,i1) beqr_p(i0,r0,i1) +# define fallback_beqi(i0,r0,i1) beqi_p(i0,r0,i1) +# define fallback_bner(i0,r0,r1) bner_p(i0,r0,r1) +# define fallback_bnei(i0,r0,i1) bnei_p(i0,r0,i1) +# define fallback_blei(i0,r0,i1) blei_p(i0,r0,i1) # define fallback_bmsr(i0,r0,r1) bmsr_p(i0,r0,r1) +# define fallback_bmsi(i0,r0,i1) bmsi_p(i0,r0,i1) # else +# define fallback_beqr(i0,r0,r1) beqr(i0,r0,r1) +# define fallback_beqi(i0,r0,i1) beqi(i0,r0,i1) +# define fallback_bner(i0,r0,r1) bner(i0,r0,r1) +# define fallback_bnei(i0,r0,i1) bnei(i0,r0,i1) +# define fallback_blei(i0,r0,i1) blei(i0,r0,i1) # define fallback_bmsr(i0,r0,r1) bmsr(i0,r0,r1) +# define fallback_bmsi(i0,r0,i1) bmsi(i0,r0,i1) +# endif +# if defined(__ia64__) +# define fallback_patch_jmpi(inst, lbl) \ + patch_at(jit_code_jmpi, inst, lbl) +# define fallback_patch_beqr(inst, lbl) \ + patch_at(jit_code_beqr, inst, lbl) +# define fallback_patch_beqi(inst, lbl) \ + patch_at(jit_code_beqi, inst, lbl) +# define fallback_patch_bner(inst, lbl) \ + patch_at(jit_code_bner, inst, lbl) +# define fallback_patch_bnei(inst, lbl) \ + patch_at(jit_code_bnei, inst, lbl) 
+# define fallback_patch_blei(inst, lbl) \ + patch_at(jit_code_blei, inst, lbl) +# define fallback_patch_bmsr(inst, lbl) \ + patch_at(jit_code_bmsr, inst, lbl) +# define fallback_patch_bmsi(inst, lbl) \ + patch_at(jit_code_bmsi, inst, lbl) +# elif defined(__arm__) +# define fallback_patch_jmpi(inst, lbl) \ + patch_at(arm_patch_jump,inst, lbl) +# define fallback_patch_beqr(inst, lbl) \ + patch_at(arm_patch_jump,inst, lbl) +# define fallback_patch_beqi(inst, lbl) \ + patch_at(arm_patch_jump,inst, lbl) +# define fallback_patch_bner(inst, lbl) \ + patch_at(arm_patch_jump,inst, lbl) +# define fallback_patch_bnei(inst, lbl) \ + patch_at(arm_patch_jump,inst, lbl) +# define fallback_patch_blei(inst, lbl) \ + patch_at(arm_patch_jump,inst, lbl) +# define fallback_patch_bmsr(inst, lbl) \ + patch_at(arm_patch_jump,inst, lbl) +# define fallback_patch_bmsi(inst, lbl) \ + patch_at(arm_patch_jump,inst, lbl) + # else +# define fallback_patch_jmpi(inst, lbl) \ + patch_at(inst, lbl) +# define fallback_patch_beqr(inst, lbl) \ + patch_at(inst, lbl) +# define fallback_patch_beqi(inst, lbl) \ + patch_at(inst, lbl) +# define fallback_patch_bner(inst, lbl) \ + patch_at(inst, lbl) +# define fallback_patch_bnei(inst, lbl) \ + patch_at(inst, lbl) +# define fallback_patch_blei(inst, lbl) \ + patch_at(inst, lbl) +# define fallback_patch_bmsr(inst, lbl) \ + patch_at(inst, lbl) +# define fallback_patch_bmsi(inst, lbl) \ + patch_at(inst, lbl) # endif #endif @@ -161,6 +402,7 @@ _fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1) # endif } +#ifdef NEED_FALLBACK_CASX static void _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) @@ -170,7 +412,7 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, /* XXX only attempts to fallback cas for lightning jit code */ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; if ((iscasi = r1 == _NOREG)) { - r1_reg = jit_get_reg(jit_class_gpr|jit_class_sav); + r1_reg = fallback_jit_get_reg(jit_class_gpr|jit_class_sav); r1 = rn(r1_reg); movi(r1, i0); } @@ -189,14 +431,149 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, str_l(r1, r3); # endif /* done: */ + fallback_flush(); done = _jit->pc.w; fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex); fallback_load(r0); - fallback_patch_at(jump, done); + fallback_flush(); + fallback_patch_bnei(jump, done); fallback_load_regs(r0); if (iscasi) jit_unget_reg(r1_reg); } +#endif + +#ifdef fallback_movr_w_f +static void +_fallback_movr_w_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (!_jitc->function->cvt_offset) { + _jitc->function->cvt_offset = jit_allocai(8); + _jitc->again = 1; + } + stxi_i(_jitc->function->cvt_offset, rn(JIT_FP), r1); + /* Useful for special debug builds */ +# if defined(__i386__) || defined(__x86_64__) + if (jit_cpu.sse2) + sse_ldxi_f(r0, rn(JIT_FP), _jitc->function->cvt_offset); + else + x87_ldxi_f(r0, rn(JIT_FP), _jitc->function->cvt_offset); +# else + ldxi_f(r0, rn(JIT_FP), _jitc->function->cvt_offset); +# endif +} +#endif + +#ifdef fallback_movr_f_w +static void +_fallback_movr_f_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (!_jitc->function->cvt_offset) { + _jitc->function->cvt_offset = jit_allocai(8); + _jitc->again = 1; + } +# if defined(__i386__) || defined(__x86_64__) + if (jit_cpu.sse2) + sse_stxi_f(_jitc->function->cvt_offset, rn(JIT_FP), r1); + else + x87_stxi_f(_jitc->function->cvt_offset, rn(JIT_FP), r1); +# else + stxi_f(_jitc->function->cvt_offset, 
rn(JIT_FP), r1); +# endif + ldxi_i(r0, rn(JIT_FP), _jitc->function->cvt_offset); +} +#endif + +#if __WORDSIZE == 32 +# ifdef fallback_movr_ww_d +static void +_fallback_movr_ww_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!_jitc->function->cvt_offset) { + _jitc->function->cvt_offset = jit_allocai(8); + _jitc->again = 1; + } + stxi_i(_jitc->function->cvt_offset, rn(JIT_FP), + __BYTE_ORDER == __LITTLE_ENDIAN ? r1 : r2); + stxi_i(_jitc->function->cvt_offset + 4, rn(JIT_FP), + __BYTE_ORDER == __LITTLE_ENDIAN ? r2 : r1); +# if defined(__i386__) || defined(__x86_64__) + if (jit_cpu.sse2) + sse_ldxi_d(r0, rn(JIT_FP), _jitc->function->cvt_offset); + else + x87_ldxi_d(r0, rn(JIT_FP), _jitc->function->cvt_offset); +# else + ldxi_d(r0, rn(JIT_FP), _jitc->function->cvt_offset); +# endif +} +# endif + +# ifdef fallback_movr_d_ww +static void +_fallback_movr_d_ww(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (!_jitc->function->cvt_offset) { + _jitc->function->cvt_offset = jit_allocai(8); + _jitc->again = 1; + } +# if defined(__i386__) || defined(__x86_64__) + if (jit_cpu.sse2) + sse_stxi_d(_jitc->function->cvt_offset, rn(JIT_FP), r2); + else + x87_stxi_d(_jitc->function->cvt_offset, rn(JIT_FP), r2); +# else + stxi_d(_jitc->function->cvt_offset, rn(JIT_FP), r2); +# endif + ldxi_i(__BYTE_ORDER == __LITTLE_ENDIAN ? r0 : r1, + rn(JIT_FP), _jitc->function->cvt_offset); + ldxi_i(__BYTE_ORDER == __LITTLE_ENDIAN ? r1 : r0, + rn(JIT_FP), _jitc->function->cvt_offset + 4); +} +# endif + +#else +# ifdef fallback_movr_w_d +static void +_fallback_movr_w_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (!_jitc->function->cvt_offset) { + _jitc->function->cvt_offset = jit_allocai(8); + _jitc->again = 1; + } + stxi_l(_jitc->function->cvt_offset, rn(JIT_FP), r1); +# if defined(__i386__) || defined(__x86_64__) + if (jit_cpu.sse2) + sse_ldxi_d(r0, rn(JIT_FP), _jitc->function->cvt_offset); + else + x87_ldxi_d(r0, rn(JIT_FP), _jitc->function->cvt_offset); +# else + ldxi_d(r0, rn(JIT_FP), _jitc->function->cvt_offset); +# endif +} +# endif + +# ifdef fallback_movr_d_w +static void +_fallback_movr_d_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (!_jitc->function->cvt_offset) { + _jitc->function->cvt_offset = jit_allocai(8); + _jitc->again = 1; + } +# if defined(__i386__) || defined(__x86_64__) + if (jit_cpu.sse2) + sse_stxi_d(_jitc->function->cvt_offset, rn(JIT_FP), r1); + else + x87_stxi_d(_jitc->function->cvt_offset, rn(JIT_FP), r1); +# else + stxi_d(_jitc->function->cvt_offset, rn(JIT_FP), r1); +# endif + ldxi_l(r0, rn(JIT_FP), _jitc->function->cvt_offset); +} +# endif +#endif static void _fallback_clo(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) @@ -205,63 +582,119 @@ _fallback_clo(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) comr(r0, r1); clz = fallback_bnei(_jit->pc.w, r0, 0); movi(r0, __WORDSIZE); + fallback_flush(); done = fallback_jmpi(_jit->pc.w); - fallback_patch_at(clz, _jit->pc.w); + fallback_flush(); + fallback_patch_bnei(clz, _jit->pc.w); fallback_clz(r0, r0); + fallback_flush(); fallback_patch_jmpi(done, _jit->pc.w); } static void _fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +# if USE_BIT_TABLES + /* t0 = __WORDSIZE - 8; + * loop: + * t1 = r1 >> t0; + * if (t1) + * goto done; + * t0 -= 8; + * if (t0) + * goto loop; + * t1 = r1; + * done: + * r0 = __WORDSIZE - 8 - t0 + clz_tab[t1] + */ + /* Table below is count of leading zeros of 8 bit values. 
*/ + static const jit_uint8_t clz_tab[256] = { + 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + }; + jit_int32_t t0, t1; + jit_word_t loop, done; + t0 = fallback_jit_get_reg(jit_class_gpr); + t1 = fallback_jit_get_reg(jit_class_gpr); + movi(rn(t0), __WORDSIZE - 8); + fallback_flush(); + loop = _jit->pc.w; + rshr_u(rn(t1), r1, rn(t0)); + done = fallback_bnei(_jit->pc.w, rn(t1), 0); + subi(rn(t0), rn(t0), 8); + fallback_bnei(loop, rn(t0), 0); + movr(rn(t1), r1); + fallback_flush(); + fallback_patch_bnei(done, _jit->pc.w); + rsbi(r0, rn(t0), __WORDSIZE - 8); + movi(rn(t0), (jit_word_t)clz_tab); + ldxr_uc(rn(t1), rn(t0), rn(t1)); + addr(r0, r0, rn(t1)); + jit_unget_reg(t1); + jit_unget_reg(t0); +# else jit_int32_t r1_reg, r2, r2_reg; jit_word_t clz, l32, l16, l8, l4, l2, l1; l32 = fallback_bnei(_jit->pc.w, r1, 0); movi(r0, __WORDSIZE); clz = fallback_jmpi(_jit->pc.w); - fallback_patch_at(l32, _jit->pc.w); - r2_reg = jit_get_reg(jit_class_gpr); + fallback_flush(); + fallback_patch_bnei(l32, _jit->pc.w); + r2_reg = fallback_jit_get_reg(jit_class_gpr); r2 = rn(r2_reg); - r1_reg = jit_get_reg(jit_class_gpr); + r1_reg = fallback_jit_get_reg(jit_class_gpr); movr(rn(r1_reg), r1); r1 = rn(r1_reg); movi(r0, 0); -# if __WORDSIZE == 64 +# if __WORDSIZE == 64 movi(r2, 0xffffffff00000000UL); l32 = fallback_bmsr(_jit->pc.w, r1, r2); lshi(r1, r1, 32); addi(r0, r0, 32); - fallback_patch_at(l32, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l32, _jit->pc.w); lshi(r2, r2, 16); -# else +# else movi(r2, 0xffff0000UL); -# endif +# endif l16 = fallback_bmsr(_jit->pc.w, r1, r2); lshi(r1, r1, 16); addi(r0, r0, 16); - fallback_patch_at(l16, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l16, _jit->pc.w); lshi(r2, r2, 8); l8 = fallback_bmsr(_jit->pc.w, r1, r2); lshi(r1, r1, 8); addi(r0, r0, 8); - fallback_patch_at(l8, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l8, _jit->pc.w); lshi(r2, r2, 4); l4 = fallback_bmsr(_jit->pc.w, r1, r2); lshi(r1, r1, 4); addi(r0, r0, 4); - fallback_patch_at(l4, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l4, _jit->pc.w); lshi(r2, r2, 2); l2 = fallback_bmsr(_jit->pc.w, r1, r2); lshi(r1, r1, 2); addi(r0, r0, 2); - fallback_patch_at(l2, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l2, _jit->pc.w); lshi(r2, r2, 1); l1 = fallback_bmsr(_jit->pc.w, r1, r2); addi(r0, r0, 1); - fallback_patch_at(l1, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l1, _jit->pc.w); fallback_patch_jmpi(clz, _jit->pc.w); jit_unget_reg(r2_reg); jit_unget_reg(r1_reg); +# endif } static void @@ -272,61 +705,3540 @@ _fallback_cto(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) ctz = fallback_bnei(_jit->pc.w, r0, 0); movi(r0, __WORDSIZE); done = fallback_jmpi(_jit->pc.w); - fallback_patch_at(ctz, _jit->pc.w); + fallback_flush(); + fallback_patch_bnei(ctz, _jit->pc.w); fallback_ctz(r0, r0); + fallback_flush(); fallback_patch_jmpi(done, _jit->pc.w); } static void _fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +# if USE_BIT_TABLES + /* Adapted from 
http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightModLookup + * Table for 64 bits was recomputed choosing 67 as next prime number. + * The cost of the modulo might not compensate and could be better to + * use the alternate version (or rbit and use clz). + */ + jit_int32_t t0; +# if __WORDSIZE == 32 + static const jit_uint8_t mod37[] = { + 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, + 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, + 5, 20, 8, 19, 18 + }; + /* return mod37[(-r1 & r1) % 37]; */ +# else + static const jit_uint8_t mod67[] = { + 64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54, + 4, 0, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55, 47, + 5, 32, 0, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27, 29, 50, + 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56, 7, 48, 35, + 6, 34, 33 + }; + /* return mod67[(-r1 & r1) % 67]; */ +# endif + t0 = fallback_jit_get_reg(jit_class_gpr); + if (r0 == r1) { + negr(rn(t0), r1); + andr(r0, rn(t0), r1); + } + else { + negr(r0, r1); + andr(r0, r0, r1); + } +# if __WORDSIZE == 32 + remi_u(r0, r0, 37); + movi(rn(t0), (jit_word_t)mod37); +# else + remi_u(r0, r0, 67); + movi(rn(t0), (jit_word_t)mod67); +# endif + ldxr_uc(r0, rn(t0), r0); + jit_unget_reg(t0); +# else jit_int32_t r1_reg, r2, r2_reg; jit_word_t ctz, l32, l16, l8, l4, l2, l1; l32 = fallback_bnei(_jit->pc.w, r1, 0); movi(r0, __WORDSIZE); ctz = fallback_jmpi(_jit->pc.w); - fallback_patch_at(l32, _jit->pc.w); - r2_reg = jit_get_reg(jit_class_gpr); + fallback_flush(); + fallback_patch_bnei(l32, _jit->pc.w); + r2_reg = fallback_jit_get_reg(jit_class_gpr); r2 = rn(r2_reg); - r1_reg = jit_get_reg(jit_class_gpr); + r1_reg = fallback_jit_get_reg(jit_class_gpr); movr(rn(r1_reg), r1); r1 = rn(r1_reg); movi(r0, 0); -# if __WORDSIZE == 64 +# if __WORDSIZE == 64 movi(r2, 0xffffffffUL); l32 = fallback_bmsr(_jit->pc.w, r1, r2); rshi_u(r1, r1, 32); addi(r0, r0, 32); - fallback_patch_at(l32, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l32, _jit->pc.w); rshi(r2, r2, 16); -# else +# else movi(r2, 0xffffUL); -# endif +# endif l16 = fallback_bmsr(_jit->pc.w, r1, r2); rshi_u(r1, r1, 16); addi(r0, r0, 16); - fallback_patch_at(l16, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l16, _jit->pc.w); rshi(r2, r2, 8); l8 = fallback_bmsr(_jit->pc.w, r1, r2); rshi_u(r1, r1, 8); addi(r0, r0, 8); - fallback_patch_at(l8, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l8, _jit->pc.w); rshi(r2, r2, 4); l4 = fallback_bmsr(_jit->pc.w, r1, r2); rshi_u(r1, r1, 4); addi(r0, r0, 4); - fallback_patch_at(l4, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l4, _jit->pc.w); rshi(r2, r2, 2); l2 = fallback_bmsr(_jit->pc.w, r1, r2); rshi_u(r1, r1, 2); addi(r0, r0, 2); - fallback_patch_at(l2, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l2, _jit->pc.w); rshi(r2, r2, 1); l1 = fallback_bmsr(_jit->pc.w, r1, r2); addi(r0, r0, 1); - fallback_patch_at(l1, _jit->pc.w); + fallback_flush(); + fallback_patch_bmsr(l1, _jit->pc.w); fallback_patch_jmpi(ctz, _jit->pc.w); jit_unget_reg(r2_reg); jit_unget_reg(r1_reg); +# endif +} + +static void +_fallback_rbit(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +# if USE_BIT_TABLES + /* t0 = r1; + * t1 = t0 & 0xff; + * t2 = swap_tab; + * r0 = t2[t1]; + * t3 = 8; + * loop: + * t1 = t0 >> t3; + * t1 &= 0xff; + * r0 <<= 8; + * r0 |= t2[t1]; + * t3 += 8; + * if (t3 < __WORDSIZE) + * goto loop; + */ + jit_word_t loop; + jit_int32_t t0, r1_reg, t1, t2, t3; + static const unsigned char swap_tab[256] = { + 0, 128, 64, 
192, 32, 160, 96, 224, + 16, 144, 80, 208, 48, 176, 112, 240, + 8, 136, 72, 200, 40, 168, 104, 232, + 24, 152, 88, 216 ,56, 184, 120, 248, + 4, 132, 68, 196, 36, 164, 100, 228, + 20, 148, 84, 212, 52, 180, 116, 244, + 12, 140, 76, 204, 44, 172, 108, 236, + 28, 156, 92, 220, 60, 188, 124, 252, + 2, 130, 66, 194, 34, 162, 98, 226, + 18, 146, 82, 210, 50, 178, 114, 242, + 10, 138, 74, 202, 42, 170, 106, 234, + 26, 154, 90, 218, 58, 186, 122, 250, + 6, 134, 70, 198, 38, 166, 102, 230, + 22, 150, 86, 214, 54, 182, 118, 246, + 14, 142, 78, 206, 46, 174, 110, 238, + 30, 158, 94, 222, 62, 190, 126, 254, + 1, 129, 65, 193, 33, 161, 97, 225, + 17, 145, 81, 209, 49, 177, 113, 241, + 9, 137, 73, 201, 41, 169, 105, 233, + 25, 153, 89, 217, 57, 185, 121, 249, + 5, 133, 69, 197, 37, 165, 101, 229, + 21, 149, 85, 213, 53, 181, 117, 245, + 13, 141, 77, 205, 45, 173, 109, 237, + 29, 157, 93, 221, 61, 189, 125, 253, + 3, 131, 67, 195, 35, 163, 99, 227, + 19, 147, 83, 211, 51, 179, 115, 243, + 11, 139, 75, 203, 43, 171, 107, 235, + 27, 155, 91, 219, 59, 187, 123, 251, + 7, 135, 71, 199, 39, 167, 103, 231, + 23, 151, 87, 215, 55, 183, 119, 247, + 15, 143, 79, 207, 47, 175, 111, 239, + 31, 159, 95, 223, 63, 191, 127, 255 + }; + if (r0 == r1) { + t0 = fallback_jit_get_reg(jit_class_gpr); + r1_reg = rn(t0); + } + else { + t0 = JIT_NOREG; + r1_reg = r1; + } + t1 = fallback_jit_get_reg(jit_class_gpr); + t2 = fallback_jit_get_reg(jit_class_gpr); + t3 = fallback_jit_get_reg(jit_class_gpr); + if (r0 == r1) + movr(rn(t0), r1); + extr_uc(rn(t1), r1_reg); + movi(rn(t2), (jit_word_t)swap_tab); + ldxr_uc(r0, rn(t2), rn(t1)); + movi(rn(t3), 8); + fallback_flush(); + loop = _jit->pc.w; + rshr(rn(t1), r1_reg, rn(t3)); + extr_uc(rn(t1), rn(t1)); + lshi(r0, r0, 8); + ldxr_uc(rn(t1), rn(t2), rn(t1)); + orr(r0, r0, rn(t1)); + addi(rn(t3), rn(t3), 8); + blti(loop, rn(t3), __WORDSIZE); + jit_unget_reg(t3); + jit_unget_reg(t2); + jit_unget_reg(t1); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); +# elif USE_BITSWAP_UNROLLED +/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ +/* +unsigned int v; // 32-bit word to reverse bit order + +// swap odd and even bits +v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); +// swap consecutive pairs +v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); +// swap nibbles ... +v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); +// swap bytes +v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); +// swap 2-byte long pairs +v = ( v >> 16 ) | ( v << 16); + */ + jit_int32_t t0, t1, t2, t3, t4; + movr(r0, r1); + t0 = fallback_jit_get_reg(jit_class_gpr); + t1 = fallback_jit_get_reg(jit_class_gpr); + t2 = fallback_jit_get_reg(jit_class_gpr); + movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); + rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); + rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 
0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); + rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL); + rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# if __WORDSIZE == 32 + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + lshi(rn(t2), r0, 16); /* t2 = v << 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# else + movi(rn(t0), 0x0000ffff0000ffffL); + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ + lshi(rn(t2), r0, 32); /* t2 = v << 32 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# endif + jit_unget_reg(t2); + jit_unget_reg(t1); + jit_unget_reg(t0); +# elif USE_BITSWAP_LOOP +/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ +/* +unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2 +unsigned int mask = ~0; +while ((s >>= 1) > 0) +{ + mask ^= (mask << s); + v = ((v >> s) & mask) | ((v << s) & ~mask); +} +*/ + jit_int32_t s, mask; + jit_word_t loop, done, t0, t1; + movr(v, r1); + s = fallback_jit_get_reg(jit_class_gpr); + movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */ + mask = fallback_jit_get_reg(jit_class_gpr); + movi(rn(mask), ~0L); /* mask = ~0; */ + flush(); + loop = _jit->pc.w; /* while ((s >>= 1) > 0) */ + rshi(rn(s), rn(s), 1); /* (s >>= 1) */ + done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */ + t0 = fallback_jit_get_reg(jit_class_gpr); + lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */ + xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */ + rshr(rn(t0), v, rn(s)); /* t0 = v >> s */ + andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */ + t1 = fallback_jit_get_reg(jit_class_gpr); + lshr(rn(t1), v, rn(s)); /* t1 = v << s */ + comr(v, rn(mask)); /* v = ~mask */ + andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */ + orr(v, rn(t0), rn(t1)); /* v = t0 | t1 */ + jmpi(loop); + flush(); + patch_at(done, _jit->pc.w); + jit_unget_reg(t1); + jit_unget_reg(t0); + jit_unget_reg(mask); + jit_unget_reg(s); +# endif +} + +static void +_fallback_popcnt(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + /* Same approach as rbitr */ + /* t0 = r1; + * t1 = t0 & 0xff; + * t2 = pop_tab; + * r0 = t2[t1]; + * t3 = 8; + * loop: + * t1 = t0 >> t3; + * t1 &= 0xff; + * r0 <<= 8; + * r0 |= t2[t1]; + * t3 += 8; + * if (t3 < __WORDSIZE) + * goto loop; + */ + jit_word_t loop; + jit_int32_t t0, r1_reg, t1, t2, t3; + static const unsigned char pop_tab[256] = { + 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8 + }; + if (r0 == r1) { + t0 = 
fallback_jit_get_reg(jit_class_gpr); + r1_reg = rn(t0); + } + else { + t0 = JIT_NOREG; + r1_reg = r1; + } + t1 = fallback_jit_get_reg(jit_class_gpr); + t2 = fallback_jit_get_reg(jit_class_gpr); + t3 = fallback_jit_get_reg(jit_class_gpr); + if (r0 == r1) + movr(rn(t0), r1); + extr_uc(rn(t1), r1_reg); + movi(rn(t2), (jit_word_t)pop_tab); + ldxr_uc(r0, rn(t2), rn(t1)); + movi(rn(t3), 8); + fallback_flush(); + loop = _jit->pc.w; + rshr(rn(t1), r1_reg, rn(t3)); + extr_uc(rn(t1), rn(t1)); + ldxr_uc(rn(t1), rn(t2), rn(t1)); + addr(r0, r0, rn(t1)); + addi(rn(t3), rn(t3), 8); + blti(loop, rn(t3), __WORDSIZE); + jit_unget_reg(t3); + jit_unget_reg(t2); + jit_unget_reg(t1); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); +} + +static void +_fallback_lrotr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + /* r0 = (r1 << r2) | (r1 >> (__WORDSIZE - r2)) */ + jit_int32_t t0, t1; + t0 = fallback_jit_get_reg(jit_class_gpr); + if (r0 == r1 || r0 == r2) { + t1 = fallback_jit_get_reg(jit_class_gpr); + lshr(rn(t0), r1, r2); + rsbi(rn(t1), r2, __WORDSIZE); + rshr_u(rn(t1), r1, rn(t1)); + orr(r0, rn(t0), rn(t1)); + jit_unget_reg(t1); + } + else { + lshr(r0, r1, r2); + rsbi(rn(t0), r2, __WORDSIZE); + rshr_u(rn(t0), r1, rn(t0)); + orr(r0, r0, rn(t0)); + } + jit_unget_reg(t0); +} + +static void +_fallback_lroti(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + t0 = fallback_jit_get_reg(jit_class_gpr); + lshi(rn(t0), r1, i0); + rshi_u(r0, r1, __WORDSIZE - i0); + orr(r0, r0, rn(t0)); + jit_unget_reg(t0); +} + +static void +_fallback_rrotr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + /* r0 = (r1 >> r2) | (r1 << (__WORDSIZE - r2)) */ + jit_int32_t t0, t1; + t0 = fallback_jit_get_reg(jit_class_gpr); + if (r0 == r1 || r0 == r2) { + t1 = fallback_jit_get_reg(jit_class_gpr); + rshr_u(rn(t0), r1, r2); + rsbi(rn(t1), r2, __WORDSIZE); + lshr(rn(t1), r1, rn(t1)); + orr(r0, rn(t0), rn(t1)); + jit_unget_reg(t1); + } + else { + rshr_u(r0, r1, r2); + rsbi(rn(t0), r2, __WORDSIZE); + lshr(rn(t0), r1, rn(t0)); + orr(r0, r0, rn(t0)); + } + jit_unget_reg(t0); +} + +static void +_fallback_rroti(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0; + t0 = fallback_jit_get_reg(jit_class_gpr); + rshi_u(rn(t0), r1, i0); + lshi(r0, r1, __WORDSIZE - i0); + orr(r0, r0, rn(t0)); + jit_unget_reg(t0); +} + +static void +_fallback_ext(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1) +{ + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if (i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + if (__WORDSIZE - (i0 + i1)) { + lshi(r0, r1, __WORDSIZE - (i0 + i1)); + rshi(r0, r0, __WORDSIZE - i1); + } + else + rshi(r0, r1, __WORDSIZE - i1); + } +} + +static void +_fallback_ext_u(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1) +{ + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if (i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + if (i0) + rshi_u(r0, r1, i0); + andi(r0, r0, (1L << i1) - 1); + } +} + +static void +_fallback_dep(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0; + jit_word_t mask; + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if (i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); 
+# endif + mask = (1L << i1) - 1; + t0 = fallback_jit_get_reg(jit_class_gpr); + andi(rn(t0), r1, mask); + if (i0) { + lshi(rn(t0), rn(t0), i0); + mask <<= i0; + } + andi(r0, r0, ~mask); + orr(r0, r0, rn(t0)); + jit_unget_reg(t0); + } +} + +static void +_fallback_qlshr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + /* r1 = __WORDSIZE - r3; + * if (r1 != __WORDSIZE) { + * r0 = r2 << r3; + * if (r3 != 0) + * r1 = r2 >> r1; + * else + * r1 = 0; + * } + * else { + * r1 = r2; + * r0 = 0; + * } + */ + jit_int32_t t0, s0, t2, s2, t3, s3; + jit_word_t over, zero, done, done_over; + s0 = fallback_jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = fallback_jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = fallback_jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + rsbi(t0, t3, __WORDSIZE); + lshr(r0, t2, t3); + rshr(r1, t2, t0); + zero = fallback_beqi(_jit->pc.w, t3, 0); + over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE); + done = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_jmpi(over, _jit->pc.w); + /* overflow */ + movi(r0, 0); + done_over = fallback_jmpi(_jit->pc.w); + /* zero */ + fallback_flush(); + fallback_patch_beqi(zero, _jit->pc.w); + rshi(r1, t2, __WORDSIZE - 1); + fallback_flush(); + fallback_patch_jmpi(done, _jit->pc.w); + fallback_patch_jmpi(done_over, _jit->pc.w); + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_fallback_qlshi(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + assert((jit_uword_t)i0 <= __WORDSIZE); + if (i0 == 0) { + if (r0 != r2) { + movr(r0, r2); + rshi(r1, r2, __WORDSIZE - 1); + } + else + rshi(r1, r2, __WORDSIZE - 1); + } + else if (i0 != __WORDSIZE) { + rshi(r1, r2, __WORDSIZE - i0); + lshi(r0, r2, i0); + } + else { + movr(r1, r2); + movi(r0, 0); + } +} + +static void +_fallback_qlshr_u(jit_state_t *_jit, jit_int32_t r0, + jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + /* r1 = __WORDSIZE - r3; + * if (r1 != __WORDSIZE) { + * r0 = r2 << r3; + * if (r3 != 0) + * r1 = (unsigned)r2 >> r1; + * else + * r1 = 0; + * } + * else { + * r1 = r2; + * r0 = 0; + * } + */ + jit_int32_t t0, s0, t2, s2, t3, s3; + jit_word_t over, zero, done, done_over; + s0 = fallback_jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = fallback_jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = fallback_jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + rsbi(t0, t3, __WORDSIZE); + lshr(r0, t2, t3); + rshr_u(r1, t2, t0); + zero = fallback_beqi(_jit->pc.w, t3, 0); + over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE); + done = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_jmpi(over, _jit->pc.w); + /* overflow */ + movi(r0, 0); + done_over = fallback_jmpi(_jit->pc.w); + /* zero */ + fallback_flush(); + fallback_patch_beqi(zero, _jit->pc.w); + movi(r1, 0); + fallback_flush(); + fallback_patch_jmpi(done, _jit->pc.w); + fallback_patch_jmpi(done_over, _jit->pc.w); + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_fallback_qlshi_u(jit_state_t *_jit, jit_int32_t r0, + jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + assert(i0 <= __WORDSIZE); + if (i0 == 0) { + movr(r0, r2); + movi(r1, 0); + } + else if 
(i0 != __WORDSIZE) { + rshi_u(r1, r2, __WORDSIZE - i0); + lshi(r0, r2, i0); + } + else { + movr(r1, r2); + movi(r0, 0); + } +} + +static void +_fallback_qrshr(jit_state_t *_jit, jit_int32_t r0, + jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + /* r1 = __WORDSIZE - r3; + * if (r1 != __WORDSIZE) { + * r0 = r2 >> r3; + * if (r3 != 0) + * r1 = r2 << r1; + * else + * r1 = r2 >> (__WORDSIZE - 1); + * } + * else { + * r1 = r2; + * r0 = r2 >> (__WORDSIZE - 1); + * } + */ + jit_int32_t t0, s0, t2, s2, t3, s3; + jit_word_t over, zero, done, done_over; + s0 = fallback_jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = fallback_jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = fallback_jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + rsbi(t0, t3, __WORDSIZE); + rshr(r0, t2, t3); + lshr(r1, t2, t0); + zero = fallback_beqi(_jit->pc.w, t3, 0); + over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE); + done = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_jmpi(over, _jit->pc.w); + /* underflow */ + rshi(r0, t2, __WORDSIZE - 1); + done_over = fallback_jmpi(_jit->pc.w); + /* zero */ + fallback_flush(); + fallback_patch_beqi(zero, _jit->pc.w); + movi(r1, 0); + fallback_flush(); + fallback_patch_jmpi(done, _jit->pc.w); + fallback_patch_jmpi(done_over, _jit->pc.w); + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_fallback_qrshi(jit_state_t *_jit, jit_int32_t r0, + jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + assert((jit_uword_t)i0 <= __WORDSIZE); + if (i0 == 0) { + movr(r0, r2); + movi(r1, 0); + } + else if (i0 != __WORDSIZE) { + lshi(r1, r2, __WORDSIZE - i0); + rshi(r0, r2, i0); + } + else { + movr(r1, r2); + rshi(r0, r2, __WORDSIZE - 1); + } +} + +static void +_fallback_qrshr_u(jit_state_t *_jit, jit_int32_t r0, + jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + /* r1 = __WORDSIZE - r3; + * if (r1 != __WORDSIZE) { + * r0 = (unsigned)r2 >> r3; + * if (r3 != 0) + * r1 = r2 << r1; + * else + * r1 = 0; + * } + * else { + * r1 = r2; + * r0 = 0; + * } + */ + jit_int32_t t0, s0, t2, s2, t3, s3; + jit_word_t over, zero, done, done_over; + s0 = fallback_jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = fallback_jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = fallback_jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + rsbi(t0, t3, __WORDSIZE); + rshr_u(r0, t2, t3); + lshr(r1, t2, t0); + zero = fallback_beqi(_jit->pc.w, t3, 0); + over = fallback_beqi(_jit->pc.w, t3, __WORDSIZE); + done = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_jmpi(over, _jit->pc.w); + /* underflow */ + movi(r0, 0); + done_over = fallback_jmpi(_jit->pc.w); + /* zero */ + fallback_flush(); + fallback_patch_beqi(zero, _jit->pc.w); + movi(r1, 0); + fallback_flush(); + fallback_patch_jmpi(done, _jit->pc.w); + fallback_patch_jmpi(done_over, _jit->pc.w); + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_fallback_qrshi_u(jit_state_t *_jit, jit_int32_t r0, + jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + assert((jit_uword_t)i0 <= __WORDSIZE); + if (i0 == 0) { + movr(r0, r2); + movi(r1, 0); + } + else if (i0 != __WORDSIZE) { + lshi(r1, r2, __WORDSIZE - i0); + rshi_u(r0, r2, i0); + } + else { + movr(r1, r2); + movi(r0, 
0); + } +} + +static void +_unldr2(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + jit_word_t un2, al; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r1, r2); + if (sign) + ldr_s(r0, r1); + else + ldr_us(r0, r1); + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + if (sign) + ldxi_c(r2, r1, 1); + else + ldxi_uc(r2, r1, 1); + lshi(r2, r2, 8); +# else + if (sign) + ldr_c(r0, r1); + else + ldr_uc(r0, r1); + lshi(r0, r0, 8); + ldxi_uc(r2, r1, 1); +# endif + orr(r0, r0, r2); + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unldi2(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + if ((i1 & -2) == i1) { + if (sign) + ldi_s(r0, i1); + else + ldi_us(r0, i1); + } + else { + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + if (sign) + ldi_c(r2, i1 + 1); + else + ldi_uc(r2, i1 + 1); + lshi(r2, r2, 8); +# else + if (sign) + ldi_c(r0, i1); + else + ldi_uc(r0, i1); + lshi(r0, r0, 8); + ldi_uc(r2, i1 + 1); +# endif + orr(r0, r0, r2); + jit_unget_reg(t0); + } +} + +static void +_unldr3(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + jit_word_t un2, or; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_us(r0, r1); + if (sign) + ldxi_c(r2, r1, 2); + else + ldxi_uc(r2, r1, 2); + lshi(r2, r2, 16); +# else + if (sign) + ldr_s(r0, r1); + else + ldr_us(r0, r1); + lshi(r0, r0, 8); + ldxi_uc(r2, r1, 2); +# endif + or = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + if (sign) + ldxi_s(r2, r1, 1); + else + ldxi_us(r2, r1, 1); + lshi(r2, r2, 8); +# else + if (sign) + ldr_c(r0, r1); + else + ldr_uc(r0, r1); + lshi(r0, r0, 16); + ldxi_us(r2, r1, 1); +# endif + fallback_flush(); + fallback_patch_jmpi(or, _jit->pc.w); + orr(r0, r0, r2); + jit_unget_reg(t0); +} + +static void +_unldi3(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i1); + if (sign) + ldi_c(r2, i1 + 2); + else + ldi_uc(r2, i1 + 2); + lshi(r2, r2, 16); +# else + if (sign) + ldi_s(r0, i1); + else + ldi_us(r0, i1); + lshi(r0, r0, 8); + ldi_uc(r2, i1 + 2); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + if (sign) + ldi_s(r2, i1 + 1); + else + ldi_us(r2, i1 + 1); + lshi(r2, r2, 8); +# else + if (sign) + ldi_c(r0, i1); + else + ldi_uc(r0, i1); + lshi(r0, r0, 16); + ldi_us(r2, i1 + 1); +# endif + } + orr(r0, r0, r2); + jit_unget_reg(t0); +} + +static void +_unldr4(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 +# if __WORDSIZE == 64 + , jit_bool_t sign +# endif + ) +{ + jit_int32_t t0, r2; + jit_word_t un4, un2, un3, al, or, or3; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -4); + un4 = fallback_bner(_jit->pc.w, r1, r2); +# if __WORDSIZE == 64 + if (sign) +# endif + ldr_i(r0, r1); +# if __WORDSIZE == 64 + else + ldr_ui(r0, r1); +# endif + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + 
fallback_patch_bner(un4, _jit->pc.w); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_us(r0, r1); +# if __WORDSIZE == 64 + if (sign) + ldxi_s(r2, r1, 2); + else +# endif + ldxi_us(r2, r1, 2); + lshi(r2, r2, 16); +# else +# if __WORDSIZE == 64 + if (sign) + ldr_s(r0, r1); + else +# endif + ldr_us(r0, r1); + lshi(r0, r0, 16); + ldxi_us(r2, r1, 2); +# endif + or = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); + andi(r2, r1, 3); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_i(r2, r1, 1); + lshi(r2, r2, 8); +# if __WORDSIZE == 64 + if (sign) + extr_i(r2, r2); + else + extr_ui(r2, r2); +# endif +# else +# if __WORDSIZE == 64 + if (sign) + ldr_c(r0, r1); + else +# endif + ldr_uc(r0, r1); + lshi(r0, r0, 24); +# if __WORDSIZE == 32 + ldxi(r2, r1, 1); +# else + ldxi_ui(r2, r1, 1); +# endif + rshi_u(r2, r2, 8); +# endif + or3 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un3, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN +# if __WORDSIZE == 32 + ldxi(r0, r1, -1); +# else + ldxi_ui(r0, r1, -1); +# endif + rshi_u(r0, r0, 8); +# if __WORDSIZE == 64 + if (sign) + ldxi_c(r2, r1, 3); + else +# endif + ldxi_uc(r2, r1, 3); + lshi(r2, r2, 24); +# else + ldxi_i(r0, r1, -1); + lshi(r0, r0, 8); +# if __WORDSIZE == 64 + if (sign) + extr_i(r0, r0); + else + extr_ui(r0, r0); +# endif + ldxi_uc(r2, r1, 3); +# endif + fallback_flush(); + fallback_patch_jmpi(or, _jit->pc.w); + fallback_patch_jmpi(or3, _jit->pc.w); + orr(r0, r0, r2); + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + jit_unget_reg(t0); } + +static void +_unldi4(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1 +# if __WORDSIZE == 64 + , jit_bool_t sign +# endif + ) +{ + jit_int32_t t0, r2; + if ((i1 & -4) == i1) { +# if __WORDSIZE == 64 + if (sign) +# endif + ldi_i(r0, i1); +# if __WORDSIZE == 64 + else + ldi_ui(r0, i1); +# endif + } + else { + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i1); +# if __WORDSIZE == 64 + if (sign) + ldi_s(r2, i1 + 2); + else +# endif + ldi_us(r2, i1 + 2); + lshi(r2, r2, 16); +# else +# if __WORDSIZE == 64 + if (sign) + ldi_s(r0, i1); + else +# endif + ldi_us(r0, i1); + lshi(r0, r0, 16); + ldi_us(r2, i1 + 2); +# endif + } + else if ((i1 & 3) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_i(r2, i1 + 1); + lshi(r2, r2, 8); +# if __WORDSIZE == 64 + if (sign) + extr_i(r2, r2); + else + extr_ui(r2, r2); +# endif +# else +# if __WORDSIZE == 64 + if (sign) + ldi_c(r0, i1); + else +# endif + ldi_uc(r0, i1); + lshi(r0, r0, 24); +# if __WORDSIZE == 32 + ldi(r2, i1 + 1); +# else + ldi_ui(r2, i1 + 1); +# endif + rshi_u(r2, r2, 8); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN +# if __WORDSIZE == 32 + ldi(r0, i1 - 1); +# else + ldi_ui(r0, i1 - 1); +# endif + rshi_u(r0, r0, 8); +# if __WORDSIZE == 64 + if (sign) + ldi_c(r2, i1 + 3); + else +# endif + ldi_uc(r2, i1 + 3); + lshi(r2, r2, 24); +# else + ldi_i(r0, i1 - 1); + lshi(r0, r0, 8); +# if __WORDSIZE == 64 + if (sign) + extr_i(r0, r0); + else + extr_ui(r0, r0); +# endif + ldi_uc(r2, i1 + 3); +# endif + } + orr(r0, r0, r2); + jit_unget_reg(t0); + } +} + +# if __WORDSIZE == 64 +static void +_unldr5(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + jit_word_t un4, un2, un3, or, or4, or3; + t0 = 
fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -4); + un4 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r1); + if (sign) + ldxi_c(r2, r1, 4); + else + ldxi_uc(r2, r1, 4); + lshi(r2, r2, 32); +# else + if (sign) + ldr_i(r0, r1); + else + ldr_ui(r0, r1); + lshi(r0, r0, 8); + ldxi_uc(r2, r1, 4); +# endif + or = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un4, _jit->pc.w); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_us(r0, r1); + ldxi_us(r2, r1, 2); + lshi(r2, r2, 16); + orr(r0, r0, r2); + if (sign) + ldxi_c(r2, r1, 4); + else + ldxi_uc(r2, r1, 4); + lshi(r2, r2, 32); +# else + if (sign) + ldr_s(r0, r1); + else + ldr_us(r0, r1); + lshi(r0, r0, 24); + ldxi_us(r2, r1, 2); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_uc(r2, r1, 4); +# endif + or4 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); + andi(r2, r1, 3); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + if (sign) + ldxi_i(r2, r1, 1); + else + ldxi_ui(r2, r1, 1); + lshi(r2, r2, 8); +# else + if (sign) + ldr_c(r0, r1); + else + ldr_uc(r0, r1); + lshi(r0, r0, 32); + ldxi_ui(r2, r1, 1); +# endif + or3 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un3, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_us(r2, r1, 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + if (sign) + ldxi_s(r2, r1, 3); + else + ldxi_us(r2, r1, 3); + lshi(r2, r2, 24); +# else + if (sign) + ldr_c(r0, r1); + else + ldr_uc(r0, r1); + lshi(r0, r0, 32); + ldxi_us(r2, r1, 1); + lshi(r2, r2, 16); + orr(r0, r0, r2); + ldxi_us(r2, r1, 3); +# endif + fallback_flush(); + fallback_patch_jmpi(or, _jit->pc.w); + fallback_patch_jmpi(or4, _jit->pc.w); + fallback_patch_jmpi(or3, _jit->pc.w); + orr(r0, r0, r2); + jit_unget_reg(t0); +} + +static void +_unldi5(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -4) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i1); + if (sign) + ldi_c(r2, i1 + 4); + else + ldi_uc(r2, i1 + 4); + lshi(r2, r2, 32); +# else + if (sign) + ldi_i(r0, i1); + else + ldi_ui(r0, i1); + lshi(r0, r0, 8); + ldi_uc(r2, i1 + 4); +# endif + } + else if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i1); + ldi_us(r2, i1 + 2); + lshi(r2, r2, 16); + orr(r0, r0, r2); + if (sign) + ldi_c(r2, i1 + 4); + else + ldi_uc(r2, i1 + 4); + lshi(r2, r2, 32); +# else + if (sign) + ldi_s(r0, i1); + else + ldi_us(r0, i1); + lshi(r0, r0, 24); + ldi_us(r2, i1 + 2); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_uc(r2, i1 + 4); +# endif + } + else if ((i1 & 3) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + if (sign) + ldi_i(r2, i1 + 1); + else + ldi_ui(r2, i1 + 1); + lshi(r2, r2, 8); +# else + if (sign) + ldi_c(r0, i1); + else + ldi_uc(r0, i1); + lshi(r0, r0, 32); + ldi_ui(r2, i1 + 1); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_us(r2, i1 + 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + if (sign) + ldi_s(r2, i1 + 3); + else + ldi_us(r2, i1 + 3); + lshi(r2, r2, 24); +# else + if (sign) + ldi_c(r0, i1); + else + ldi_uc(r0, i1); + lshi(r0, r0, 32); + ldi_us(r2, i1 + 1); + lshi(r2, r2, 16); + orr(r0, r0, r2); + ldi_us(r2, i1 + 3); +# endif + } + orr(r0, r0, r2); + jit_unget_reg(t0); +} + +static void +_unldr6(jit_state_t 
*_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + jit_word_t un4, un2, un3, or, or2, or3; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -4); + un4 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r1); + if (sign) + ldxi_s(r2, r1, 4); + else + ldxi_us(r2, r1, 4); + lshi(r2, r2, 32); +# else + if (sign) + ldr_i(r0, r1); + else + ldr_ui(r0, r1); + lshi(r0, r0, 16); + ldxi_us(r2, r1, 4); +# endif + or = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un4, _jit->pc.w); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_us(r0, r1); + if (sign) + ldxi_i(r2, r1, 2); + else + ldxi_ui(r2, r1, 2); + lshi(r2, r2, 16); +# else + if (sign) + ldr_s(r0, r1); + else + ldr_us(r0, r1); + lshi(r0, r0, 32); + ldxi_ui(r2, r1, 2); +# endif + or2 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); + andi(r2, r1, 3); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_ui(r2, r1, 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + if (sign) + ldxi_c(r2, r1, 5); + else + ldxi_uc(r2, r1, 5); + lshi(r2, r2, 40); +# else + if (sign) + ldr_c(r0, r1); + else + ldr_uc(r0, r1); + lshi(r0, r0, 40); + ldxi_ui(r2, r1, 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_uc(r2, r1, 5); +# endif + or3 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un3, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_us(r2, r1, 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_us(r2, r1, 3); + lshi(r2, r2, 24); + orr(r0, r0, r2); + if (sign) + ldxi_c(r2, r1, 5); + else + ldxi_uc(r2, r1, 5); + lshi(r2, r2, 40); +# else + if (sign) + ldr_c(r0, r1); + else + ldr_uc(r0, r1); + lshi(r0, r0, 40); + ldxi_us(r2, r1, 1); + lshi(r2, r2, 24); + orr(r0, r0, r2); + ldxi_us(r2, r1, 3); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_uc(r2, r1, 5); +# endif + fallback_flush(); + fallback_patch_jmpi(or, _jit->pc.w); + fallback_patch_jmpi(or2, _jit->pc.w); + fallback_patch_jmpi(or3, _jit->pc.w); + orr(r0, r0, r2); + jit_unget_reg(t0); +} + +static void +_unldi6(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -4) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i1); + if (sign) + ldi_s(r2, i1 + 4); + else + ldi_us(r2, i1 + 4); + lshi(r2, r2, 32); +# else + if (sign) + ldi_i(r0, i1); + else + ldi_ui(r0, i1); + lshi(r0, r0, 16); + ldi_us(r2, i1 + 4); +# endif + } + else if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i1); + if (sign) + ldi_i(r2, i1 + 2); + else + ldi_ui(r2, i1 + 2); + lshi(r2, r2, 16); +# else + if (sign) + ldi_s(r0, i1); + else + ldi_us(r0, i1); + lshi(r0, r0, 32); + ldi_ui(r2, i1 + 2); +# endif + } + else if ((i1 & 3) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_ui(r2, i1 + 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + if (sign) + ldi_c(r2, i1 + 5); + else + ldi_uc(r2, i1 + 5); + lshi(r2, r2, 40); +# else + if (sign) + ldi_c(r0, i1); + else + ldi_uc(r0, i1); + lshi(r0, r0, 40); + ldi_ui(r2, i1 + 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_uc(r2, i1 + 5); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_us(r2, i1 + 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_us(r2, i1 + 3); + lshi(r2, r2, 24); + orr(r0, r0, r2); + if (sign) + ldi_c(r2, 
i1 + 5); + else + ldi_uc(r2, i1 + 5); + lshi(r2, r2, 40); +# else + if (sign) + ldi_c(r0, i1); + else + ldi_uc(r0, i1); + lshi(r0, r0, 40); + ldi_us(r2, i1 + 1); + lshi(r2, r2, 24); + orr(r0, r0, r2); + ldi_us(r2, i1 + 3); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_uc(r2, i1 + 5); +# endif + } + orr(r0, r0, r2); + jit_unget_reg(t0); +} + +static void +_unldr7(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + jit_word_t un4, un2, un3, or, or2, or3; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -4); + un4 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r1); + ldxi_us(r2, r1, 4); + lshi(r2, r2, 32); + orr(r0, r0, r2); + if (sign) + ldxi_c(r2, r1, 6); + else + ldxi_uc(r2, r1, 6); + lshi(r2, r2, 48); +# else + if (sign) + ldr_i(r0, r1); + else + ldr_ui(r0, r1); + lshi(r0, r0, 24); + ldxi_us(r2, r1, 4); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_uc(r2, r1, 6); +# endif + or = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un4, _jit->pc.w); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_us(r0, r1); + ldxi_ui(r2, r1, 2); + lshi(r2, r2, 16); + orr(r0, r0, r2); + if (sign) + ldxi_c(r2, r1, 6); + else + ldxi_uc(r2, r1, 6); + lshi(r2, r2, 48); +# else + if (sign) + ldr_s(r0, r1); + else + ldr_us(r0, r1); + lshi(r0, r0, 40); + ldxi_ui(r2, r1, 2); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_uc(r2, r1, 6); +# endif + or2 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); + andi(r2, r1, 3); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_ui(r2, r1, 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + if (sign) + ldxi_s(r2, r1, 5); + else + ldxi_us(r2, r1, 5); + lshi(r2, r2, 40); +# else + if (sign) + ldr_c(r0, r1); + else + ldr_uc(r0, r1); + lshi(r0, r0, 48); + ldxi_ui(r2, r1, 1); + lshi(r2, r2, 16); + orr(r0, r0, r2); + ldxi_us(r2, r1, 5); +# endif + or3 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un3, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_us(r2, r1, 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + if (sign) + ldxi_i(r2, r1, 3); + else + ldxi_ui(r2, r1, 3); + lshi(r2, r2, 24); +# else + if (sign) + ldr_c(r0, r1); + else + ldr_uc(r0, r1); + lshi(r0, r0, 48); + ldxi_us(r2, r1, 1); + lshi(r2, r2, 32); + orr(r0, r0, r2); + ldxi_ui(r2, r1, 3); +# endif + fallback_flush(); + fallback_patch_jmpi(or, _jit->pc.w); + fallback_patch_jmpi(or2, _jit->pc.w); + fallback_patch_jmpi(or3, _jit->pc.w); + orr(r0, r0, r2); + jit_unget_reg(t0); +} + +static void +_unldi7(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1, jit_bool_t sign) +{ + jit_int32_t t0, r2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -4) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i1); + ldi_us(r2, i1 + 4); + lshi(r2, r2, 32); + orr(r0, r0, r2); + if (sign) + ldi_c(r2, i1 + 6); + else + ldi_uc(r2, i1 + 6); + lshi(r2, r2, 48); +# else + if (sign) + ldi_i(r0, i1); + else + ldi_ui(r0, i1); + lshi(r0, r0, 24); + ldi_us(r2, i1 + 4); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_uc(r2, i1 + 6); +# endif + } + else if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i1); + ldi_ui(r2, i1 + 2); + lshi(r2, r2, 16); + orr(r0, r0, r2); + if (sign) + ldi_c(r2, i1 + 6); + else + ldi_uc(r2, i1 + 6); + lshi(r2, r2, 48); +# else + if (sign) + ldi_s(r0, i1); + else + 
ldi_us(r0, i1); + lshi(r0, r0, 40); + ldi_ui(r2, i1 + 2); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_uc(r2, i1 + 6); +# endif + } + else if ((i1 & 3) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_ui(r2, i1 + 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + if (sign) + ldi_s(r2, i1 + 5); + else + ldi_us(r2, i1 + 5); + lshi(r2, r2, 40); +# else + if (sign) + ldi_c(r0, i1); + else + ldi_uc(r0, i1); + lshi(r0, r0, 48); + ldi_ui(r2, i1 + 1); + lshi(r2, r2, 16); + orr(r0, r0, r2); + ldi_us(r2, i1 + 5); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_us(r2, i1 + 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + if (sign) + ldi_i(r2, i1 + 3); + else + ldi_ui(r2, i1 + 3); + lshi(r2, r2, 24); +# else + if (sign) + ldi_c(r0, i1); + else + ldi_uc(r0, i1); + lshi(r0, r0, 48); + ldi_us(r2, i1 + 1); + lshi(r2, r2, 32); + orr(r0, r0, r2); + ldi_ui(r2, i1 + 3); +# endif + } + orr(r0, r0, r2); + jit_unget_reg(t0); +} + +static void +_unldr8(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, r2; + jit_word_t un8, un4, un2, un7, un6, un5, un3, al, + or, or2, or7, or6, or5, or3; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -8); + un8 = fallback_bner(_jit->pc.w, r1, r2); + ldr_l(r0, r1); + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un8, _jit->pc.w); + andi(r2, r1, -4); + un4 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r1); + ldxi_i(r2, r1, 4); + lshi(r2, r2, 32); +# else + ldr_i(r0, r1); + ldxi_ui(r2, r1, 4); + lshi(r0, r0, 32); +# endif + or = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un4, _jit->pc.w); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r1, r2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_us(r0, r1); + ldxi_ui(r2, r1, 2); + lshi(r2, r2, 16); + orr(r0, r0, r2); + ldxi_s(r2, r1, 6); + lshi(r2, r2, 48); +# else + ldr_s(r0, r1); + lshi(r0, r0, 48); + ldxi_ui(r2, r1, 2); + lshi(r2, r2, 16); + orr(r0, r0, r2); + ldxi_us(r2, r1, 6); +# endif + or2 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); + andi(r2, r1, 7); + un7 = fallback_bnei(_jit->pc.w, r2, 7); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_l(r2, r1, 1); + lshi(r2, r2, 8); +# else + ldr_c(r0, r1); + ldxi_l(r2, r1, 1); + rshi_u(r2, r2, 8); + lshi(r0, r0, 56); +# endif + or7 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un7, _jit->pc.w); + un6 = fallback_bnei(_jit->pc.w, r2, 6); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_us(r0, r1); + ldxi_l(r2, r1, 2); + lshi(r2, r2, 16); +# else + ldr_s(r0, r1); + lshi(r0, r0, 48); + ldxi_l(r2, r1, 2); + rshi_u(r2, r2, 16); +# endif + or6 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un6, _jit->pc.w); + un5 = fallback_bnei(_jit->pc.w, r2, 5); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldxi_ui(r0, r1, -1); + rshi_u(r0, r0, 8); + ldxi_ui(r2, r1, 3); + lshi(r2, r2, 24); + orr(r0, r0, r2); + ldxi_c(r2, r1, 7); + lshi(r2, r2, 56); +# else + ldxi_i(r0, r1, -1); + lshi(r0, r0, 40); + ldxi_ui(r2, r1, 3); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_uc(r2, r1, 7); +# endif + or5 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un5, _jit->pc.w); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_ui(r2, r1, 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_i(r2, r1, 5); + lshi(r2, r2, 40); +# else + ldr_c(r0, r1); + lshi(r0, r0, 56); + 
ldxi_ui(r2, r1, 1); + lshi(r2, r2, 24); + orr(r0, r0, r2); + ldxi_ui(r2, r1, 5); + rshi_u(r2, r2, 8); +# endif + or3 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un3, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_uc(r0, r1); + ldxi_us(r2, r1, 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_ui(r2, r1, 3); + lshi(r2, r2, 24); + orr(r0, r0, r2); + ldxi_c(r2, r1, 7); + lshi(r2, r2, 56); +# else + ldr_c(r0, r1); + lshi(r0, r0, 56); + ldxi_us(r2, r1, 1); + lshi(r2, r2, 40); + orr(r0, r0, r2); + ldxi_ui(r2, r1, 3); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldxi_uc(r2, r1, 7); +# endif + fallback_flush(); + fallback_patch_jmpi(or, _jit->pc.w); + fallback_patch_jmpi(or2, _jit->pc.w); + fallback_patch_jmpi(or7, _jit->pc.w); + fallback_patch_jmpi(or6, _jit->pc.w); + fallback_patch_jmpi(or5, _jit->pc.w); + fallback_patch_jmpi(or3, _jit->pc.w); + orr(r0, r0, r2); + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unldi8(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; + if ((i1 & -8) == i1) + ldi_l(r0, i1); + else { + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -4) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i1); + ldi_i(r2, i1 + 4); + lshi(r2, r2, 32); +# else + ldi_i(r0, i1); + ldi_ui(r2, i1 + 4); + lshi(r0, r0, 32); +# endif + } + else if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i1); + ldi_ui(r2, i1 + 2); + lshi(r2, r2, 16); + orr(r0, r0, r2); + ldi_s(r2, i1 + 6); + lshi(r2, r2, 48); +# else + ldi_s(r0, i1); + lshi(r0, r0, 48); + ldi_ui(r2, i1 + 2); + lshi(r2, r2, 16); + orr(r0, r0, r2); + ldi_us(r2, i1 + 6); +# endif + } + else if ((i1 & 7) == 7) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_l(r2, i1 + 1); + lshi(r2, r2, 8); +# else + ldi_c(r0, i1); + ldi_l(r2, i1 + 1); + rshi_u(r2, r2, 8); + lshi(r0, r0, 56); +# endif + } + else if ((i1 & 7) == 6) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i1); + ldi_l(r2, i1 + 2); + lshi(r2, r2, 16); +# else + ldi_s(r0, i1); + lshi(r0, r0, 48); + ldi_l(r2, i1 + 2); + rshi_u(r2, r2, 16); +# endif + } + else if ((i1 & 7) == 5) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i1 - 1); + rshi_u(r0, r0, 8); + ldi_ui(r2, i1 + 3); + lshi(r2, r2, 24); + orr(r0, r0, r2); + ldi_c(r2, i1 + 7); + lshi(r2, r2, 56); +# else + ldi_i(r0, i1 - 1); + lshi(r0, r0, 40); + ldi_ui(r2, i1 + 3); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_uc(r2, i1 + 7); +# endif + } + else if ((i1 & 7) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_ui(r2, i1 + 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_i(r2, i1 + 5); + lshi(r2, r2, 40); +# else + ldi_c(r0, i1); + lshi(r0, r0, 56); + ldi_ui(r2, i1 + 1); + lshi(r2, r2, 24); + orr(r0, r0, r2); + ldi_ui(r2, i1 + 5); + rshi_u(r2, r2, 8); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_uc(r0, i1); + ldi_us(r2, i1 + 1); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_ui(r2, i1 + 3); + lshi(r2, r2, 24); + orr(r0, r0, r2); + ldi_c(r2, i1 + 7); + lshi(r2, r2, 56); +# else + ldi_c(r0, i1); + lshi(r0, r0, 56); + ldi_us(r2, i1 + 1); + lshi(r2, r2, 40); + orr(r0, r0, r2); + ldi_ui(r2, i1 + 3); + lshi(r2, r2, 8); + orr(r0, r0, r2); + ldi_uc(r2, i1 + 7); +# endif + } + orr(r0, r0, r2); + jit_unget_reg(t0); + } +} +# endif + +static void +_unstr2(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, r2; + jit_word_t un2, al; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, 
-2); + un2 = fallback_bner(_jit->pc.w, r2, r1); + str_s(r1, r0); + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); +#if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi_u(r2, r0, 8); + stxi_c(1, r1, r2); +#else + stxi_c(1, r1, r0); + rshi_u(r2, r0, 8); + str_c(r1, r2); +#endif + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unsti2(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; + if ((i1 & -2) == i1) + sti_s(i1, r0); + else { + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); +#if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi_u(r2, r0, 8); + sti_c(1 + i1, r2); +#else + sti_c(1 + i1, r0); + rshi_u(r2, r0, 8); + sti_c(i1, r2); +#endif + jit_unget_reg(t0); + } +} + +static void +_unstr3(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, r2; + jit_word_t un2, al; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r2, r1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_s(r1, r0); + rshi(r2, r0, 16); + stxi_c(2, r1, r2); +# else + stxi_c(2, r1, r0); + rshi(r2, r0, 8); + str_s(r1, r2); +# endif + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_s(1, r1, r2); +# else + stxi_s(1, r1, r0); + rshi(r2, r0, 16); + str_c(r1, r2); +# endif + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unsti3(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_s(i1, r0); + rshi(r2, r0, 16); + sti_c(2 + i1, r2); +# else + sti_c(2 + i1, r0); + rshi(r2, r0, 8); + sti_s(i1, r2); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_s(1 + i1, r2); +# else + sti_s(1 + i1, r0); + rshi(r2, r0, 16); + sti_c(i1, r2); +# endif + } + jit_unget_reg(t0); +} + +static void +_unstr4(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, r2; + jit_word_t un4, un2, al, al2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -4); + un4 = fallback_bner(_jit->pc.w, r2, r1); + str_i(r1, r0); + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un4, _jit->pc.w); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r2, r1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_s(r1, r0); + rshi(r2, r0, 16); + stxi_s(2, r1, r2); +# else + stxi_s(2, r1, r0); + rshi(r2, r0, 16); + str_s(r1, r2); +# endif + al2 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + stxi_c(3, r1, r2); +# else + stxi_c(3, r1, r0); + rshi(r2, r0, 8); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + str_c(r1, r2); +# endif + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + fallback_patch_jmpi(al2, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unsti4(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; + if ((i1 & -4) == i1) + sti_i(i1, r0); + else { + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_s(i1, r0); + rshi(r2, r0, 16); + sti_s(2 + i1, r2); +# else + 
sti_s(2 + i1, r0); + rshi(r2, r0, 16); + sti_s(i1, r2); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_c(3 + i1, r2); +# else + sti_c(3 + i1, r0); + rshi(r2, r0, 8); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_c(i1, r2); +# endif + } + jit_unget_reg(t0); + } +} + +# if __WORDSIZE == 64 +static void +_unstr5(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, r2; + jit_word_t un3, un2, un1, al, al2, al1; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, 3); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_i(1, r1, r2); +# else + stxi_i(1, r1, r0); + rshi(r2, r0, 32); + str_c(r1, r2); +# endif + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un3, _jit->pc.w); + un2 = fallback_bnei(_jit->pc.w, r2, 2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_s(r1, r0); + rshi(r2, r0, 16); + stxi_s(2, r1, r2); + rshi(r2, r2, 16); + stxi_c(4, r1, r2); +# else + stxi_c(4, r1, r0); + rshi(r2, r0, 8); + stxi_s(2, r1, r2); + rshi(r2, r2, 16); + str_s(r1, r2); +# endif + al2 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un2, _jit->pc.w); + un1 = fallback_bnei(_jit->pc.w, r2, 1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + stxi_s(3, r1, r2); +# else + stxi_s(3, r1, r0); + rshi(r2, r0, 16); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + str_c(r1, r2); +# endif + al1 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un1, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_i(r1, r0); + rshi(r2, r0, 32); + stxi_c(4, r1, r2); +# else + stxi_c(4, r1, r0); + rshi(r2, r0, 8); + str_i(r1, r2); +# endif + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + fallback_patch_jmpi(al2, _jit->pc.w); + fallback_patch_jmpi(al1, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unsti5(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & 3) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_i(1 + i1, r2); +# else + sti_i(1 + i1, r0); + rshi(r2, r0, 32); + sti_c(i1, r2); +# endif + } + else if ((i1 & 3) == 2) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_s(i1, r0); + rshi(r2, r0, 16); + sti_s(2 + i1, r2); + rshi(r2, r2, 16); + sti_c(4 + i1, r2); +# else + sti_c(4 + i1, r0); + rshi(r2, r0, 8); + sti_s(2 + i1, r2); + rshi(r2, r2, 16); + sti_s(i1, r2); +# endif + } + else if ((i1 & 3) == 1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_s(3 + i1, r2); +# else + sti_s(3 + i1, r0); + rshi(r2, r0, 16); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_c(i1, r2); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_i(i1, r0); + rshi(r2, r0, 32); + sti_c(4 + i1, r2); +# else + sti_c(4 + i1, r0); + rshi(r2, r0, 8); + sti_i(i1, r2); +# endif + } + jit_unget_reg(t0); +} + +static void +_unstr6(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, r2; + jit_word_t un3, un2, un1, al, al2, al1; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, 3); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_i(1, r1, r2); + rshi(r2, r2, 32); + stxi_c(5, r1, r2); +# 
else + stxi_c(5, r1, r0); + rshi(r2, r0, 8); + stxi_i(1, r1, r2); + rshi(r2, r2, 32); + str_c(r1, r2); +# endif + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un3, _jit->pc.w); + un2 = fallback_bnei(_jit->pc.w, r2, 2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_s(r1, r0); + rshi(r2, r0, 16); + stxi_s(2, r1, r2); + rshi(r2, r2, 16); + stxi_s(4, r1, r2); +# else + stxi_s(4, r1, r0); + rshi(r2, r0, 16); + stxi_s(2, r1, r2); + rshi(r2, r2, 16); + str_s(r1, r2); +# endif + al2 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un2, _jit->pc.w); + un1 = fallback_bnei(_jit->pc.w, r2, 1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + stxi_s(3, r1, r2); + rshi(r2, r2, 16); + stxi_c(5, r1, r2); +# else + stxi_c(5, r1, r0); + rshi(r2, r0, 8); + stxi_s(3, r1, r2); + rshi(r2, r2, 16); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + str_c(r1, r2); +# endif + al1 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un1, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_i(r1, r0); + rshi(r2, r0, 32); + stxi_s(4, r1, r2); +# else + stxi_s(4, r1, r0); + rshi(r2, r0, 16); + str_i(r1, r2); +# endif + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + fallback_patch_jmpi(al2, _jit->pc.w); + fallback_patch_jmpi(al1, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unsti6(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & 3) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_i(1 + i1, r2); + rshi(r2, r2, 32); + sti_c(5 + i1, r2); +# else + sti_c(5 + i1, r0); + rshi(r2, r0, 8); + sti_i(1 + i1, r2); + rshi(r2, r2, 32); + sti_c(i1, r2); +# endif + } + else if ((i1 & 3) == 2) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_s(i1, r0); + rshi(r2, r0, 16); + sti_s(2 + i1, r2); + rshi(r2, r2, 16); + sti_s(4 + i1, r2); +# else + sti_s(4 + i1, r0); + rshi(r2, r0, 16); + sti_s(2 + i1, r2); + rshi(r2, r2, 16); + sti_s(i1, r2); +# endif + } + else if ((i1 & 3) == 1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_s(3 + i1, r2); + rshi(r2, r2, 16); + sti_c(5 + i1, r2); +# else + sti_c(5 + i1, r0); + rshi(r2, r0, 8); + sti_s(3 + i1, r2); + rshi(r2, r2, 16); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_c(i1, r2); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_i(i1, r0); + rshi(r2, r0, 32); + sti_s(4 + i1, r2); +# else + sti_s(4 + i1, r0); + rshi(r2, r0, 16); + sti_i(i1, r2); +# endif + } + jit_unget_reg(t0); +} + +static void +_unstr7(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, r2; + jit_word_t un3, un2, un1, al, al2, al1; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, 3); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_i(1, r1, r2); + rshi(r2, r2, 32); + stxi_s(5, r1, r2); +# else + stxi_s(5, r1, r0); + rshi(r2, r0, 16); + stxi_i(1, r1, r2); + rshi(r2, r2, 32); + str_c(r1, r2); +# endif + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un3, _jit->pc.w); + un2 = fallback_bnei(_jit->pc.w, r2, 2); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_s(r1, r0); + rshi(r2, r0, 16); + stxi_i(2, r1, r2); + rshi(r2, r2, 32); + stxi_c(6, r1, r2); +# else + stxi_c(6, r1, r0); + rshi(r2, r0, 8); + stxi_i(2, r1, r2); + 
rshi(r2, r2, 32); + str_s(r1, r2); +# endif + al2 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un2, _jit->pc.w); + un1 = fallback_bnei(_jit->pc.w, r2, 1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + stxi_i(3, r1, r2); +# else + stxi_i(3, r1, r0); + rshi(r2, r0, 32); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + str_c(r1, r2); +# endif + al1 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un1, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_i(r1, r0); + rshi(r2, r0, 32); + stxi_s(4, r1, r2); + rshi(r2, r2, 16); + stxi_c(6, r1, r2); +# else + stxi_c(6, r1, r0); + rshi(r2, r0, 8); + stxi_s(4, r1, r2); + rshi(r2, r2, 16); + str_i(r1, r2); +# endif + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + fallback_patch_jmpi(al2, _jit->pc.w); + fallback_patch_jmpi(al1, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unsti7(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & 3) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_i(1 + i1, r2); + rshi(r2, r2, 32); + sti_s(5 + i1, r2); +# else + sti_s(5 + i1, r0); + rshi(r2, r0, 16); + sti_i(1 + i1, r2); + rshi(r2, r2, 32); + sti_c(i1, r2); +# endif + } + else if ((i1 & 3) == 2) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_s(i1, r0); + rshi(r2, r0, 16); + sti_i(2 + i1, r2); + rshi(r2, r2, 32); + sti_c(6 + i1, r2); +# else + sti_c(6 + i1, r0); + rshi(r2, r0, 8); + sti_i(2 + i1, r2); + rshi(r2, r2, 32); + sti_s(i1, r2); +# endif + } + else if ((i1 & 3) == 1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_i(3 + i1, r2); +# else + sti_i(3 + i1, r0); + rshi(r2, r0, 32); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_c(i1, r2); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_i(i1, r0); + rshi(r2, r0, 32); + sti_s(4 + i1, r2); + rshi(r2, r2, 16); + sti_c(6 + i1, r2); +# else + sti_c(6 + i1, r0); + rshi(r2, r0, 8); + sti_s(4 + i1, r2); + rshi(r2, r2, 16); + sti_i(i1, r2); +# endif + } + jit_unget_reg(t0); +} + +static void +_unstr8(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, r2; + jit_word_t un8, un4, un2, un3, al, al4, al2, al3; + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + andi(r2, r1, -8); + un8 = fallback_bner(_jit->pc.w, r2, r1); + str_l(r1, r0); + al = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un8, _jit->pc.w); + andi(r2, r1, -4); + un4 = fallback_bner(_jit->pc.w, r2, r1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_i(r1, r0); + rshi(r2, r0, 32); + stxi_i(4, r1, r2); +# else + stxi_i(4, r1, r0); + rshi(r2, r0, 32); + str_i(r1, r2); +# endif + al4 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un4, _jit->pc.w); + andi(r2, r1, -2); + un2 = fallback_bner(_jit->pc.w, r2, r1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_s(r1, r0); + rshi(r2, r0, 16); + stxi_i(2, r1, r2); + rshi(r2, r2, 32); + stxi_s(6, r1, r2); +# else + stxi_s(6, r1, r0); + rshi(r2, r0, 16); + stxi_i(2, r1, r2); + rshi(r2, r2, 32); + str_s(r1, r2); +# endif + al2 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bner(un2, _jit->pc.w); + andi(r2, r1, 3); + un3 = fallback_bnei(_jit->pc.w, r2, 3); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_i(1, r1, r2); + rshi(r2, r2, 32); + stxi_s(5, r1, 
r2); + rshi(r2, r2, 16); + stxi_c(7, r1, r2); +# else + stxi_c(7, r1, r0); + rshi(r2, r0, 8); + stxi_s(5, r1, r2); + rshi(r2, r2, 16); + stxi_i(1, r1, r2); + rshi(r2, r2, 32); + str_c(r1, r2); +# endif + al3 = fallback_jmpi(_jit->pc.w); + fallback_flush(); + fallback_patch_bnei(un3, _jit->pc.w); +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_c(r1, r0); + rshi(r2, r0, 8); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + stxi_i(3, r1, r2); + rshi(r2, r2, 32); + stxi_c(7, r1, r2); +# else + stxi_c(7, r1, r0); + rshi(r2, r0, 8); + stxi_i(3, r1, r2); + rshi(r2, r2, 32); + stxi_s(1, r1, r2); + rshi(r2, r2, 16); + str_c(r1, r2); +# endif + fallback_flush(); + fallback_patch_jmpi(al, _jit->pc.w); + fallback_patch_jmpi(al4, _jit->pc.w); + fallback_patch_jmpi(al2, _jit->pc.w); + fallback_patch_jmpi(al3, _jit->pc.w); + jit_unget_reg(t0); +} + +static void +_unsti8(jit_state_t *_jit, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; + if ((i1 & -8) == i1) + sti_l(i1, r0); + else { + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if ((i1 & -4) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_i(i1, r0); + rshi(r2, r0, 32); + sti_i(4 + i1, r2); +# else + sti_i(4 + i1, r0); + rshi(r2, r0, 32); + sti_i(i1, r2); +# endif + } + else if ((i1 & -2) == i1) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_s(i1, r0); + rshi(r2, r0, 16); + sti_i(2 + i1, r2); + rshi(r2, r2, 32); + sti_s(6 + i1, r2); +# else + sti_s(6 + i1, r0); + rshi(r2, r0, 16); + sti_i(2 + i1, r2); + rshi(r2, r2, 32); + sti_s(i1, r2); +# endif + } + else if ((i1 & 3) == 3) { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_i(1 + i1, r2); + rshi(r2, r2, 32); + sti_s(5 + i1, r2); + rshi(r2, r2, 16); + sti_c(7 + i1, r2); +# else + sti_c(7 + i1, r0); + rshi(r2, r0, 8); + sti_s(5 + i1, r2); + rshi(r2, r2, 16); + sti_i(1 + i1, r2); + rshi(r2, r2, 32); + sti_c(i1, r2); +# endif + } + else { +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_c(i1, r0); + rshi(r2, r0, 8); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_i(3 + i1, r2); + rshi(r2, r2, 32); + sti_c(7 + i1, r2); +# else + sti_c(7 + i1, r0); + rshi(r2, r0, 8); + sti_i(3 + i1, r2); + rshi(r2, r2, 32); + sti_s(1 + i1, r2); + rshi(r2, r2, 16); + sti_c(i1, r2); +# endif + } + jit_unget_reg(t0); + } +} +# endif + + +static void +_unldrw(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_word_t cross, done; + jit_int32_t t0, t1, t2, t3; + t0 = fallback_jit_get_reg(jit_class_gpr); + if (r0 == r1) { + t1 = fallback_jit_get_reg(jit_class_gpr); + movr(rn(t1), r1); + r1 = rn(t1); + } + else + t1 = JIT_NOREG; + andi(rn(t0), r1, -(jit_word_t)sizeof(jit_word_t)); + ldr(r0, rn(t0)); + done = fallback_beqr(_jit->pc.w, rn(t0), r1); + t2 = fallback_jit_get_reg(jit_class_gpr); + andi(rn(t2), r1, sizeof(jit_word_t) - 1); + t3 = fallback_jit_get_reg(jit_class_gpr); + addi(rn(t3), rn(t2), i0); + cross = fallback_blei(_jit->pc.w, rn(t3), sizeof(jit_word_t)); + ldxi(rn(t0), rn(t0), sizeof(jit_word_t)); + fallback_flush(); + fallback_patch_blei(cross, _jit->pc.w); + jit_unget_reg(t3); + lshi(rn(t2), rn(t2), 3); +#if __BYTE_ORDER == __LITTLE_ENDIAN + rshr_u(r0, r0, rn(t2)); + rsbi(rn(t2), rn(t2), __WORDSIZE); + lshr(rn(t0), rn(t0), rn(t2)); +#else + lshr(r0, r0, rn(t2)); + rsbi(rn(t2), rn(t2), __WORDSIZE); + rshr_u(rn(t0), rn(t0), rn(t2)); +#endif + jit_unget_reg(t2); + orr(r0, r0, rn(t0)); + fallback_flush(); + fallback_patch_beqr(done, _jit->pc.w); + jit_unget_reg(t0); + if (t1 != JIT_NOREG) + jit_unget_reg(t1); +} + +static void +_unldiw(jit_state_t *_jit, 
jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + jit_word_t addr; + jit_int32_t t0, sh; + addr = i0 & -(jit_word_t)sizeof(jit_word_t); + ldi(r0, addr); + if (i0 != addr) { + sh = (i0 & (sizeof(jit_word_t) - 1)) << 3; + if (sh + i1 > sizeof(jit_word_t)) { + t0 = fallback_jit_get_reg(jit_class_gpr); + ldi(rn(t0), addr + sizeof(jit_word_t)); +#if __BYTE_ORDER == __LITTLE_ENDIAN + rshi_u(r0, r0, sh); + lshi(rn(t0), rn(t0), __WORDSIZE - sh); +#else + lshi(r0, r0, sh); + rshi_u(rn(t0), rn(t0), __WORDSIZE - sh); +#endif + orr(r0, r0, rn(t0)); + jit_unget_reg(t0); + } + } +} + +static void +_unldx(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + switch (i0) { + case 2: +#if __BYTE_ORDER == __LITTLE_ENDIAN + extr_s(r0, r0); +#else + rshi(r0, r0, __WORDSIZE - 16); +#endif + break; + case 3: +#if __BYTE_ORDER == __LITTLE_ENDIAN + lshi(r0, r0, __WORDSIZE - 24); +#endif + rshi(r0, r0, __WORDSIZE - 24); + break; +#if __WORDSIZE == 32 + default: +#else + case 4: +# if __BYTE_ORDER == __LITTLE_ENDIAN + extr_i(r0, r0); +# else + rshi(r0, r0, __WORDSIZE - 32); +# endif +#endif + break; +#if __WORDSIZE == 64 + case 5: +# if __BYTE_ORDER == __LITTLE_ENDIAN + lshi(r0, r0, __WORDSIZE - 40); +# endif + rshi(r0, r0, __WORDSIZE - 40); + break; + case 6: +# if __BYTE_ORDER == __LITTLE_ENDIAN + lshi(r0, r0, __WORDSIZE - 48); +# endif + rshi(r0, r0, __WORDSIZE - 48); + break; + case 7: +# if __BYTE_ORDER == __LITTLE_ENDIAN + lshi(r0, r0, __WORDSIZE - 56); +# endif + rshi(r0, r0, __WORDSIZE - 56); + break; + default: + break; +#endif + } +} + +static void +_unldx_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + switch (i0) { + case 2: +#if __BYTE_ORDER == __LITTLE_ENDIAN + extr_us(r0, r0); +#else + rshi_u(r0, r0, __WORDSIZE - 16); +#endif + break; + case 3: +#if __BYTE_ORDER == __LITTLE_ENDIAN + lshi(r0, r0, __WORDSIZE - 24); +#endif + rshi_u(r0, r0, __WORDSIZE - 24); + break; +#if __WORDSIZE == 32 + default: +#else + case 4: +# if __BYTE_ORDER == __LITTLE_ENDIAN + extr_ui(r0, r0); +# else + rshi_u(r0, r0, __WORDSIZE - 32); +# endif +#endif + break; +#if __WORDSIZE == 64 + case 5: +# if __BYTE_ORDER == __LITTLE_ENDIAN + lshi(r0, r0, __WORDSIZE - 40); +# endif + rshi_u(r0, r0, __WORDSIZE - 40); + break; + case 6: +# if __BYTE_ORDER == __LITTLE_ENDIAN + lshi(r0, r0, __WORDSIZE - 48); +# endif + rshi_u(r0, r0, __WORDSIZE - 48); + break; + case 7: +# if __BYTE_ORDER == __LITTLE_ENDIAN + lshi(r0, r0, __WORDSIZE - 56); +# endif + rshi_u(r0, r0, __WORDSIZE - 56); + break; + default: + break; +#endif + } +} + +static void +_fallback_unldr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0, r2; + assert(i0 >= 1 && i0 <= (__WORDSIZE >> 3)); + if (i0 == 1) + ldr_c(r0, r1); + else if (_jitc->unld_algorithm != 0) { + if (r0 == r1) { + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + movr(r2, r0); + } + else + r2 = r1; + switch (i0) { + case 2: unldr2(r0, r2, 1); break; + case 3: unldr3(r0, r2, 1); break; +# if __WORDSIZE == 32 + default: unldr4(r0, r2); break; +# else + case 4: unldr4(r0, r2, 1); break; + case 5: unldr5(r0, r2, 1); break; + case 6: unldr6(r0, r2, 1); break; + case 7: unldr7(r0, r2, 1); break; + default: unldr8(r0, r2); break; +# endif + } + if (i0 > 1 && r0 == r1) + jit_unget_reg(t0); + } + else { + unldrw(r0, r1, i0); + unldx(r0, i0); + } +} + +static void +_fallback_unldi(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + assert(i1 >= 1 && i1 <= (__WORDSIZE >> 3)); + if (i1 == 1) + ldi_c(r0, i0); + else if (_jitc->unld_algorithm 
!= 0) { + switch (i1) { + case 2: unldi2(r0, i0, 1); break; + case 3: unldi3(r0, i0, 1); break; +# if __WORDSIZE == 32 + default: unldi4(r0, i0); break; +# else + case 4: unldi4(r0, i0, 1); break; + case 5: unldi5(r0, i0, 1); break; + case 6: unldi6(r0, i0, 1); break; + case 7: unldi7(r0, i0, 1); break; + default: unldi8(r0, i0); break; +# endif + } + } + else { + unldiw(r0, i0, i1); + unldx(r0, i1); + } +} + +static void +_fallback_unldr_u(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0, r2; + assert(i0 >= 1 && i0 <= (__WORDSIZE >> 3)); + if (i0 == 1) + ldr_uc(r0, r1); + else if (_jitc->unld_algorithm != 0) { + if (r0 == r1) { + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + movr(r2, r0); + } + else + r2 = r1; + switch (i0) { + case 2: unldr2(r0, r2, 0); break; + case 3: unldr3(r0, r2, 0); break; +# if __WORDSIZE == 32 + default: unldr4(r0, r2); break; +# else + case 4: unldr4(r0, r2, 0); break; + case 5: unldr5(r0, r2, 0); break; + case 6: unldr6(r0, r2, 0); break; + case 7: unldr7(r0, r2, 0); break; + default: unldr8(r0, r2); break; +# endif + } + if (i0 > 1 && r0 == r1) + jit_unget_reg(t0); + } + else { + unldrw(r0, r1, i0); + unldx_u(r0, i0); + } +} + +static void +_fallback_unldi_u(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + assert(i1 >= 1 && i1 <= (__WORDSIZE >> 3)); + if (i1 == 1) + ldi_uc(r0, i0); + else if (_jitc->unld_algorithm != 0) { + switch (i1) { + case 2: unldi2(r0, i0, 0); break; + case 3: unldi3(r0, i0, 0); break; +# if __WORDSIZE == 32 + default: unldi4(r0, i0); break; +# else + case 4: unldi4(r0, i0, 0); break; + case 5: unldi5(r0, i0, 0); break; + case 6: unldi6(r0, i0, 0); break; + case 7: unldi7(r0, i0, 0); break; + default: unldi8(r0, i0); break; +# endif + } + + } + else { + unldiw(r0, i0, i1); + unldx_u(r0, i1); + } +} + +static void +_fallback_unstr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_word_t done; + jit_int32_t t0, t1, t2, t3, t4, t5; + assert(i0 > 0 && i0 <= (__WORDSIZE >> 3)); + if (i0 == 1) + str_c(r0, r1); + else if (_jitc->unst_algorithm == 0) { + switch (i0) { + case 2: unstr2(r1, r0); break; + case 3: unstr3(r1, r0); break; +# if __WORDSIZE == 32 + default: unstr4(r1, r0); break; +# else + case 4: unstr4(r1, r0); break; + case 5: unstr5(r1, r0); break; + case 6: unstr6(r1, r0); break; + case 7: unstr7(r1, r0); break; + default: unstr8(r1, r0); break; +# endif + } + } + else { + t0 = fallback_jit_get_reg(jit_class_gpr); + t1 = fallback_jit_get_reg(jit_class_gpr); + t2 = fallback_jit_get_reg(jit_class_gpr); + t3 = fallback_jit_get_reg(jit_class_gpr); + /* Zero out top bits and keep value to store in t0 */ + if (i0 != sizeof(jit_word_t)) { + lshi(rn(t3), r1, (sizeof(jit_word_t) - i0) << 3); +#if __BYTE_ORDER == __LITTLE_ENDIAN + rshi_u(rn(t3), rn(t3), (sizeof(jit_word_t) - i0) << 3); +#endif + } + else + movr(rn(t3), r1); + /* Check alignment */ + andi(rn(t2), r0, sizeof(jit_word_t) - 1); + /* Multiply by 8 */ + lshi(rn(t2), rn(t2), 3); + /* Split values to store (assume will need two stores) */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + qlshr_u(rn(t0), rn(t1), rn(t3), rn(t2)); +#else + qrshr_u(rn(t0), rn(t1), rn(t3), rn(t2)); +#endif + /* Generate masks for values in memory */ + if (i0 == sizeof(jit_word_t)) + movi(rn(t3), -1); + else { +#if __BYTE_ORDER == __BIG_ENDIAN + movi(rn(t3), ((1L << (i0 << 3)) - 1) << + ((sizeof(jit_word_t) - i0) << 3)); +#else + movi(rn(t3), (1L << (i0 << 3)) - 1); +#endif + } +#if __BYTE_ORDER == __LITTLE_ENDIAN + 
qlshr_u(rn(t2), rn(t3), rn(t3), rn(t2));
+#else
+ qrshr_u(rn(t2), rn(t3), rn(t3), rn(t2));
+#endif
+ comr(rn(t2), rn(t2));
+ comr(rn(t3), rn(t3));
+ /* Allocate two extra registers later in case temporaries are needed for *q?shr_u */
+ t4 = fallback_jit_get_reg(jit_class_gpr);
+ t5 = fallback_jit_get_reg(jit_class_gpr);
+ /* Store words */
+ andi(rn(t4), r0, -(jit_word_t)sizeof(jit_word_t));
+ ldr(rn(t5), rn(t4));
+ andr(rn(t5), rn(t5), rn(t2));
+ orr(rn(t0), rn(t0), rn(t5));
+ str(rn(t4), rn(t0));
+ /* Make sure not to read/write possibly unmapped memory */
+ addi(rn(t5), rn(t4), i0);
+ done = fallback_blei(_jit->pc.w, rn(t5), sizeof(jit_word_t));
+ /* Store second word if value crosses a word boundary */
+ ldxi(rn(t5), rn(t4), sizeof(jit_word_t));
+ andr(rn(t5), rn(t5), rn(t3));
+ orr(rn(t1), rn(t1), rn(t5));
+ stxi(sizeof(jit_word_t), rn(t4), rn(t1));
+ /* Finished */
+ fallback_flush();
+ fallback_patch_blei(done, _jit->pc.w);
+ /* The generic/simple algorithm needs 6 temporaries, as it cannot
+ * change any of the argument registers, might need to truncate
+ * the value to store, and needs a pair for the values to store and
+ * another for the masks. */
+ jit_unget_reg(t5);
+ jit_unget_reg(t4);
+ jit_unget_reg(t3);
+ jit_unget_reg(t2);
+ jit_unget_reg(t1);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_fallback_unsti(jit_state_t *_jit,
+ jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+ jit_word_t done, address;
+ jit_int32_t t0, t1, t2, t3, t4;
+ assert(i1 > 0 && i1 <= (__WORDSIZE >> 3));
+ if (i1 == 1)
+ sti_c(i0, r0);
+ else if (_jitc->unst_algorithm == 0) {
+ switch (i1) {
+ case 1: sti_c(i0, r0); break;
+ case 2: unsti2(r0, i0); break;
+ case 3: unsti3(r0, i0); break;
+# if __WORDSIZE == 32
+ default: unsti4(r0, i0); break;
+# else
+ case 4: unsti4(r0, i0); break;
+ case 5: unsti5(r0, i0); break;
+ case 6: unsti6(r0, i0); break;
+ case 7: unsti7(r0, i0); break;
+ default: unsti8(r0, i0); break;
+# endif
+ }
+ }
+ else {
+ t0 = fallback_jit_get_reg(jit_class_gpr);
+ t1 = fallback_jit_get_reg(jit_class_gpr);
+ t2 = fallback_jit_get_reg(jit_class_gpr);
+ t3 = fallback_jit_get_reg(jit_class_gpr);
+ t4 = fallback_jit_get_reg(jit_class_gpr);
+ /* Zero out top bits and keep value to store in t0 */
+ if (i1 != sizeof(jit_word_t)) {
+ lshi(rn(t2), r0, (sizeof(jit_word_t) - i1) << 3);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ rshi_u(rn(t2), rn(t2), (sizeof(jit_word_t) - i1) << 3);
+ qlshi_u(rn(t0), rn(t1), rn(t2),
+ (i0 & (sizeof(jit_word_t) - 1)) << 3);
+#else
+ qrshi_u(rn(t0), rn(t1), rn(t2),
+ (i0 & (sizeof(jit_word_t)) - 1) << 3);
+#endif
+ }
+ else {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ qlshi_u(rn(t0), rn(t1), r0,
+ (i0 & (sizeof(jit_word_t)) - 1) << 3);
+#else
+ qrshi_u(rn(t0), rn(t1), r0,
+ (i0 & (sizeof(jit_word_t)) - 1) << 3);
+#endif
+ }
+ /* Generate masks for values in memory */
+ if (i1 == sizeof(jit_word_t))
+ movi(rn(t2), -1);
+ else {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ movi(rn(t2), ((1L << (i1 << 3)) - 1) <<
+ ((sizeof(jit_word_t) - i1) << 3));
+#else
+ movi(rn(t2), (1L << (i1 << 3)) - 1);
+#endif
+ }
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ qlshi_u(rn(t2), rn(t3), rn(t2), (i0 & (sizeof(jit_word_t)) - 1) << 3);
+#else
+ qrshi_u(rn(t2), rn(t3), rn(t2), (i0 & (sizeof(jit_word_t)) - 1) << 3);
+#endif
+ comr(rn(t2), rn(t2));
+ comr(rn(t3), rn(t3));
+ /* Store words */
+ address = i0 & -(jit_word_t)sizeof(jit_word_t);
+ ldi(rn(t4), address);
+ andr(rn(t4), rn(t4), rn(t2));
+ orr(rn(t0), rn(t0), rn(t4));
+ sti(address, rn(t0));
+ if (address + i1 > sizeof(jit_word_t)) {
+ address += 
sizeof(jit_word_t); + ldi(rn(t4), address); + andr(rn(t4), rn(t4), rn(t3)); + orr(rn(t1), rn(t1), rn(t4)); + sti(address, rn(t1)); + } + jit_unget_reg(t4); + jit_unget_reg(t3); + jit_unget_reg(t2); + jit_unget_reg(t1); + jit_unget_reg(t0); + } +} + +# ifdef fallback_unldr_x +static void +_fallback_unldr_x(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0, r2; +# if __WORDSIZE == 32 + jit_int32_t t1, r3; +# endif + assert(i0 == 4 || i0 == 8); + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if (i0 == 4) { + unldr(r2, r1, 4); + movr_w_f(r0, r2); + } + else { +# if __WORDSIZE == 32 + t1 = fallback_jit_get_reg(jit_class_gpr); + r3 = rn(t1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + unldr(r2, r1, 4); + addi(r3, r1, 4); + unldr(r3, r3, 4); +# else + unldr(r3, r1, 4); + addi(r2, r1, 4); + unldr(r2, r2, 4); +# endif + movr_ww_d(r0, r2, r3); + jit_unget_reg(t1); +# else + unldr(r2, r1, 8); + movr_w_d(r0, r2); +# endif + } + jit_unget_reg(t0); +} +# endif + +# ifdef fallback_unldi_x +static void +_fallback_unldi_x(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0, r2; +# if __WORDSIZE == 32 + jit_int32_t t1, r3; +# endif + assert(i1 == 4 || i1 == 8); + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if (i1 == 4) { + unldi(r2, i0, 4); + movr_w_f(r0, r2); + } + else { +# if __WORDSIZE == 32 + t1 = fallback_jit_get_reg(jit_class_gpr); + r3 = rn(t1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + unldi(r2, i0, 4); + unldi(r3, i0 + 4, 4); +# else + unldi(r3, i0, 4); + unldi(r2, i0 + 4, 4); +# endif + movr_ww_d(r0, r3, r2); + jit_unget_reg(t1); +# else + unldi(r2, i0, 8); + movr_w_d(r0, r2); +# endif + } + jit_unget_reg(t0); +} +# endif + +# ifdef fallback_unstr_x +static void +_fallback_unstr_x(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0, r2; +# if __WORDSIZE == 32 + jit_int32_t t1, r3; +# endif + assert(i0 == 4 || i0 == 8); + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if (i0 == 4) { + movr_f_w(r2, r1); + unstr(r0, r2, 4); + } + else { +# if __WORDSIZE == 32 + t1 = fallback_jit_get_reg(jit_class_gpr); + r3 = rn(t1); + movr_d_ww(r2, r3, r1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + unstr(r0, r2, 4); + addi(r2, r0, 4); + unstr(r2, r3, 4); +# else + unstr(r0, r3, 4); + addi(r3, r0, 4); + unstr(r3, r2, 4); +# endif + jit_unget_reg(t1); +# else + movr_d_w(r2, r1); + unstr(r0, r2, 8); +# endif + } + jit_unget_reg(t0); +} +# endif + +# ifdef fallback_unsti_x +static void +_fallback_unsti_x(jit_state_t *_jit, + jit_word_t i0, jit_int32_t r0, jit_word_t i1) +{ + jit_int32_t t0, r2; +# if __WORDSIZE == 32 + jit_int32_t t1, r3; +# endif + assert(i1 == 4 || i1 == 8); + t0 = fallback_jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if (i1 == 4) { + movr_f_w(r2, r0); + unsti(i0, r2, 4); + } + else { +# if __WORDSIZE == 32 + t1 = fallback_jit_get_reg(jit_class_gpr); + r3 = rn(t1); + movr_d_ww(r2, r3, r0); +# if __BYTE_ORDER == __LITTLE_ENDIAN + unsti(i0, r3, 4); + unsti(i0 + 4, r2, 4); +# else + unsti(i0, r2, 4); + unsti(i0 + 4, r3, 4); +# endif + jit_unget_reg(t1); +# else + movr_d_w(r2, r0); + unsti(i0, r2, 8); +# endif + } + jit_unget_reg(t0); +} +# endif #endif
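
Editor's sketch (illustrative, not part of the diff above): the unldrw()/unldiw() helpers, followed by unldx()/unldx_u(), emit an unaligned load as at most two aligned word loads that are shifted and or-ed together, with the second word read only when the access actually crosses a word boundary so possibly unmapped memory is never touched. The standalone C program below mirrors that combine step for a little-endian 64-bit host; load_unaligned_le and the memcpy-based self-check are assumptions made for the illustration, not lightning API.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Illustrative only: rebuild an unaligned little-endian load of `size`
 * bytes (1..8) starting at byte `off` (0..7) of the aligned word pair
 * w0/w1, using shifts and ors -- the word-at-a-time scheme _unldrw()
 * generates code for, followed by the zero-extension of _unldx_u().
 */
static uint64_t
load_unaligned_le(uint64_t w0, uint64_t w1, unsigned off, unsigned size)
{
    unsigned sh = off * 8;              /* bit offset inside the first word */
    uint64_t v = w0 >> sh;              /* low bytes come from the first word */

    if (sh != 0 && off + size > 8)      /* second word only when crossing */
        v |= w1 << (64 - sh);
    if (size < 8)                       /* zero-extend to the requested width */
        v &= (UINT64_C(1) << (size * 8)) - 1;
    return v;
}

int
main(void)
{
    unsigned char bytes[16];
    uint64_t words[2], want, got;
    unsigned off, size;

    for (off = 0; off < sizeof(bytes); off++)
        bytes[off] = (unsigned char)(0xA0 + off);
    memcpy(words, bytes, sizeof(words));    /* two aligned 64-bit words */

    /* Cross-check against memcpy; assumes a little-endian host. */
    for (size = 1; size <= 8; size++)
        for (off = 0; off < 8; off++) {
            want = 0;
            memcpy(&want, bytes + off, size);
            got = load_unaligned_le(words[0], words[1], off, size);
            assert(got == want);
        }
    puts("unaligned loads reassembled correctly");
    return 0;
}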
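
Editor's sketch (illustrative, not part of the diff above): the generic branch of _fallback_unstr()/_fallback_unsti() performs an unaligned store as a read-modify-write of the one or two aligned words containing the destination: the value is split across the pair with qlshr_u()/qrshr_u(), complemented masks preserve the surrounding bytes, and the second word is written only when the value crosses a word boundary. The C sketch below shows the same mask arithmetic for a little-endian 64-bit host; store_unaligned_le and its test harness are assumptions made for the illustration, not lightning API.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Illustrative only: store the low `size` bytes (1..8) of `val` at byte
 * `off` (0..7) of the aligned pair w[0]/w[1] by clearing the destination
 * bytes with complemented masks and or-ing in the shifted value -- the
 * shape of the code the generic _fallback_unstr() branch emits.
 */
static void
store_unaligned_le(uint64_t w[2], unsigned off, unsigned size, uint64_t val)
{
    unsigned sh = off * 8;
    uint64_t mask = size < 8 ? (UINT64_C(1) << (size * 8)) - 1 : ~UINT64_C(0);

    val &= mask;                        /* keep only the bytes being stored */
    w[0] = (w[0] & ~(mask << sh)) | (val << sh);
    if (sh != 0 && off + size > 8)      /* second word only when crossing */
        w[1] = (w[1] & ~(mask >> (64 - sh))) | (val >> (64 - sh));
}

int
main(void)
{
    unsigned char ref[16], chk[16];
    uint64_t w[2], val = UINT64_C(0x1122334455667788);
    unsigned off, size;

    /* Cross-check against memcpy; assumes a little-endian host. */
    for (size = 1; size <= 8; size++)
        for (off = 0; off < 8; off++) {
            memset(ref, 0xCC, sizeof(ref));
            memset(w, 0xCC, sizeof(w));
            memcpy(ref + off, &val, size);
            store_unaligned_le(w, off, size, val);
            memcpy(chk, w, sizeof(chk));
            assert(memcmp(ref, chk, sizeof(ref)) == 0);
        }
    puts("unaligned stores masked in correctly");
    return 0;
}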