X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_arm-cpu.c;fp=deps%2Flightning%2Flib%2Fjit_arm-cpu.c;h=149db9abba9b226faa74df57c114ba3792f7f1ba;hb=ba86ff938a6b17c171dd68ebdf897ca3e30550f8;hp=a0852a2cd55c8716487ec55276f1fc1c016c4b44;hpb=56e500f3428614e677ba5e9719f002046e87d980;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/jit_arm-cpu.c b/deps/lightning/lib/jit_arm-cpu.c index a0852a2c..149db9ab 100644 --- a/deps/lightning/lib/jit_arm-cpu.c +++ b/deps/lightning/lib/jit_arm-cpu.c @@ -18,9 +18,16 @@ */ #if PROTO -# define stxi(i0,r0,r1) stxi_i(i0,r0,r1) -# define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0) +# define jit_unaligned_p() (jit_cpu.unaligned) +# define jit_vfp_unaligned_p() (jit_cpu.vfp_unaligned) # define ldr(r0,r1) ldr_i(r0,r1) +# define ldi(r0,i0) ldi_i(r0,i0) +# define ldxr(r0,r1,r2) ldxr_i(r0,r1,r2) +# define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0) +# define str(r0,r1) str_i(r0,r1) +# define sti(i0,r0) sti_i(i0,r0) +# define stxr(r0,r1,r2) stxr_i(r0,r1,r2) +# define stxi(i0,r0,r1) stxi_i(i0,r0,r1) # define _s20P(d) ((d) >= -(int)0x80000 && (d) <= 0x7ffff) /* d is within -0x80000..0x7ffff; argument parenthesized in both comparisons */ # define _s24P(d) ((d) >= -(int)0x800000 && (d) <= 0x7fffff) /* d is within -0x800000..0x7fffff */ # define _u3(v) ((v) & 0x7) @@ -192,6 +199,12 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define THUMB2_CLZ 0xfab0f080 # define THUMB2_RBIT 0xfa90f0a0 # define ARM_RBIT 0x06f00030 +# define THUMB2_SBFX 0xf3400000 +# define ARM_SBFX 0x07a00050 +# define THUMB2_UBFX 0xf3c00000 +# define ARM_UBFX 0x07e00050 +# define THUMB2_BFI 0xf3600000 +# define ARM_BFI 0x07c00010 /* << ARMv6t2 */ # define ARM_CLZ 0x01600010 /* >> ARMv7 */ @@ -224,6 +237,9 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define THUMB_ASRI 0x1000 # define THUMB2_ASRI 0xea4f0020 # define ARM_ROR 0x00000060 +# define THUMB_ROR 0x41c0 +# define THUMB2_ROR 0xfa60f000 +# define THUMB2_RORI 0xea4f0030 # define ARM_CMP 0x01500000 # define THUMB_CMP 0x4280 # define THUMB_CMPX 0x4500 @@ -456,6 +472,19 @@ static void 
_tdmb(jit_state_t *_jit, int im); # define NOT(rd,rm) CC_NOT(ARM_CC_AL,rd,rm) # define T1_NOT(rd,rm) T1_MVN(rd,rm) # define T2_NOT(rd,rm) T2_MVN(rd,rm) +/* Bitfield extract/insert encoders. The last operand is handed unchanged to torrlw/corrlw: callers in this file pass width-1 for SBFX/UBFX and lsb+width-1 for BFI. */ +# define torrlw(o,rd,rn,lsb,wm1) _torrlw(_jit,o,rd,rn,lsb,wm1) +static void _torrlw(jit_state_t*,int,int,int,int,int); +# define corrlw(cc,o,rd,rn,lsb,wm1) _corrlw(_jit,cc,o,rd,rn,lsb,wm1) +static void _corrlw(jit_state_t*,int,int,int,int,int,int); +# define T2_SBFX(rd,rn,lsb,wm1) torrlw(THUMB2_SBFX,rd,rn,lsb,wm1) +# define CC_SBFX(cc,o,rd,rn,lsb,wm1) corrlw(cc,o,rd,rn,lsb,wm1) +# define SBFX(rd,rn,lsb,wm1) CC_SBFX(ARM_CC_AL,ARM_SBFX,rd,rn,lsb,wm1) +# define T2_UBFX(rd,rn,lsb,wm1) torrlw(THUMB2_UBFX,rd,rn,lsb,wm1) +# define CC_UBFX(cc,rd,rn,lsb,wm1) corrlw(cc,ARM_UBFX,rd,rn,lsb,wm1) /* opcode named explicitly; the macro has no 'o' parameter */ +# define UBFX(rd,rn,lsb,wm1) CC_UBFX(ARM_CC_AL,rd,rn,lsb,wm1) +# define T2_BFI(rd,rn,lsb,wm1) torrlw(THUMB2_BFI,rd,rn,lsb,wm1) +# define CC_BFI(cc,rd,rn,lsb,wm1) corrlw(cc,ARM_BFI,rd,rn,lsb,wm1) /* opcode named explicitly; the macro has no 'o' parameter */ +# define BFI(rd,rn,lsb,wm1) CC_BFI(ARM_CC_AL,rd,rn,lsb,wm1) # define T2_CLZ(rd,rm) torrr(THUMB2_CLZ,rm,rd,rm) # define CC_CLZ(cc,rd,rm) corrrr(cc,ARM_CLZ,_R15_REGNO,rd,_R15_REGNO,rm) # define CLZ(rd,rm) CC_CLZ(ARM_CC_AL,rd,rm) @@ -637,6 +666,13 @@ static void _tdmb(jit_state_t *_jit, int im); # define ASRI(rd,rn,im) CC_ASRI(ARM_CC_AL,rd,rn,im) # define T1_ASRI(rd,rm,im) is(THUMB_ASRI|(_u5(im)<<6)|(_u3(rm)<<3)|_u3(rd)) # define T2_ASRI(rd,rm,im) tshift(THUMB2_ASRI,rd,rm,im) +# define CC_ROR(cc,rd,rn,rm) CC_SHIFT(cc,ARM_ROR|ARM_R,rd,rm,rn,0) +# define ROR(rd,rn,rm) CC_ROR(ARM_CC_AL,rd,rn,rm) +# define T1_ROR(rdn,rm) is(THUMB_ROR|(_u3(rm)<<3)|_u3(rdn)) +# define T2_ROR(rd,rn,rm) torrr(THUMB2_ROR,rn,rd,rm) +# define CC_RORI(cc,rd,rn,im) CC_SHIFT(cc,ARM_ROR,rd,0,rn,im) +# define RORI(rd,rn,im) CC_RORI(ARM_CC_AL,rd,rn,im) +# define T2_RORI(rd,rm,im) tshift(THUMB2_RORI,rd,rm,im) # define CC_CMP(cc,rn,rm) corrr(cc,ARM_CMP,rn,0,rm) # define CMP(rn,rm) CC_CMP(ARM_CC_AL,rn,rm) # define T1_CMP(rn,rm) is(THUMB_CMP|(_u3(rm)<<3)|_u3(rn)) @@ -914,6 
+950,10 @@ static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); # define ctzr(r0, r1) _ctzr(_jit, r0, r1) static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define rbitr(r0, r1) _rbitr(_jit, r0, r1) +static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t); +# define popcntr(r0, r1) _popcntr(_jit, r0, r1) +static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t); # define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -944,6 +984,16 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define hmulr(r0,r1,r2) ihmulr(r0,r1,r2,1) +# define hmulr_u(r0,r1,r2) ihmulr(r0,r1,r2,0) +# define ihmulr(r0,r1,r2,cc) _ihmulr(_jit,r0,r1,r2,cc) +static void _ihmulr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_bool_t); +# define hmuli(r0,r1,i0) ihmuli(r0,r1,i0,1) +# define hmuli_u(r0,r1,i0) ihmuli(r0,r1,i0,0) +# define ihmuli(r0,r1,i0,cc) _ihmuli(_jit,r0,r1,i0,cc) +static void _ihmuli(jit_state_t*,jit_int32_t,jit_int32_t, + jit_word_t,jit_bool_t); # define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) # define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) # define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) @@ -1006,6 +1056,13 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define lrotr(r0,r1,r2) _lrotr(_jit,r0,r1,r2) +static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +/* Left rotate by i0 is emitted as a right rotate by 32-i0; a zero count is passed through as 0 because _rroti asserts its count is within 0..31. Note: i0 is evaluated twice. */ +# define lroti(r0,r1,i0) rroti(r0,r1,((i0) ? 32-(i0) : 0)) +# define rrotr(r0,r1,r2) 
_rrotr(_jit,r0,r1,r2) +static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rroti(r0,r1,i0) _rroti(_jit,r0,r1,i0) +static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ccr(ct,cf,r0,r1,r2) _ccr(_jit,ct,cf,r0,r1,r2) static void _ccr(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t); # define cci(ct,cf,r0,r1,i0) _cci(_jit,ct,cf,r0,r1,i0) @@ -1134,6 +1191,14 @@ static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t); static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0) static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define unldr(r0, r1, i0) _unldr(_jit, r0, r1, i0) +static void _unldr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define unldi(r0, i0, i1) _unldi(_jit, r0, i0, i1) +static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +# define unldr_u(r0, r1, i0) _unldr_u(_jit, r0, r1, i0) +static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define unldi_u(r0, i0, i1) _unldi_u(_jit, r0, i0, i1) +static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); # define str_c(r0,r1) _str_c(_jit,r0,r1) static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t); # define sti_c(i0,r0) _sti_c(_jit,i0,r0) @@ -1158,10 +1223,20 @@ static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxi_i(r0,r1,i0) _stxi_i(_jit,r0,r1,i0) static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define unstr(r0, r1, i0) _unstr(_jit, r0, r1, i0) +static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define unsti(i0, r0, i1) _unsti(_jit, i0, r0, i1) +static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); # define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); # define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) 
static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define extr(r0,r1,i0,i1) _extr(_jit,r0,r1,i0,i1) +static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +# define extr_u(r0,r1,i0,i1) _extr_u(_jit,r0,r1,i0,i1) +static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +# define depr(r0,r1,i0,i1) _depr(_jit,r0,r1,i0,i1) +static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0,r1) _extr_uc(_jit,r0,r1) @@ -1186,6 +1261,8 @@ static void _vastart(jit_state_t*, jit_int32_t); static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); # define patch_at(kind,jump,label) _patch_at(_jit,kind,jump,label) static void _patch_at(jit_state_t*,jit_int32_t,jit_word_t,jit_word_t); + +# define NEED_FALLBACK_CASX 1 #endif #if CODE @@ -1587,6 +1664,27 @@ _tdmb(jit_state_t *_jit, int im) iss(thumb.s[0], thumb.s[1]); } +/* Emit a 32-bit Thumb bitfield instruction: rn is placed at bit 16, rd at bit 8, wm1 in the low bits, and lsb is split into bits 12-14 (its upper three bits) and bits 6-7 (its lower two bits). */ +static void +_torrlw(jit_state_t *_jit, int o, int rd, int rn, int lsb, int wm1) +{ + jit_thumb_t thumb; + /* the opcode must leave every operand field clear, including bits 12-14 */ + assert(!(o & 0x000f7fdf)); + assert(!(lsb & 0xffffffe0)); + assert(!(wm1 & 0xffffffe0)); + thumb.i = o|(_u4(rn)<<16)|((lsb&28)<<10)|(_u4(rd)<<8)|((lsb&3)<<6)|_u5(wm1); + iss(thumb.s[0], thumb.s[1]); +} + +/* Emit an ARM-mode bitfield instruction: cc in the top four bits, wm1 at bit 16, rd at bit 12, lsb at bit 7 and rn in the low bits. */ +static void +_corrlw(jit_state_t *_jit, int cc, int o, int rd, int rn, int lsb, int wm1) +{ + assert(!(cc & 0x0fffffff)); + /* the opcode must leave every operand field clear, including wm1 (bits 16-20) and lsb (bits 7-11) */ + assert(!(o & 0xf01fff8f)); + assert(!(lsb & 0xffffffe0)); + assert(!(wm1 & 0xffffffe0)); + ii(cc|o|(_u5(wm1)<<16)|(_u4(rd)<<12)|(_u5(lsb)<<7)|_u4(rn)); +} + static void _nop(jit_state_t *_jit, jit_int32_t i0) { @@ -1813,6 +1911,29 @@ _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) fallback_ctz(r0, r1); } +/* Bit-reverse r1 into r0: RBIT (Thumb or ARM form) when jit_armv7_p() holds, otherwise fallback_rbit. */ +static void +_rbitr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_armv7_p()) { /* armv6t2 actually */ + if (jit_thumb_p()) + T2_RBIT(r0, r1); + else + RBIT(r0, r1); + } + else + fallback_rbit(r0, r1); +} + 
+/* Population count of r1 into r0: vfp_popcntr when jit_cpu.vfp >= 4, otherwise fallback_popcnt. */ +static void +_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + /* FIXME Untested. Adapted from aarch64 pattern. */ + if (jit_cpu.vfp >= 4) + vfp_popcntr(r0, r1); + else + fallback_popcnt(r0, r1); +} + static void _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -2210,6 +2331,29 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +/* High-half multiply: runs iqmulr with a scratch register as its first operand and r0 as its second, then releases the scratch. */ +static void +_ihmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + iqmulr(rn(reg), r0, r1, r2, sign); + jit_unget_reg(reg); +} + +/* Immediate form of _ihmulr: i0 is first loaded into a second scratch register. */ +static void +_ihmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t t0, t1; + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + movi(rn(t1), i0); + iqmulr(rn(t0), r0, r1, rn(t1), sign); + jit_unget_reg(t1); + jit_unget_reg(t0); +} + static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) @@ -2692,6 +2836,47 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) LSRI(r0, r1, i0); } +/* Rotate r1 left by r2 bits into r0, emitted as a right rotate by (32 - r2). The count is built in r0 when r0 aliases neither input, otherwise in a scratch register. */ +static void +_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 != r1 && r0 != r2) { + rsbi(r0, r2, 32); /* registers are 32 bits wide here, matching lroti's 32-i0 */ + rrotr(r0, r1, r0); + } + else { + reg = jit_get_reg(jit_class_gpr); + rsbi(rn(reg), r2, 32); + rrotr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +/* Rotate r1 right by r2 bits into r0; the 16-bit Thumb form is used only when flags may be set, all registers are below 8 and r0 == r1. */ +static void +_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (jit_thumb_p()) { + if (!jit_no_set_flags() && (r0|r1|r2) < 8 && r0 == r1) + T1_ROR(r0, r2); + else + T2_ROR(r0, r1, r2); + } + else + ROR(r0, r1, r2); +} + +/* Rotate r1 right by the constant i0 (0..31) into r0; a zero count degenerates to a register move. */ +static void +_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + assert(i0 >= 0 && i0 <= 31); + if (i0 == 0) + movr(r0, r1); + else if (jit_thumb_p()) + T2_RORI(r0, r1, i0); + else + RORI(r0, r1, i0); +} + 
static void _ccr(jit_state_t *_jit, int ct, int cf, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) @@ -3602,6 +3787,44 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +/* Unaligned load/store entry points: each one selects the fallback_* implementation when jit_unaligned_p() is set and the generic_* one otherwise. */ +static void +_unldr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_unaligned_p()) + fallback_unldr(r0, r1, i0); + else + generic_unldr(r0, r1, i0); +} + +static void +_unldi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + if (jit_unaligned_p()) + fallback_unldi(r0, i0, i1); + else + generic_unldi(r0, i0, i1); +} + +static void +_unldr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_unaligned_p()) + fallback_unldr_u(r0, r1, i0); + else + generic_unldr_u(r0, r1, i0); +} + +static void +_unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + if (jit_unaligned_p()) + fallback_unldi_u(r0, i0, i1); + else + generic_unldi_u(r0, i0, i1); +} + static void _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -3821,6 +4044,24 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } +static void +_unstr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_unaligned_p()) + fallback_unstr(r0, r1, i0); + else + generic_unstr(r0, r1, i0); +} + +static void +_unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) +{ + if (jit_unaligned_p()) + fallback_unsti(i0, r0, i1); + else + generic_unsti(i0, r0, i1); +} + static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -3867,6 +4108,57 @@ _bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } } +/* Signed bitfield extract of i1 bits starting at bit i0 of r1 into r0: SBFX with width-1 when jit_armv7_p() holds, otherwise fallback_ext. On big-endian builds i0 is first mirrored to __WORDSIZE - (i0 + i1). */ +static void +_extr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1,jit_word_t i0, jit_word_t i1) +{ + if (jit_armv7_p()) { /* armv6t2 actually */ +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + if (jit_thumb_p()) + T2_SBFX(r0, r1, i0, i1 - 1); + else + 
SBFX(r0, r1, i0, i1 - 1); + } + else + fallback_ext(r0, r1, i0, i1); +} + +/* Unsigned counterpart of _extr: UBFX with width-1, otherwise fallback_ext_u. */ +static void +_extr_u(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1,jit_word_t i0, jit_word_t i1) +{ + if (jit_armv7_p()) { /* armv6t2 actually */ +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + if (jit_thumb_p()) + T2_UBFX(r0, r1, i0, i1 - 1); + else + UBFX(r0, r1, i0, i1 - 1); + } + else + fallback_ext_u(r0, r1, i0, i1); +} + +/* Bitfield deposit of i1 bits of r1 into r0 at bit i0: BFI is given the top bit index i0 + i1 - 1 rather than width-1; otherwise fallback_dep. */ +static void +_depr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1,jit_word_t i0, jit_word_t i1) +{ + if (jit_armv7_p()) { /* armv6t2 actually */ +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + if (jit_thumb_p()) + T2_BFI(r0, r1, i0, i0 + i1 - 1); + else + BFI(r0, r1, i0, i0 + i1 - 1); + } + else + fallback_dep(r0, r1, i0, i1); +} + static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) {