X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_ia64-cpu.c;h=8908ca77063a25cb363201ea275640be56b4f11e;hb=d481fb64f2aac7a36532142cda11fa43f5ca792f;hp=b28e8f1afada3f60b01e02274e7431a8145e100c;hpb=ba3814c189d3bd2332b66fb6c633a7d028e618fe;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/jit_ia64-cpu.c b/deps/lightning/lib/jit_ia64-cpu.c index b28e8f1a..8908ca77 100644 --- a/deps/lightning/lib/jit_ia64-cpu.c +++ b/deps/lightning/lib/jit_ia64-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -18,6 +18,7 @@ */ #if PROTO +#define jit_unaligned_p() (jit_cpu.unaligned) #define stack_framesize 144 #define params_offset 16 #define INST_NONE 0 /* should never be generated */ @@ -206,10 +207,10 @@ typedef enum { #define ldr(r0,r1) ldr_l(r0,r1) #define ldi(r0,i0) ldi_l(r0,i0) -#define str(r0,r1) str_l(r0,r1) -#define sti(i0,r0) str_l(i0,r0) #define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2) #define ldxi(r0,r1,i0) ldxi_l(r0,r1,i0) +#define str(r0,r1) str_l(r0,r1) +#define sti(i0,r0) sti_l(i0,r0) #define stxr(r0,r1,r2) stxr_l(r0,r1,r2) #define stxi(i0,r0,r1) stxi_l(i0,r0,r1) @@ -735,8 +736,8 @@ static void _X5(jit_state_t*,jit_word_t, /* dep */ #define DEP_Z(r1,r2,pos,len) I12(len,pos,r2,r1) #define DEPI_Z(r1,im,pos,len) I13(len,pos,im,r1) -#define DEPs(r1,r2,r3,pos,len) I14(1,len,r3,pos,r1) -#define DEPu(r1,r2,r3,pos,len) I14(0,len,r3,pos,r1) +#define DEPs(r1,r3,pos,len) I14(1,len,r3,pos,r1) +#define DEPu(r1,r3,pos,len) I14(0,len,r3,pos,r1) #define DEP(r1,r2,r3,pos,len) I15(pos,len,r3,r2,r1) /* epc */ #define EPC() B8(0x10) @@ -1175,6 +1176,8 @@ static void _X5(jit_state_t*,jit_word_t, #define ZXT2(r1,r3) I29(0x11,r3,r1) #define ZXT4(r1,r3) I29(0x12,r3,r1) +# define nop(i0) _nop(_jit,i0) +static void _nop(jit_state_t*, jit_int32_t); #define addr(r0,r1,r2) ADD(r0,r1,r2) #define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1203,6 +1206,12 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define hmulr(r0,r1,r2) mulh(r0,r1,r2,1) +#define hmuli(r0,r1,i0) _hmuli(_jit,r0,r1,i0) +static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define hmulr_u(r0,r1,r2) mulh(r0,r1,r2,0) +#define hmuli_u(r0,r1,i0) _hmuli_u(_jit,r0,r1,i0) +static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define divr(r0,r1,r2) _divr(_jit,r0,r1,r2) static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define divi(r0,r1,i0) _divi(_jit,r0,r1,i0) @@ -1259,6 +1268,26 @@ static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define rshr_u(r0,r1,r2) SHR_U(r0,r1,r2) #define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qlshr(r0,r1,r2,r3) xlshr(1,r0,r1,r2,r3) +# define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3) +# define xlshr(s,r0,r1,r2,r3) _xlshr(_jit,s,r0,r1,r2,r3) +static void +_xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0) +# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0) +# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0) +static void +_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3) +# define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3) +# define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3) +static void +_xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0) +# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0) +# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0) +static void +_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); #define ltr(r0,r1,r2) _ltr(_jit,r0,r1,r2) static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define lti(r0,r1,i0) _lti(_jit,r0,r1,i0) @@ -1299,6 +1328,15 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define nei(r0,r1,i0) _nei(_jit,r0,r1,i0) static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +#define clzr(r0, r1) _clzr(_jit, r0, r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +#define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +#define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); +#define popcntr(r0, r1) POPCNT(r0, r1) #define negr(r0,r1) subr(r0,0,r1) #define comr(r0,r1) ANDCMI(r0,-1,r1) #define movr(r0,r1) _movr(_jit,r0,r1) @@ -1321,6 +1359,12 @@ static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); # define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # define bswapr_ul(r0,r1) MUX1(r0,r1,MUX_REV) +#define extr(r0,r1,i0,i1) _extr(_jit,r0,r1,i0,i1) +static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +#define extr_u(r0,r1,i0,i1) _extr_u(_jit,r0,r1,i0,i1) +static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); +#define depr(r0,r1,i0,i1) _depr(_jit,r0,r1,i0,i1) +static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t); #define extr_c(r0,r1) SXT1(r0,r1) #define extr_uc(r0,r1) ZXT1(r0,r1) #define extr_s(r0,r1) SXT2(r0,r1) @@ -1467,6 +1511,14 @@ static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define ldxi_l(r0,r1,i0) _ldxi_l(_jit,r0,r1,i0) static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define unldr(r0, r1, i0) _unldr(_jit, r0, r1, i0) +static void _unldr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define unldi(r0, i0, i1) _unldi(_jit, r0, i0, i1) +static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +# define unldr_u(r0, r1, i0) _unldr_u(_jit, r0, r1, i0) +static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define unldi_u(r0, i0, i1) _unldi_u(_jit, r0, i0, i1) +static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); #define str_c(r0,r1) ST1(r0,r1) #define sti_c(i0,r0) _sti_c(_jit,i0,r0) static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t); @@ -1495,10 +1547,14 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +#define unstr(r0, r1, i0) _unstr(_jit, r0, r1, i0) +static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define unsti(i0, r0, i1) _unsti(_jit, i0, r0, i1) +static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); #define jmpr(r0) _jmpr(_jit,r0) static void _jmpr(jit_state_t*,jit_int32_t); #define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); #define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); #define callr(r0) _callr(_jit,r0) @@ -1515,7 +1571,7 @@ static void _epilog(jit_state_t*,jit_node_t*); static void _vastart(jit_state_t*, jit_int32_t); # define vaarg(r0, r1) _vaarg(_jit, r0, r1) static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); -#define patch_at(node,instr,label) _patch_at(_jit,node,instr,label) +#define patch_at(code,instr,label) _patch_at(_jit,code,instr,label) static void _patch_at(jit_state_t*,jit_code_t,jit_word_t,jit_word_t); #endif @@ -2454,7 +2510,7 @@ _I9(jit_state_t *_jit, jit_word_t _p, TSTREG1(r3); TSTPRED(_p); TSTREG1(r1); - inst((7L<<37)|(1L<<34)|(1L<<34)|(1L<<33)| + inst((7L<<37)|(1L<<34)|(1L<<33)| (x2<<30)|(1L<<28)|(r3<<20)|(r1<<6)|_p, INST_I); SETREG(r1); } @@ -2483,7 +2539,7 @@ _I11(jit_state_t *_jit, jit_word_t _p, assert(!(_p & ~0x3fL)); assert(!(len & ~0x3fL)); assert(!(r3 & ~0x7fL)); - assert(!(pos & ~0x1fL)); + assert(!(pos & ~0x3fL)); assert(!(y & ~0x1L)); assert(!(r1 & ~0x7fL)); TSTREG1(r3); @@ -3029,7 +3085,7 @@ _M29(jit_state_t *_jit, jit_word_t _p, jit_word_t ar, jit_word_t r2) { assert(!(_p & ~0x3fL)); - assert(!(ar & ~0x7L)); + assert(!(ar & ~0x7fL)); assert(!(r2 & ~0x7fL)); TSTREG1(r2); TSTPRED(_p); @@ -3453,6 +3509,66 @@ _X5(jit_state_t *_jit, jit_word_t _p, inst((i1<<36)|(1L<<27)|(y<<26)|(i20<<6)|_p, INST_X); } +static void +_nop(jit_state_t *_jit, jit_int32_t i0) +{ + for (; i0 > 0; i0 -= 8) { + NOP_M(0); + sync(); + } + assert(i0 == 0); +} + +static void +_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.clz) + CLZ(r0, r1); + else + fallback_clz(r0, r1); +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.clz) { + comr(r0, r1); + clzr(r0, r0); + } + else + fallback_clo(r0, r1); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.clz) { + comr(r0, r1); + ctzr(r0, r0); + } + else + fallback_cto(r0, r1); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (jit_cpu.clz) { + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + negr(rn(t0), r1); + andr(rn(t0), rn(t0), r1); + clzr(r0, rn(t0)); + xori(rn(t1), r0, __WORDSIZE - 1); + movnr(r0, rn(t1), rn(t0)); + jit_unget_reg(t0); + jit_unget_reg(t1); + } + else + fallback_ctz(r0, r1); +} + static void _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -3489,28 +3605,93 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) static void _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - jit_word_t w; - w = beqi(_jit->pc.w, r2, 0); - movr(r0, r1); - patch_at(w, _jit->pc.w); + CMP_EQ(PR_6, PR_7, r2, GR_0); + MOV_p(r0, r1, PR_7); } static void _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - jit_word_t w; - w = bnei(_jit->pc.w, r2, 0); - movr(r0, r1); - patch_at(w, _jit->pc.w); + CMP_EQ(PR_6, PR_7, r2, GR_0); + MOV_p(r0, r1, PR_6); +} + +static void +_extr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1) +{ + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if ( i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + EXTR(r0, r1, i0, i1 - 1); + } +} + +static void +_extr_u(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1) +{ + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if (i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + EXTR_U(r0, r1, i0, i1 - 1); + } +} + +static void +_depr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0; + jit_word_t mask; + assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE); + if (i1 == __WORDSIZE) + movr(r0, r1); + else { +# if __BYTE_ORDER == __BIG_ENDIAN + i0 = __WORDSIZE - (i0 + i1); +# endif + if (i1 <16) + DEP(r0, r1, r0, 63 - i0, i1 - 1); + else { + t0 = jit_get_reg(jit_class_gpr); + mask = ((1L << i1) - 1) << i0; + movr(rn(t0), r0); + DEP_Z(r0, r1, 63 - i0, i1 - 1); + andi(rn(t0), rn(t0), ~mask); + orr(r0, r0, rn(t0)); + jit_unget_reg(t0); + } + } } static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { - fallback_casx(r0, r1, r2, r3, i0); + jit_int32_t r1_reg, iscasi; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + sync(); + MOV_M_ar_rn(AR_CCV, r2); + CMPXCHG8_ACQ(r0, r1, r3); + eqr(r0, r0, r2); + if (iscasi) + jit_unget_reg(r1_reg); } + static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -3715,6 +3896,26 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + hmulr_u(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + #if !defined(__GNUC__) static long __divdi3(long u, long v) @@ -4021,6 +4222,201 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_xlshr(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_bool_t branch; + jit_word_t over, zero, done, done_over; + jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3; + s0 = jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + if ((s1 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) { + t1 = rn(s1); + branch = 0; + } + else + branch = 1; + rsbi(t0, t3, __WORDSIZE); + lshr(r0, t2, t3); + if (sign) + rshr(r1, t2, t0); + else + rshr_u(r1, t2, t0); + if (branch) { + zero = beqi(_jit->pc.w, t3, 0); + over = beqi(_jit->pc.w, t3, __WORDSIZE); + done = jmpi(_jit->pc.w); + patch_at(jit_code_beqi, over, _jit->pc.w); + /* overflow */ + movi(r0, 0); + done_over = jmpi(_jit->pc.w); + /* zero */ + patch_at(jit_code_beqi, zero, _jit->pc.w); + if (sign) + rshi(r1, t2, __WORDSIZE - 1); + else + movi(r1, 0); + patch_at(jit_code_jmpi, done, _jit->pc.w); + patch_at(jit_code_jmpi, done_over, _jit->pc.w); + } + else { + if (sign) + rshi(t0, t2, __WORDSIZE - 1); + else + movi(t0, 0); + /* zero? */ + movzr(r1, t0, t3); + /* Branchless but 4 bytes longer than branching fallback */ + if (sign) + movi(t0, 0); + /* overflow? */ + eqi(t1, t3, __WORDSIZE); + movnr(r0, t0, t1); + jit_unget_reg(s1); + } + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_xlshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + if (sign) + rshi(r1, r2, __WORDSIZE - 1); + else + movi(r1, 0); + } + else if (i0 == __WORDSIZE) { + movr(r1, r2); + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + if (sign) + rshi(r1, r2, __WORDSIZE - i0); + else + rshi_u(r1, r2, __WORDSIZE - i0); + lshi(r0, r2, i0); + } +} + +static void +_xrshr(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_bool_t branch; + jit_word_t over, zero, done, done_over; + jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3; + s0 = jit_get_reg(jit_class_gpr); + t0 = rn(s0); + if (r0 == r2 || r1 == r2) { + s2 = jit_get_reg(jit_class_gpr); + t2 = rn(s2); + movr(t2, r2); + } + else + t2 = r2; + if (r0 == r3 || r1 == r3) { + s3 = jit_get_reg(jit_class_gpr); + t3 = rn(s3); + movr(t3, r3); + } + else + t3 = r3; + if ((s1 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) { + t1 = rn(s1); + branch = 0; + } + else + branch = 1; + rsbi(t0, t3, __WORDSIZE); + if (sign) + rshr(r0, t2, t3); + else + rshr_u(r0, t2, t3); + lshr(r1, t2, t0); + if (branch) { + zero = beqi(_jit->pc.w, t3, 0); + over = beqi(_jit->pc.w, t3, __WORDSIZE); + done = jmpi(_jit->pc.w); + patch_at(jit_code_beqi, over, _jit->pc.w); + /* underflow */ + if (sign) + rshi(r0, t2, __WORDSIZE - 1); + else + movi(r0, 0); + done_over = jmpi(_jit->pc.w); + /* zero */ + patch_at(jit_code_beqi, zero, _jit->pc.w); + movi(r1, 0); + patch_at(jit_code_jmpi, done, _jit->pc.w); + patch_at(jit_code_jmpi, done_over, _jit->pc.w); + jit_unget_reg(s1); + } + else { + /* zero? */ + movi(t0, 0); + movzr(r1, t0, t3); + /* underflow? */ + eqi(t1, t3, __WORDSIZE); + if (sign) + rshi(t0, t2, __WORDSIZE - 1); + movnr(r0, t0, t1); + jit_unget_reg(s1); + } + jit_unget_reg(s0); + if (t2 != r2) + jit_unget_reg(s2); + if (t3 != r3) + jit_unget_reg(s3); +} + +static void +_xrshi(jit_state_t *_jit, jit_bool_t sign, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0) +{ + if (i0 == 0) { + movr(r0, r2); + movi(r1, 0); + } + else if (i0 == __WORDSIZE) { + movr(r1, r2); + if (sign) + rshi(r0, r2, __WORDSIZE - 1); + else + movi(r0, 0); + } + else { + assert((jit_uword_t)i0 <= __WORDSIZE); + lshi(r1, r2, __WORDSIZE - i0); + if (sign) + rshi(r0, r2, i0); + else + rshi_u(r0, r2, i0); + } +} + static void _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -4496,6 +4892,44 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) ldr_l(r0, r1); } +static void +_unldr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_unaligned_p()) + fallback_unldr(r0, r1, i0); + else + generic_unldr(r0, r1, i0); +} + +static void +_unldi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0, r2; + if (jit_unaligned_p()) + fallback_unldi(r0, i0, i1); + else + generic_unldi(r0, i0, i1); +} + +static void +_unldr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_unaligned_p()) + fallback_unldr_u(r0, r1, i0); + else + generic_unldr_u(r0, r1, i0); +} + +static void +_unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0, r2; + if (jit_unaligned_p()) + fallback_unldi_u(r0, i0, i1); + else + generic_unldi_u(r0, i0, i1); +} + static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { @@ -4632,6 +5066,24 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) str_l(r0, r1); } +static void +_unstr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_unaligned_p()) + fallback_unstr(r0, r1, i0); + else + generic_unstr(r0, r1, i0); +} + +static void +_unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) +{ + if (jit_unaligned_p()) + fallback_unsti(i0, r0, i1); + else + generic_unsti(i0, r0, i1); +} + static jit_word_t _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { @@ -5125,16 +5577,18 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) BR(BR_6); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t d; + jit_word_t d, w; sync(); - d = ((jit_word_t)i0 - _jit->pc.w) >> 4; + w = _jit->pc.w; + d = ((jit_word_t)i0 - w) >> 4; if (d >= -16777216 && d <= 16777215) BRI(d); else BRL(d); + return (w); } static jit_word_t @@ -5380,14 +5834,16 @@ _patch_at(jit_state_t *_jit, jit_code_t code, i1 = (ic >> 61) & 0x1L; i41 = (ic >> 22) & 0x1ffffffffffL; i20 = ic & 0xfffffL; - assert((tm & ~1) == TM_M_L_X_ && + if (!((tm & ~1) == TM_M_L_X_ && (s2 & 0xfL<<37) == (0xcL<<37) && - s0 == nop_m); + s0 == nop_m)) + goto short_jump; s1 = i41; s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12); s2 |= (i1<<36)|(i20<<13); break; default: + short_jump: /* Only B1 in slot 0 expected due to need to either * a stop to update predicates, or a sync before * unconditional short branch */