X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_sparc-cpu.c;h=f4ce62130ac9cd4b20f0b19e634be29eb81f199a;hb=02a5662c31c401081716623cc80bb1c4ab1dbb19;hp=ecea5066bec169589f6da40ac053c41508504091;hpb=ba3814c189d3bd2332b66fb6c633a7d028e618fe;p=pcsx_rearmed.git

diff --git a/deps/lightning/lib/jit_sparc-cpu.c b/deps/lightning/lib/jit_sparc-cpu.c
index ecea5066..f4ce6213 100644
--- a/deps/lightning/lib/jit_sparc-cpu.c
+++ b/deps/lightning/lib/jit_sparc-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2019 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -100,6 +100,9 @@ _f2bp(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
 static void _f3r(jit_state_t*,
                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 # if __WORDSIZE == 64
+# define f3ri(op, rd, op3, rs1, rs2) _f3ri(_jit, op, rd, op3, rs1, rs2)
+static void _f3ri(jit_state_t*,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 # define f3rx(op, rd, op3, rs1, rs2) _f3rx(_jit, op, rd, op3, rs1, rs2)
 static void _f3rx(jit_state_t*,
                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
@@ -113,10 +116,15 @@ static void _f3s(jit_state_t*,
 # define f3t(cond, rs1, i, ri) _f3t(_jit, cond, rs1, i, ri)
 static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
     maybe_unused;
-# define f3a(op, rd, op3, rs1, rs2) _f3a(_jit, op, rd, op3, rs1, asi, rs2)
+# define f3a(op,rd,op3,rs1,asi,rs2) _f3a(_jit, op, rd, op3, rs1, asi, rs2)
 static void _f3a(jit_state_t*,jit_int32_t,
                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
     maybe_unused;
+# define f2c1(op,rd,op3,rs1,opf,rs2) _f2c1(_jit,op,rd,op3,rs1,opf,rs2)
+static void
+_f2c1(jit_state_t*,jit_int32_t, jit_int32_t,
+      jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
+    maybe_unused;
 # define LDSB(rs1, rs2, rd) f3r(3, rd, 9, rs1, rs2)
 # define LDSBI(rs1, imm, rd) f3i(3, rd, 9, rs1, imm)
 # define LDSH(rs1, rs2, rd) f3r(3, rd, 10, rs1, rs2)
@@ -194,6 +202,11 @@ static void _f3a(jit_state_t*,jit_int32_t,
 # define SWAP(rs1, rs2, rd) f3r(3, rd, 15, rs1, rs2)
 # define SWAPI(rs1, imm, rd) f3r(3, rd, 15, rs1, imm)
 # define SWAPA(rs1, rs2, asi, rd) f3a(3, rd, 23, rs1, asi, rs2)
+/* Sparc v9 deprecates SWAP* in favor of CAS*A */
+# define CASA(rs1, rs2, rd) f3a(3, rd, 60, rs1, 128, rs2)
+# if __WORDSIZE == 64
+# define CASXA(rs1, rs2, rd) f3a(3, rd, 62, rs1, 128, rs2)
+# endif
 # define NOP() SETHI(0, 0)
 # define HI(im) ((im) >> 10)
 # define LO(im) ((im) & 0x3ff)
@@ -537,6 +550,7 @@ static void _f3a(jit_state_t*,jit_int32_t,
 # define UNIMP(imm) f2r(0, 0, 0, imm)
 # define FLUSH(rs1, rs2) f3r(2, 0, 59, rs1, rs2)
-# define FLUSHI(rs1, im) f3i(2, 0, 59, rs1, imm)
+# define FLUSHI(rs1, imm) f3i(2, 0, 59, rs1, imm)
+# define LZCNT(rs2, rd) f2c1(2, rd, 54, 0, 23, rs2) /* rd = leading zero count of rs2 */
 # define nop(i0) _nop(_jit, i0)
 static void _nop(jit_state_t*, jit_int32_t);
 # define movr(r0, r1) _movr(_jit, r0, r1)
@@ -559,6 +573,16 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
 # define comr(r0, r1) XNOR(r1, 0, r0)
 # define negr(r0, r1) NEG(r1, r0)
+# define bitswap(r0, r1) _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 # define addr(r0, r1, r2) ADD(r1, r2, r0)
 # define addi(r0, r1, i0) _addi(_jit, r0, r1, i0)
 static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
@@ -933,13 +957,13 @@ _bm_w(jit_state_t*,jit_bool_t,jit_word_t,jit_int32_t,jit_word_t);
 # define jmpr(r0) _jmpr(_jit, r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 # define jmpi(i0) _jmpi(_jit, i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 # define jmpi_p(i0) _jmpi_p(_jit, i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 # define callr(r0) _callr(_jit, r0)
 static void _callr(jit_state_t*,jit_int32_t);
 # define calli(i0) _calli(_jit, i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 # define calli_p(i0) _calli_p(_jit, i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 # define prolog(node) _prolog(_jit, node)
@@ -1035,6 +1059,26 @@ _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
 }
 
 # if __WORDSIZE == 64
+static void
+_f3ri(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2)
+{
+    jit_instr_t v;
+    assert(!(op & 0xfffffffc));
+    assert(!(rd & 0xffffffe0));
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(rs2 & 0xffffffe0));
+    v.op.b = op;
+    v.rd.b = rd;
+    v.op3.b = op3;
+    v.rs1.b = rs1;
+    v.i.b = 1; /* unlike the plain register form, the i bit is set */
+    v.asi.b = 0;
+    v.rs2.b = rs2;
+    ii(v.v);
+}
+
 static void
 _f3rx(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
       jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2)
@@ -1154,6 +1198,26 @@ _f1(jit_state_t *_jit, jit_int32_t op, jit_int32_t disp30)
     ii(v.v);
 }
 
+static void
+_f2c1(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2)
+{
+    jit_instr_t v;
+    assert(!(op & 0xfffffffc));
+    assert(!(rd & 0xffffffe0));
+    assert(!(op3 & 0xffffffc0)); /* op3 must fit in 6 bits */
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(opf & 0xfffffe00)); /* opf must fit in 9 bits */
+    assert(!(rs2 & 0xffffffe0)); /* rs2 is a register number: 5 bits */
+    v.op.b = op;
+    v.rd.b = rd;
+    v.op3.b = op3;
+    v.rs1.b = rs1;
+    v.opf.b = opf;
+    v.rs2.b = rs2;
+    ii(v.v);
+}
+
 static void
 _nop(jit_state_t *_jit, jit_int32_t i0)
 {
@@ -1242,7 +1306,135 @@ static void
 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
 {
-    fallback_casx(r0, r1, r2, r3, i0);
+    jit_int32_t iscasi, r1_reg;
+    if ((iscasi = (r1 == _NOREG))) { /* casi form: the address is the immediate i0 */
+        r1_reg = jit_get_reg(jit_class_gpr);
+        r1 = rn(r1_reg);
+        movi(r1, i0);
+    }
+    /* Do not clobber r3: CAS*A overwrites rd, so operate on a copy in r0 */
+    movr(r0, r3);
+    /* The CASXA instruction compares the value in register r[rs2] with
+     * the doubleword in memory pointed to by the doubleword address in
+     * r[rs1]. If the values are equal, the value in r[rd] is swapped
+     * with the doubleword pointed to by the doubleword address in r[rs1].
+     * If the values are not equal, the contents of the doubleword pointed
+     * to by r[rs1] replaces the value in r[rd], but the memory location
+     * remains unchanged.
+     */
+# if __WORDSIZE == 32
+    CASA(r1, r2, r0);
+# else
+    CASXA(r1, r2, r0);
+# endif
+    eqr(r0, r0, r2); /* r0 = 1 when the old memory value equalled r2, else 0 */
+    if (iscasi)
+        jit_unget_reg(r1_reg);
+}
+
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t t0, t1, t2;
+    movr(r0, r1); /* r0 = r1 with its bit order reversed, built up in place */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# if __WORDSIZE == 32
+    rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16); /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# else
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+    rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32); /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# endif
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.lzcnt) {
+        comr(r0, r1); /* leading ones of r1 are the leading zeros of ~r1 */
+        clzr(r0, r0);
+    }
+    else
+        fallback_clo(r0, r1);
+}
+
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.lzcnt) {
+#if __WORDSIZE == 32
+        jit_word_t w;
+        SLLXI(r1, 32, r0); /* shift the value left by 32 before counting */
+        LZCNT(r0, r0);
+        /* a count above 31 is halved by the shift below; skip it otherwise */
+        w = blei(_jit->pc.w, r0, 31);
+        rshi(r0, r0, 1); /* r0 is 64 */
+        patch_at(w, _jit->pc.w);
+#else
+        LZCNT(r1, r0);
+#endif
+    }
+    else
+        fallback_clz(r0, r1);
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.lzcnt) {
+        bitswap(r0, r1); /* trailing bits become leading bits */
+        clor(r0, r0);
+    }
+    else
+        fallback_cto(r0, r1);
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.lzcnt) {
+        bitswap(r0, r1); /* trailing bits become leading bits */
+        clzr(r0, r0);
+    }
+    else
+        fallback_ctz(r0, r1);
 }
 
 static void
@@ -2259,7 +2451,7 @@ _bw(jit_state_t *_jit, jit_int32_t cc,
 # if __WORDSIZE == 32
     B(cc, (i0 - w) >> 2);
 # else
-    B(cc, (i0 - w) >> 2);
+    BP(cc, (i0 - w) >> 2);
 # endif
     NOP();
 }
@@ -2379,14 +2571,15 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
     NOP();
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t w;
     jit_int32_t reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (s22_p(w)) {
-        BA(w);
+    jit_word_t d, w;
+    w = _jit->pc.w; /* code address before the jump is emitted; returned */
+    d = (i0 - w) >> 2;
+    if (s22_p(d)) {
+        BA(d);
         NOP();
     }
     else {
@@ -2395,6 +2588,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
         jmpr(rn(reg));
         jit_unget_reg(reg);
     }
+    return (w);
 }
 
 static jit_word_t
@@ -2416,13 +2610,19 @@ _callr(jit_state_t *_jit, jit_int32_t r0)
     NOP();
 }
 
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t w;
-    w = (i0 - _jit->pc.w) >> 2;
-    CALLI(w);
-    NOP();
+    jit_word_t d, w;
+    w = _jit->pc.w; /* code address before the call is emitted */
+    d = (i0 - w) >> 2;
+    if (s30_p(d)) {
+        CALLI(d);
+        NOP();
+    }
+    else
+        w = calli_p(i0); /* displacement does not fit: use the calli_p sequence */
+    return (w);
 }
 
 static jit_word_t
@@ -2496,24 +2696,24 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
 {
     if (_jitc->function->assume_frame)
         return;
-    /* (most) other backends do not save incoming arguments, so,
-     * only save locals here */
+    if (_jitc->function->allocar) /* recompute %sp as %fp - stack before reloading */
+        subi(_SP_REGNO, _FP_REGNO, _jitc->function->stack);
     if (jit_regset_tstbit(&_jitc->function->regset, _L0))
-        ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0));
+        ldxi(_L0_REGNO, _SP_REGNO, _jitc->function->stack + OFF(0));
     if (jit_regset_tstbit(&_jitc->function->regset, _L1))
-        ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1));
+        ldxi(_L1_REGNO, _SP_REGNO, _jitc->function->stack + OFF(1));
     if (jit_regset_tstbit(&_jitc->function->regset, _L2))
-        ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2));
+        ldxi(_L2_REGNO, _SP_REGNO, _jitc->function->stack + OFF(2));
     if (jit_regset_tstbit(&_jitc->function->regset, _L3))
-        ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3));
+        ldxi(_L3_REGNO, _SP_REGNO, _jitc->function->stack + OFF(3));
     if (jit_regset_tstbit(&_jitc->function->regset, _L4))
-        ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4));
+        ldxi(_L4_REGNO, _SP_REGNO, _jitc->function->stack + OFF(4));
     if (jit_regset_tstbit(&_jitc->function->regset, _L5))
-        ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5));
+        ldxi(_L5_REGNO, _SP_REGNO, _jitc->function->stack + OFF(5));
     if (jit_regset_tstbit(&_jitc->function->regset, _L6))
-        ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6));
+        ldxi(_L6_REGNO, _SP_REGNO, _jitc->function->stack + OFF(6));
     if (jit_regset_tstbit(&_jitc->function->regset, _L7))
-        ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7));
+        ldxi(_L7_REGNO, _SP_REGNO, _jitc->function->stack + OFF(7));
     RESTOREI(0, 0, 0);
     RETL();
     NOP();
@@ -2594,6 +2794,11 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
         else
             abort();
     }
+    else if (i.op.b == 1) { /* op == 1: rewrite the 30-bit word displacement */
+        assert(s30_p((label - instr) >> 2));
+        i.disp30.b = (label - instr) >> 2;
+        u.i[0] = i.v;
+    }
     else
         abort();
 }