X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_sparc-cpu.c;h=65628678bd358a0fe3a616612c04a83e68c8afca;hb=d481fb64f2aac7a36532142cda11fa43f5ca792f;hp=90c3767b00ebb269ef3395dabc623ce3bed8dcd3;hpb=437b1e617808119c3a24a72c77cd2fa86a5d3220;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/jit_sparc-cpu.c b/deps/lightning/lib/jit_sparc-cpu.c index 90c3767b..65628678 100644 --- a/deps/lightning/lib/jit_sparc-cpu.c +++ b/deps/lightning/lib/jit_sparc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2019 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -65,14 +65,16 @@ typedef union { struct { jit_uint32_t _: 13; jit_uint32_t b: 5; } rs1; struct { jit_uint32_t _: 18; jit_uint32_t b: 1; } i; struct { jit_uint32_t _: 18; jit_uint32_t b: 9; } opf; + struct { jit_uint32_t _: 18; jit_uint32_t b: 5; } rs3; struct { jit_uint32_t _: 19; jit_uint32_t b: 1; } x; struct { jit_uint32_t _: 19; jit_uint32_t b: 8; } asi; struct { jit_uint32_t _: 19; jit_uint32_t b: 6; } res; struct { jit_uint32_t _: 19; jit_uint32_t b: 13; } simm13; struct { jit_uint32_t _: 20; jit_uint32_t b: 7; } asix; struct { jit_uint32_t _: 20; jit_uint32_t b: 6; } asis; - struct { jit_uint32_t _: 26; jit_uint32_t b: 6; } shim; + struct { jit_uint32_t _: 23; jit_uint32_t b: 4; } op5; struct { jit_uint32_t _: 25; jit_uint32_t b: 7; } imm7; + struct { jit_uint32_t _: 26; jit_uint32_t b: 6; } shim; struct { jit_uint32_t _: 27; jit_uint32_t b: 5; } rs2; jit_int32_t v; } jit_instr_t; @@ -99,7 +101,10 @@ _f2bp(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t, # define f3r(op, rd, op3, rs1, rs2) _f3r(_jit, op, rd, op3, rs1, rs2) static void _f3r(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); -# if __WORDSIZE == 64 +# if __WORDSIZE == 64 || CHECK_LZCNT +# define f3ri(op, rd, op3, rs1, rs2) _f3ri(_jit, op, rd, op3, rs1, rs2) +static void _f3ri(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define f3rx(op, rd, op3, rs1, rs2) _f3rx(_jit, op, rd, op3, rs1, rs2) static void _f3rx(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); @@ -113,10 +118,15 @@ static void _f3s(jit_state_t*, # define f3t(cond, rs1, i, ri) _f3t(_jit, cond, rs1, i, ri) static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused; -# define f3a(op, rd, op3, rs1, rs2) _f3a(_jit, op, rd, op3, rs1, asi, rs2) +# define f3a(op,rd,op3,rs1,asi,rs2) _f3a(_jit, op, rd, op3, rs1, asi, rs2) static void _f3a(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused; +# define f2c1(op,rd,op3,rs1,opf,rs2) _f2c1(_jit,op,rd,op3,rs1,opf,rs2) +static void +_f2c1(jit_state_t*,jit_int32_t, jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) + maybe_unused; # define LDSB(rs1, rs2, rd) f3r(3, rd, 9, rs1, rs2) # define LDSBI(rs1, imm, rd) f3i(3, rd, 9, rs1, imm) # define LDSH(rs1, rs2, rd) f3r(3, rd, 10, rs1, rs2) @@ -194,6 +204,11 @@ static void _f3a(jit_state_t*,jit_int32_t, # define SWAP(rs1, rs2, rd) f3r(3, rd, 15, rs1, rs2) # define SWAPI(rs1, imm, rd) f3r(3, rd, 15, rs1, imm) # define SWAPA(rs1, rs2, asi, rd) f3a(3, rd, 23, rs1, asi, rs2) +/* Sparc v9 deprecates SWAP* in favor of CAS*A */ +# define CASA(rs1, rs2, rd) f3a(3, rd, 60, rs1, 128, rs2) +# if __WORDSIZE == 64 +# define CASXA(rs1, rs2, rd) f3a(3, rd, 62, rs1, 128, rs2) +# endif # define NOP() SETHI(0, 0) # define HI(im) ((im) >> 10) # define LO(im) ((im) & 0x3ff) @@ -230,7 +245,7 @@ static void _f3a(jit_state_t*,jit_int32_t, # define SRLI(rs1, imm, rd) f3i(2, rd, 38, rs1, imm) # define SRA(rs1, rs2, rd) f3r(2, rd, 39, rs1, rs2) # define SRAI(rs1, imm, rd) f3i(2, rd, 39, rs1, imm) -# if __WORDSIZE == 64 +# if __WORDSIZE == 64 || CHECK_LZCNT # define SLLX(rs1, rs2, rd) f3rx(2, rd, 37, rs1, rs2) # define SLLXI(rs1, imm, rd) f3s(2, rd, 37, rs1, imm) # define SRLX(rs1, rs2, rd) f3rx(2, rd, 38, rs1, rs2) @@ -537,6 +552,7 @@ static void _f3a(jit_state_t*,jit_int32_t, # define UNIMP(imm) f2r(0, 0, 0, imm) # define FLUSH(rs1, rs2) f3r(2, 0, 59, rs1, rs2) # define FLUSHI(rs1, im) f3i(2, 0, 59, rs1, imm) +# define LZCNT(rs2, rd) f2c1(2, rd, 54, 0, 23, rs2) # define nop(i0) _nop(_jit, i0) static void _nop(jit_state_t*, jit_int32_t); # define movr(r0, r1) _movr(_jit, r0, r1) @@ -552,8 +568,21 @@ static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0, r1) XNOR(r1, 0, r0) # define negr(r0, r1) NEG(r1, r0) +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0, r1) _clzr(_jit, r0, r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); # define addr(r0, r1, r2) ADD(r1, r2, r0) # define addi(r0, r1, i0) _addi(_jit, r0, r1, i0) static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); @@ -601,6 +630,10 @@ static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # endif # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2) +# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0) +# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2) +# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0) # if __WORDSIZE == 32 # define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) # define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) @@ -928,13 +961,13 @@ _bm_w(jit_state_t*,jit_bool_t,jit_word_t,jit_int32_t,jit_word_t); # define jmpr(r0) _jmpr(_jit, r0) static void _jmpr(jit_state_t*,jit_int32_t); # define jmpi(i0) _jmpi(_jit, i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); # define jmpi_p(i0) _jmpi_p(_jit, i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) _callr(_jit, r0) static void _callr(jit_state_t*,jit_int32_t); # define calli(i0) _calli(_jit, i0) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(i0) _calli_p(_jit, i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(node) _prolog(_jit, node) @@ -1030,6 +1063,26 @@ _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, } # if __WORDSIZE == 64 +static void +_f3ri(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(op3 & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(rs2 & 0xffffffe0)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 1; + v.asi.b = 0; + v.rs2.b = rs2; + ii(v.v); +} + static void _f3rx(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) @@ -1149,6 +1202,26 @@ _f1(jit_state_t *_jit, jit_int32_t op, jit_int32_t disp30) ii(v.v); } +static void +_f2c1(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(op3 & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(opf & 0xfffffe00)); + assert(!(rs2 & 0xfffffe00)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.opf.b = opf; + v.rs2.b = rs2; + ii(v.v); +} + static void _nop(jit_state_t *_jit, jit_int32_t i0) { @@ -1233,6 +1306,98 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) patch_at(w, _jit->pc.w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t iscasi, r1_reg; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + /* Do not clobber r2 */ + movr(r0, r3); + /* The CASXA instruction compares the value in register r[rs2] with + * the doubleword in memory pointed to by the doubleword address in + * r[rs1]. If the values are equal, the value in r[rd] is swapped + * with the doubleword pointed to by the doubleword address in r[rs1]. + * If the values are not equal, the contents of the doubleword pointed + * to by r[rs1] replaces the value in r[rd], but the memory location + * remains unchanged. + */ +# if __WORDSIZE == 32 + CASA(r1, r2, r0); +# else + CASXA(r1, r2, r0); +# endif + eqr(r0, r0, r2); + if (iscasi) + jit_unget_reg(r1_reg); +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.lzcnt) { + comr(r0, r1); + clzr(r0, r0); + } + else + fallback_clo(r0, r1); +} + +static void +_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +# if CHECK_LZCNT + if (jit_cpu.lzcnt) { +# if __WORDSIZE == 32 + jit_word_t w; + SLLXI(r1, 32, r0); + LZCNT(r0, r0); + w = blei(_jit->pc.w, r0, 31); + rshi(r0, r0, 1); /* r0 is 64 */ + patch_at(w, _jit->pc.w); +# else + LZCNT(r1, r0); +# endif + } + else +# endif + fallback_clz(r0, r1); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.lzcnt) { + comr(r0, r1); + ctzr(r0, r0); + } + else + fallback_cto(r0, r1); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (jit_cpu.lzcnt) { + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + negr(rn(t0), r1); + andr(rn(t0), rn(t0), r1); + clzr(r0, rn(t0)); + xori(rn(t1), r0, __WORDSIZE - 1); + movnr(r0, rn(t1), rn(t0)); + jit_unget_reg(t0); + jit_unget_reg(t1); + } + else + fallback_ctz(r0, r1); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1472,6 +1637,8 @@ static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) { + if (r0 == JIT_NOREG) + r0 = r1; if (sign) SMUL(r2, r3, r0); else @@ -1485,6 +1652,8 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, { jit_int32_t reg; if (s13_p(i0)) { + if (r0 == JIT_NOREG) + r0 = r1; if (sign) SMULI(r2, i0, r0); else @@ -1537,7 +1706,8 @@ _qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, movr(_O0_REGNO, r3); movr(_O1_REGNO, r2); calli((jit_word_t)__llmul); - movr(r0, _O1_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _O1_REGNO); movr(r1, _O0_REGNO); QMUL_EPILOG(); } @@ -1550,7 +1720,8 @@ _qmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, movi(_O0_REGNO, i0); movr(_O1_REGNO, r2); calli((jit_word_t)__llmul); - movr(r0, _O1_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _O1_REGNO); movr(r1, _O0_REGNO); QMUL_EPILOG(); } @@ -1568,7 +1739,8 @@ _qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, movr(_O0_REGNO, r3); movr(_O1_REGNO, r2); calli((jit_word_t)__ullmul); - movr(r0, _O1_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _O1_REGNO); movr(r1, _O0_REGNO); QMUL_EPILOG(); } @@ -1581,7 +1753,8 @@ _qmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, movi(_O0_REGNO, i0); movr(_O1_REGNO, r2); calli((jit_word_t)__ullmul); - movr(r0, _O1_REGNO); + if (r0 != JIT_NOREG) + movr(r0, _O1_REGNO); movr(r1, _O0_REGNO); QMUL_EPILOG(); } @@ -2247,7 +2420,7 @@ _bw(jit_state_t *_jit, jit_int32_t cc, # if __WORDSIZE == 32 B(cc, (i0 - w) >> 2); # else - B(cc, (i0 - w) >> 2); + BP(cc, (i0 - w) >> 2); # endif NOP(); } @@ -2367,14 +2540,15 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) NOP(); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; jit_int32_t reg; - w = (i0 - _jit->pc.w) >> 2; - if (s22_p(w)) { - BA(w); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s22_p(d)) { + BA(d); NOP(); } else { @@ -2383,6 +2557,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) jmpr(rn(reg)); jit_unget_reg(reg); } + return (w); } static jit_word_t @@ -2404,13 +2579,19 @@ _callr(jit_state_t *_jit, jit_int32_t r0) NOP(); } -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; - w = (i0 - _jit->pc.w) >> 2; - CALLI(w); - NOP(); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s30_p(d)) { + CALLI(d); + NOP(); + } + else + w = calli_p(i0); + return (w); } static jit_word_t @@ -2484,24 +2665,24 @@ _epilog(jit_state_t *_jit, jit_node_t *node) { if (_jitc->function->assume_frame) return; - /* (most) other backends do not save incoming arguments, so, - * only save locals here */ + if (_jitc->function->allocar) + subi(_SP_REGNO, _FP_REGNO, _jitc->function->stack); if (jit_regset_tstbit(&_jitc->function->regset, _L0)) - ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0)); + ldxi(_L0_REGNO, _SP_REGNO, _jitc->function->stack + OFF(0)); if (jit_regset_tstbit(&_jitc->function->regset, _L1)) - ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1)); + ldxi(_L1_REGNO, _SP_REGNO, _jitc->function->stack + OFF(1)); if (jit_regset_tstbit(&_jitc->function->regset, _L2)) - ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2)); + ldxi(_L2_REGNO, _SP_REGNO, _jitc->function->stack + OFF(2)); if (jit_regset_tstbit(&_jitc->function->regset, _L3)) - ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3)); + ldxi(_L3_REGNO, _SP_REGNO, _jitc->function->stack + OFF(3)); if (jit_regset_tstbit(&_jitc->function->regset, _L4)) - ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4)); + ldxi(_L4_REGNO, _SP_REGNO, _jitc->function->stack + OFF(4)); if (jit_regset_tstbit(&_jitc->function->regset, _L5)) - ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5)); + ldxi(_L5_REGNO, _SP_REGNO, _jitc->function->stack + OFF(5)); if (jit_regset_tstbit(&_jitc->function->regset, _L6)) - ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6)); + ldxi(_L6_REGNO, _SP_REGNO, _jitc->function->stack + OFF(6)); if (jit_regset_tstbit(&_jitc->function->regset, _L7)) - ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7)); + ldxi(_L7_REGNO, _SP_REGNO, _jitc->function->stack + OFF(7)); RESTOREI(0, 0, 0); RETL(); NOP(); @@ -2582,6 +2763,11 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) else abort(); } + else if (i.op.b == 1) { + assert(s30_p((label - instr) >> 2)); + i.disp30.b = (label - instr) >> 2; + u.i[0] = i.v; + } else abort(); }