X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_sparc-fpu.c;h=3cdb870853fa93946ab2533182ee63907ac656cb;hb=b68d544b28b604e2e88e2dbab5b55826a03a269e;hp=ae2cbab35be2d3395269cff60fc70b2c2e80b18b;hpb=0adc619b5a5705d16cc6caea64951776ababbb96;p=pcsx_rearmed.git

Note: this copy carries two corrections on top of the commit referenced
above, so the blob ids on the "index" line no longer describe the result:
  * the guard around truncr_f/truncr_d is spelled __WORDSIZE instead of
    __WORDSIZSE (an undefined name, which made the 32-bit branch dead);
  * _fop3f() assigns x3 when r0 aliases both r2 and r3; it was otherwise
    passed to FPop3() without ever being set.

diff --git a/deps/lightning/lib/jit_sparc-fpu.c b/deps/lightning/lib/jit_sparc-fpu.c
index ae2cbab3..3cdb8708 100644
--- a/deps/lightning/lib/jit_sparc-fpu.c
+++ b/deps/lightning/lib/jit_sparc-fpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2019  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -93,6 +93,11 @@
 #  define f3f(rd, op3, rs1, opf, rs2)	_f3f(_jit, rd, op3, rs1, opf, rs2)
 static void
 _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t);
+#  define FPop3(rd, rs1, rs3, op5, rs2)	f4f(rd, 55, rs1, rs3, op5, rs2)
+#  define f4f(rd,op3,rs1,rs3,op5,rs2)	_f4f(_jit, rd,op3,rs1,rs3,op5,rs2)
+static void
+_f4f(jit_state_t*,jit_int32_t,jit_int32_t,
+     jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t);
 #  define FITOS(rs2, rd)		FPop1(rd, 0, 196, rs2)
 #  define FITOD(rs2, rd)		FPop1(rd, 0, 200, rs2)
 #  define FITOQ(rs2, rd)		FPop1(rd, 0, 204, rs2)
@@ -139,6 +144,22 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t);
 #  define SPARC_FDIVS			77
 #  define SPARC_FDIVD			78
 #  define SPARC_FDIVQ			79
+#  define SPARC_FMADDS			1
+#  define SPARC_FMADDD			2
+#  define SPARC_FMSUBS			5
+#  define SPARC_FMSUBD			6
+#  define SPARC_FNMSUBS			9
+#  define SPARC_FNMSUBD			10
+#  define SPARC_FNMADDS			13
+#  define SPARC_FNMADDD			14
+#  define FMADDS(rs1, rs2, rs3, rd)	f4f(rd, 55, rs1, rs3, SPARC_FMADDS, rs2)
+#  define FMADDD(rs1, rs2, rs3, rd)	f4f(rd, 55, rs1, rs3, SPARC_FMADDD, rs2)
+#  define FMSUBS(rs1, rs2, rs3, rd)	f4f(rd, 55, rs1, rs3, SPARC_FMSUBS, rs2)
+#  define FMSUBD(rs1, rs2, rs3, rd)	f4f(rd, 55, rs1, rs3, SPARC_FMSUBD, rs2)
+#  define FNMSUBS(rs1, rs2, rs3,rd)	f4f(rd, 55, rs1, rs3, SPARC_FNMSUBS,rs2)
+#  define FNMSUBD(rs1, rs2, rs3,rd)	f4f(rd, 55, rs1, rs3, SPARC_FNMSUBD,rs2)
+#  define FNMADDS(rs1, rs2, rs3,rd)	f4f(rd, 55, rs1, rs3, SPARC_FNMADDS,rs2)
+#  define FNMADDD(rs1, rs2, rs3,rd)	f4f(rd, 55, rs1, rs3, SPARC_FNMADDD,rs2)
 #  define FADDS(rs1, rs2, rd)		FPop1(rd, rs1, SPARC_FADDS, rs2)
 #  define FADDD(rs1, rs2, rd)		FPop1(rd, rs1, SPARC_FADDD, rs2)
 #  define FADDQ(rs1, rs2, rd)		FPop1(rd, rs1, SPARC_FADDQ, rs2)
@@ -171,10 +192,10 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t);
 static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t);
-#  if __WORDSIZSE == 32
+#  if __WORDSIZE == 32
 #    define truncr_f(r0, r1)		truncr_f_i(r0, r1)
-#  define truncr_d(r0, r1)		truncr_d_i(r0, r1)
+#    define truncr_d(r0, r1)		truncr_d_i(r0, r1)
 #  else
 #    define truncr_f(r0, r1)		truncr_f_l(r0, r1)
-#  define truncr_d(r0, r1)		truncr_d_l(r0, r1)
+#    define truncr_d(r0, r1)		truncr_d_l(r0, r1)
 #  endif
 #  define truncr_f_i(r0, r1)		_truncr_f_i(_jit, r0, r1)
 static void _truncr_f_i(jit_state_t*, jit_int32_t, jit_int32_t);
@@ -196,6 +217,8 @@ static void _extr_d_f(jit_state_t*, jit_int32_t, jit_int32_t);
 static void _movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
 #  endif
 static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
+#  define movi_w_f(r0, i0)		_movi_w_f(_jit, r0, i0)
+static void _movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
 #  if __WORDSIZE == 32
 #    define negr_f(r0, r1)		FNEGS(r1, r0)
 #    define absr_f(r0, r1)		FABSS(r1, r0)
@@ -208,6 +231,28 @@ static void _absr_f(jit_state_t*, jit_int32_t, jit_int32_t);
 #    define sqrtr_f(r0, r1)		_sqrtr_f(_jit, r0, r1)
 static void _sqrtr_f(jit_state_t*, jit_int32_t, jit_int32_t);
 #  endif
+#  if __WORDSIZE == 32
+#    define fmar_f(r0, r1, r2, r3)	FMADDS(r1, r2, r3, r0)
+#    define fmsr_f(r0, r1, r2, r3)	FMSUBS(r1, r2, r3, r0)
+#    define fmar_d(r0, r1, r2, r3)	FMADDD(r1, r2, r3, r0)
+#    define fmsr_d(r0, r1, r2, r3)	FMSUBD(r1, r2, r3, r0)
+#    define fnmar_f(r0, r1, r2, r3)	FNMADDS(r1, r2, r3, r0)
+#    define fnmsr_f(r0, r1, r2, r3)	FNMSUBS(r1, r2, r3, r0)
+#    define fnmar_d(r0, r1, r2, r3)	FNMADDD(r1, r2, r3, r0)
+#    define fnmsr_d(r0, r1, r2, r3)	FNMSUBD(r1, r2, r3, r0)
+#  else
+#    define fop3f(op, r0, r1, r2, r3)	_fop3f(_jit, op, r0, r1, r2, r3)
+static void _fop3f(jit_state_t*, jit_int32_t, jit_int32_t,
+		   jit_int32_t, jit_int32_t, jit_int32_t);
+#    define fmar_f(r0, r1, r2, r3)	fop3f(SPARC_FMADDS, r0, r1, r2, r3)
+#    define fmsr_f(r0, r1, r2, r3)	fop3f(SPARC_FMSUBS, r0, r1, r2, r3)
+#    define fmar_d(r0, r1, r2, r3)	fop3f(SPARC_FMADDD, r0, r1, r2, r3)
+#    define fmsr_d(r0, r1, r2, r3)	fop3f(SPARC_FMSUBD, r0, r1, r2, r3)
+#    define fnmar_f(r0, r1, r2, r3)	fop3f(SPARC_FNMADDS, r0, r1, r2, r3)
+#    define fnmsr_f(r0, r1, r2, r3)	fop3f(SPARC_FNMSUBS, r0, r1, r2, r3)
+#    define fnmar_d(r0, r1, r2, r3)	fop3f(SPARC_FNMADDD, r0, r1, r2, r3)
+#    define fnmsr_d(r0, r1, r2, r3)	fop3f(SPARC_FNMSUBD, r0, r1, r2, r3)
+#  endif
 #  define extr_d(r0, r1)		_extr_d(_jit, r0, r1)
 static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define truncr_d_i(r0, r1)		_truncr_d_i(_jit, r0, r1)
@@ -225,14 +270,18 @@ static void _extr_f_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define movi_d(r0, i0)		_movi_d(_jit, r0, i0)
 static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
 #  if __WORDSIZE == 32
-#  define movr_d(r0, r1)		_movr_d(_jit, r0, r1)
+#    define movi_ww_d(r0, i0, i1)	_movi_ww_d(_jit, r0, i0, i1)
+static void _movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+#    define movr_d(r0, r1)		_movr_d(_jit, r0, r1)
 static void _movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
-#  define negr_d(r0, r1)		_negr_d(_jit, r0, r1)
+#    define negr_d(r0, r1)		_negr_d(_jit, r0, r1)
 static void _negr_d(jit_state_t*, jit_int32_t, jit_int32_t);
-#  define absr_d(r0, r1)		_absr_d(_jit, r0, r1)
+#    define absr_d(r0, r1)		_absr_d(_jit, r0, r1)
 static void _absr_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #  else
 #    define movr_d(r0, r1)		FMOVD(r1, r0)
+#    define movi_w_d(r0, i0)		_movi_w_d(_jit, r0, i0)
+static void _movi_w_d(jit_state_t*, jit_int32_t, jit_word_t);
 #    define negr_d(r0, r1)		FNEGD(r1, r0)
 #    define absr_d(r0, r1)		FABSD(r1, r0)
 #  endif
@@ -497,7 +546,84 @@ _f3f(jit_state_t *_jit, jit_int32_t rd,
     ii(v.v);
 }
 
+static void
+_f4f(jit_state_t *_jit, jit_int32_t rd, jit_int32_t op3,
+     jit_int32_t rs1, jit_int32_t rs3, jit_int32_t op5, jit_int32_t rs2)
+{
+    jit_instr_t		v;
 #  if __WORDSIZE == 64
+    if (rd > 31) {
+	assert(rd <= 63 && (rd & 1) == 0);
+	rd -= 31;
+    }
+    if (rs1 > 31) {
+	assert(rs1 <= 63 && (rs1 & 1) == 0);
+	rs1 -= 31;
+    }
+    if (rs2 > 31) {
+	assert(rs2 <= 63 && (rs2 & 1) == 0);
+	rs2 -= 31;
+    }
+    if (rs3 > 31) {
+	assert(rs3 <= 63 && (rs3 & 1) == 0);
+	rs3 -= 31;
+    }
+#  endif
+    assert(!(rd  & 0xffffffe0));
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(rs3 & 0xffffffe0));
+    assert(!(op5 & 0xfffffff0));
+    assert(!(rs2 & 0xffffffe0));
+    v.op.b    = 2;
+    v.rd.b    = rd;
+    v.op3.b   = op3;
+    v.rs1.b   = rs1;
+    v.rs3.b   = rs3;
+    v.op5.b   = op5;
+    v.rs2.b   = rs2;
+    ii(v.v);
+}
+
+#  if __WORDSIZE == 64
+/* Handle the special case of using all float registers, as exercised
+ * in check/carg.c.
+ * For example:
+ *	putargr_f JIT_F0 $ARG
+ * where JIT_F0 is %f32 and $ARG is %f31 and if %f30 (the mapping for %f31)
+ * is live, the jit_get_reg() call might return %f30, but, because it is
+ * live, will spill/reload it, generating assembly:
+ *
+ *	std  %f30, [ %fp + OFFS ]
+ *	fmovd  %f32, %f30
+ *	fmovs  %f30, %f31
+ *	ldd  [ %fp + OFFS ], %f30
+ *
+ * what basically becomes a noop as it restores the old value.
+ */
+#define get_sng_reg(u)		_get_sng_reg(_jit, u)
+static jit_int32_t
+_get_sng_reg(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t		reg, tmp;
+    /* Attempt to get a nospill register */
+    reg = jit_get_reg(CLASS_SNG | jit_class_nospill | jit_class_chk);
+    if (reg == JIT_NOREG) {
+	/* Will need to spill, so allow spilling it. */
+	reg = jit_get_reg(CLASS_SNG);
+	/* If the special condition happens, allocate another one.
+	 * This will generate uglier machine code (code for floats
+	 * is already ugly), but will work, but doing a double
+	 * spill/reload; the first one being a noop. */
+	if (rn(reg) == r0 - 1) {
+	    tmp = reg;
+	    reg = jit_get_reg(CLASS_SNG);
+	    jit_unget_reg(tmp);
+	}
+    }
+    return (reg);
+}
+
 static void
 _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -507,7 +633,7 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 	if (single_precision_p(r1))
 	    FMOVS(r1, r0);
 	else {
-	    t1 = jit_get_reg(CLASS_SNG);
+	    t1 = get_sng_reg(r0);
 	    movr_d(rn(t1), r1);
 	    FMOVS(rn(t1), r0);
 	    jit_unget_reg(t1);
@@ -515,13 +641,13 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     }
     else {
 	if (single_precision_p(r1)) {
-	    t0 = jit_get_reg(CLASS_SNG);
+	    t0 = get_sng_reg(r0);
 	    FMOVS(r1, rn(t0));
 	    movr_d(r0, rn(t0));
 	    jit_unget_reg(t0);
 	}
 	else {
-	    t1 = jit_get_reg(CLASS_SNG);
+	    t1 = get_sng_reg(r0);
 	    movr_d(rn(t1), r1);
 	    FMOVS(rn(t1), rn(t1));
 	    movr_d(r0, rn(t1));
@@ -663,6 +789,16 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
 	ldi_f(r0, (jit_word_t)i0);
 }
 
+static void
+_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t		reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    movr_w_f(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
 #  if __WORDSIZE == 64
 static void
 _extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
@@ -713,6 +849,19 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
 }
 
 #  if __WORDSIZE == 32
+static void
+_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    jit_int32_t		t0, t1;
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    movi(rn(t0), i0);
+    movi(rn(t1), i1);
+    movr_ww_d(r0, rn(t0), rn(t1));
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
 static void
 _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -743,6 +892,16 @@ _absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     if (r0 != r1)
 	FMOVS(r1 + 1, r0 + 1);
 }
+#  else
+static void
+_movi_w_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t		reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    movr_w_d(r0, rn(reg));
+    jit_unget_reg(reg);
+}
 #  endif
 
 #  if __WORDSIZE == 64
@@ -855,6 +1014,87 @@ _fop2f(jit_state_t *_jit, jit_int32_t op,
     if (mask & 4)
 	jit_unget_reg(t2);
 }
+
+static void
+_fop3f(jit_state_t *_jit, jit_int32_t op,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t		x0, t0, x1, t1, x2, t2, x3, t3, mask = 0;
+    if (!single_precision_p(r0)) {
+	mask |= 1;
+	t0 = jit_get_reg(CLASS_SNG);
+	x0 = rn(t0);
+	if (r0 == r1) {
+	    x1 = x0;
+	    movr_d(x1, r1);
+	    if (r0 == r2)
+		x2 = x0;
+	    if (r0 == r3)
+		x3 = x0;
+	}
+	else if (r0 == r2) {
+	    x2 = x0;
+	    movr_d(x2, r2);
+	    /* r3 may alias r0/r2 as well; no later branch assigns x3
+	     * in that case, so share the scratch register here. */
+	    if (r0 == r3)
+		x3 = x0;
+	}
+	else if (r0 == r3) {
+	    x3 = x0;
+	    movr_d(x3, r3);
+	}
+    }
+    else
+	x0 = r0;
+    if (!single_precision_p(r1)) {
+	if (r0 != r1) {
+	    mask |= 2;
+	    t1 = jit_get_reg(CLASS_SNG);
+	    x1 = rn(t1);
+	    movr_d(x1, r1);
+	    if (r1 == r2)
+		x2 = x1;
+	    if (r1 == r3)
+		x3 = x1;
+	}
+    }
+    else
+	x1 = r1;
+    if (!single_precision_p(r2)) {
+	if (r0 != r2 && r1 != r2) {
+	    mask |= 4;
+	    t2 = jit_get_reg(CLASS_SNG);
+	    x2 = rn(t2);
+	    movr_d(x2, r2);
+	    if (r2 == r3)
+		x3 = x2;
+	}
+    }
+    else
+	x2 = r2;
+    if (!single_precision_p(r3)) {
+	if (r0 != r3 && r1 != r3 && r2 != r3) {
+	    mask |= 8;
+	    t3 = jit_get_reg(CLASS_SNG);
+	    x3 = rn(t3);
+	    movr_d(x3, r3);
+	}
+    }
+    else
+	x3 = r3;
+    FPop3(x0, x1, x3, op, x2);
+    if (mask & 1) {
+	movr_d(r0, x0);
+	jit_unget_reg(t0);
+    }
+    if (mask & 2)
+	jit_unget_reg(t1);
+    if (mask & 4)
+	jit_unget_reg(t2);
+    if (mask & 8)
+	jit_unget_reg(t3);
+}
 #  endif
 
 static void
@@ -1491,7 +1731,12 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     assert(_jitc->function->self.call & jit_call_varargs);
 
     /* Load argument. */
+#if __WORDSIZE == 64
     ldr_d(r0, r1);
+#else
+    ldr_f(r0, r1);
+    ldxi_f(r0 + 1, r1, 4);
+#endif
 
     /* Update vararg stack pointer. */
     addi(r1, r1, 8);