X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_mips-fpu.c;h=61db30e341d4235daef18e3f41aaf691c6f50bc4;hb=016c6e93f6db684211f5c8b05433cb500715ba50;hp=6209fd680f1a02d018b6bd069f8d4dc03282025e;hpb=c0c162422385a60ea7c8fa1dfe439e83e0a13d88;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/jit_mips-fpu.c b/deps/lightning/lib/jit_mips-fpu.c index 6209fd68..61db30e3 100644 --- a/deps/lightning/lib/jit_mips-fpu.c +++ b/deps/lightning/lib/jit_mips-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -27,6 +27,8 @@ # define MIPS_fmt_PS 0x16 /* 2 x float32 */ # define MIPS_fmt_S_PU 0x20 # define MIPS_fmt_S_PL 0x26 +# define MIPS_condn_S 0x14 /* release 6 */ +# define MIPS_condn_D 0x15 /* release 6 */ # define MIPS_ADD_fmt 0x00 # define MIPS_LWXC1 0x00 # define MIPS_SUB_fmt 0x01 @@ -51,6 +53,8 @@ # define MIPS_FLOOR_fmt_W 0x0f # define MIPS_RECIP 0x15 # define MIPS_RSQRT 0x16 +# define MIPS_MADDF 0x18 +# define MIPS_MSUBF 0x19 # define MIPS_ALNV_PS 0x1e # define MIPS_CVT_fmt_S 0x20 # define MIPS_CVT_fmt_D 0x21 @@ -60,18 +64,18 @@ # define MIPS_PLU 0x2d # define MIPS_PUL 0x2e # define MIPS_PUU 0x2f -# define MIPS_MADD_fmt_S (0x20 | MIPS_fmt_S) -# define MIPS_MADD_fmt_D (0x20 | MIPS_fmt_D) -# define MIPS_MADD_fmt_PS (0x20 | MIPS_fmt_PS) -# define MIPS_MSUB_fmt_S (0x28 | MIPS_fmt_S) -# define MIPS_MSUB_fmt_D (0x28 | MIPS_fmt_D) -# define MIPS_MSUB_fmt_PS (0x28 | MIPS_fmt_PS) -# define MIPS_NMADD_fmt_S (0x30 | MIPS_fmt_S) -# define MIPS_NMADD_fmt_D (0x30 | MIPS_fmt_D) -# define MIPS_NMADD_fmt_PS (0x30 | MIPS_fmt_PS) -# define MIPS_NMSUB_fmt_S (0x38 | MIPS_fmt_S) -# define MIPS_NMSUB_fmt_D (0x38 | MIPS_fmt_D) -# define MIPS_NMSUB_fmt_PS (0x38 | MIPS_fmt_PS) +# define MIPS_MADD_fmt_S 0x20 +# define MIPS_MADD_fmt_D 0x21 +# define MIPS_MADD_fmt_PS 0x22 +# define MIPS_MSUB_fmt_S 0x28 +# define MIPS_MSUB_fmt_D 0x29 +# define MIPS_MSUB_fmt_PS 0x2a +# define MIPS_NMADD_fmt_S 0x30 +# define MIPS_NMADD_fmt_D 0x31 +# define MIPS_NMADD_fmt_PS 0x32 +# define MIPS_NMSUB_fmt_S 0x38 +# define MIPS_NMSUB_fmt_D 0x39 +# define MIPS_NMSUB_fmt_PS 0x3a # define MIPS_cond_F 0x30 # define MIPS_cond_UN 0x31 # define MIPS_cond_EQ 0x32 @@ -88,6 +92,23 @@ # define MIPS_cond_NGE 0x3d # define MIPS_cond_LE 0x3e # define MIPS_cond_UGT 0x3f +/* Mips release 6 */ +# define MIPS_cmp_AF 0x00 +# define MIPS_cmp_UN 0x01 +# define MIPS_cmp_EQ 0x02 +# define MIPS_cmp_UEQ 0x03 +# define MIPS_cmp_LT 0x04 +# define MIPS_cmp_ULT 0x05 +# define MIPS_cmp_LE 0x06 +# define MIPS_cmp_ULE 0x07 +# define MIPS_cmp_SAF 0x08 +# define MIPS_cmp_SUN 0x09 +# define MIPS_cmp_SEQ 0x0a +# define MIPS_cmp_SUEQ 0x0b +# define MIPS_cmp_SLT 0x0c +# define MIPS_cmp_SULT 0x0d +# define MIPS_cmp_SLE 0x0e +# define MIPS_cmp_SULE 0x0f # define ADD_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_ADD_fmt) # define ADD_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_ADD_fmt) # define SUB_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_SUB_fmt) @@ -103,7 +124,9 @@ # define SQRT_S(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_SQRT_fmt) # define SQRT_D(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_SQRT_fmt) # define MFC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MF,rt,fs,0,0) +# define MFHC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MFH,rt,fs,0,0) # define MTC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MT,rt,fs,0,0) +# define MTHC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MTH,rt,fs,0,0) # define DMFC1(rt, fs) hrrrit(MIPS_COP1,MIPS_DMF,rt,fs,0,0) # define DMTC1(rt, fs) hrrrit(MIPS_COP1,MIPS_DMT,rt,fs,0,0) # define CVT_D_S(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_D) @@ -123,6 +146,20 @@ # define TRUNC_L_D(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_TRUNC_fmt_L) # define TRUNC_W_S(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_TRUNC_fmt_W) # define TRUNC_W_D(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_TRUNC_fmt_W) +# define MADD_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MADD_fmt_S) +# define MADD_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MADD_fmt_D) +# define MSUB_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MSUB_fmt_S) +# define MSUB_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_MSUB_fmt_D) +# define NMADD_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMADD_fmt_S) +# define NMADD_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMADD_fmt_D) +# define NMSUB_S(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMSUB_fmt_S) +# define NMSUB_D(fd,fr,fs,ft) hrrrit(MIPS_COP1X,fr,ft,fs,fd,MIPS_NMSUB_fmt_D) +/* fd = fd + (fs * ft) */ +# define MADDF_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MADDF) +# define MADDF_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MADDF) +/* fd = fd - (fs * ft) */ +# define MSUBF_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_MSUBF) +# define MSUBF_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_MSUBF) # define LWC1(rt, of, rb) hrri(MIPS_LWC1, rb, rt, of) # define SWC1(rt, of, rb) hrri(MIPS_SWC1, rb, rt, of) # define LDC1(rt, of, rb) hrri(MIPS_LDC1, rb, rt, of) @@ -130,7 +167,9 @@ # define MOV_S(fd, fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_MOV_fmt) # define MOV_D(fd, fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_MOV_fmt) # define BC1F(im) hrri(MIPS_COP1,MIPS_BC,MIPS_BCF,im) +# define BC1EQZ(ft,im) hrri(MIPS_COP1,MIPS_BC1EQZ,ft,im) # define BC1T(im) hrri(MIPS_COP1,MIPS_BC,MIPS_BCT,im) +# define BC1NEZ(ft,im) hrri(MIPS_COP1,MIPS_BC1NEZ,ft,im) # define C_F_S(fs,ft) c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_F) # define C_F_D(fs,ft) c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_F) # define C_F_PS(fs,ft) c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_F) @@ -183,6 +222,42 @@ static void _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t ft, jit_int32_t fs, jit_int32_t cc); +# define CMP_AF_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_AF) +# define CMP_AF_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_AF) +# define CMP_UN_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UN) +# define CMP_UN_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UN) +# define CMP_EQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_EQ) +# define CMP_EQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_EQ) +# define CMP_UEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UEQ) +# define CMP_UEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UEQ) +# define CMP_LT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LT) +# define CMP_LT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LT) +# define CMP_ULT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULT) +# define CMP_ULT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULT) +# define CMP_LE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LE) +# define CMP_LE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LE) +# define CMP_ULE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULE) +# define CMP_ULE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULE) +# define CMP_SAF_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SAF) +# define CMP_SAF_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SAF) +# define CMP_SUN_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUN) +# define CMP_SUN_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUN) +# define CMP_SEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SEQ) +# define CMP_SEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SEQ) +# define CMP_SUEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUEQ) +# define CMP_SUEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUEQ) +# define CMP_SLT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLT) +# define CMP_SLT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLT) +# define CMP_SULT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULT) +# define CMP_SULT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULT) +# define CMP_SLE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLE) +# define CMP_SLE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLE) +# define CMP_SULE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULE) +# define CMP_SULE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULE) +# define cmp_cond_fmt(fm,fd,ft,fs,cn) _cmp_cond_fmt(_jit,fm,fd,ft,fs,cn) +static void +_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd, + jit_int32_t ft, jit_int32_t fs, jit_int32_t cn); # define addr_f(r0,r1,r2) ADD_S(r0,r1,r2) # define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -220,9 +295,9 @@ static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define sqrtr_f(r0,r1) SQRT_S(r0,r1) # define sqrtr_d(r0,r1) SQRT_D(r0,r1) # define movr_w_f(r0, r1) MTC1(r1, r0) -# define movr_f_w(r0, r1) MFC1(r1, r0) -# define movi_f_w(r0, i0) _movi_f_w(_jit, r0, i0) -static void _movi_f_w(jit_state_t*,jit_int32_t,jit_float32_t*); +# define movr_f_w(r0, r1) MFC1(r0, r1) +# define movi_w_f(r0, i0) _movi_w_f(_jit, r0, i0) +static void _movi_w_f(jit_state_t*, jit_int32_t, jit_word_t); # define extr_f(r0, r1) _extr_f(_jit, r0, r1) static void _extr_f(jit_state_t*,jit_int32_t,jit_int32_t); # define truncr_f_i(r0, r1) _truncr_f_i(_jit, r0, r1) @@ -239,6 +314,10 @@ static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define ldxi_f(r0, r1, i0) _ldxi_f(_jit, r0, r1, i0) static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define unldr_x(r0, r1, i0) _unldr_x(_jit, r0, r1, i0) +static void _unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define unldi_x(r0, i0, i1) _unldi_x(_jit, r0, i0, i1) +static void _unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); # define str_f(r0, r1) SWC1(r1, 0, r0) # define sti_f(i0, r0) _sti_f(_jit, i0, r0) static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); @@ -246,28 +325,47 @@ static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_f(i0, r0, r1) _stxi_f(_jit, i0, r0, r1) static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define unstr_x(r0, r1, i0) _unstr_x(_jit, r0, r1, i0) +static void _unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define unsti_x(i0, r0, i1) _unsti_x(_jit, i0, r0, i1) +static void _unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +# define fmar_f(r0, r1, r2, r3) _fmar_f(_jit, r0, r1, r2, r3) +static void _fmar_f(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fmsr_f(r0, r1, r2, r3) _fmsr_f(_jit, r0, r1, r2, r3) +static void _fmsr_f(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fnmar_f(r0, r1, r2, r3) _fnmar_f(_jit, r0, r1, r2, r3) +static void _fnmar_f(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fnmsr_f(r0, r1, r2, r3) _fnmsr_f(_jit, r0, r1, r2, r3) +static void _fnmsr_f(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); # define movr_f(r0, r1) _movr_f(_jit, r0, r1) static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t); # define movi_f(r0, i0) _movi_f(_jit, r0, i0) static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*); -# if NEW_ABI -# if __WORDSIZE == 32 -# define movi64(r0, i0) _movi64(_jit, r0, i0) +# if NEW_ABI && __WORDSIZE == 32 +# define movi64(r0, i0) _movi64(_jit, r0, i0) static void _movi64(jit_state_t*,jit_int32_t,jit_int64_t); -# else -# define movi64(r0, i0) movi(r0, i0) -# endif +# define movi_d_w(r0, i0) _movi_d_w(_jit, r0, i0) +static void _movi_d_w(jit_state_t*,jit_int32_t,jit_int64_t); +# elif __WORDSIZE == 64 +# define movi64(r0, i0) movi(r0, i0) +# endif +# if __WORDSIZE == 64 || NEW_ABI # define movr_w_d(r0, r1) DMTC1(r1, r0) # define movr_d_w(r0, r1) DMFC1(r0, r1) -# define movi_d_w(r0, i0) _movi_d_w(_jit,r0,i0) -static void _movi_d_w(jit_state_t*,jit_int32_t,jit_float64_t*); -# else +# define movi_w_d(r0, i0) _movi_w_d(_jit, r0, i0) +static void _movi_w_d(jit_state_t*, jit_int32_t, jit_word_t); +# endif +# if __WORDSIZE == 32 # define movr_ww_d(r0, r1, r2) _movr_ww_d(_jit, r0, r1, r2) static void _movr_ww_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movr_d_ww(r0, r1, r2) _movr_d_ww(_jit, r0, r1, r2) static void _movr_d_ww(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); -# define movi_d_ww(r0, r1, i0) _movi_d_ww(_jit, r0, r1, i0) -static void _movi_d_ww(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define movi_ww_d(r0, i0, i1) _movi_ww_d(_jit, r0, i0, i1) +static void _movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); # endif # define extr_d(r0, r1) _extr_d(_jit, r0, r1) static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t); @@ -293,6 +391,18 @@ static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t); static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define stxi_d(i0, r0, r1) _stxi_d(_jit, i0, r0, r1) static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define fmar_d(r0, r1, r2, r3) _fmar_d(_jit, r0, r1, r2, r3) +static void _fmar_d(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fmsr_d(r0, r1, r2, r3) _fmsr_d(_jit, r0, r1, r2, r3) +static void _fmsr_d(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fnmar_d(r0, r1, r2, r3) _fnmar_d(_jit, r0, r1, r2, r3) +static void _fnmar_d(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define fnmsr_d(r0, r1, r2, r3) _fnmsr_d(_jit, r0, r1, r2, r3) +static void _fnmsr_d(jit_state_t*, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); # define movr_d(r0, r1) _movr_d(_jit, r0, r1) static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t); # define movi_d(r0, i0) _movi_d(_jit, r0, i0) @@ -565,7 +675,22 @@ _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm, i.ft.b = ft; i.fm.b = fm; i.hc.b = MIPS_COP1; - ii(i.op); + instr(i.op); +} + +static void +_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd, + jit_int32_t ft, jit_int32_t fs, jit_int32_t cn) +{ + jit_instr_t i; + i.op = 0; /* must have bit 6 zero ed */ + i.cn.b = cn; + i.ft.b = ft; + i.fs.b = fs; + i.fd.b = fd; + i.fm.b = fm; + i.hc.b = MIPS_COP1; + instr(i.op); } # define fpr_opi(name, type, size) \ @@ -604,18 +729,6 @@ fopi(rsb) fopi(mul) fopi(div) -static void -_movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) -{ - union { - jit_int32_t i; - jit_float32_t f; - } data; - - data.f = *i0; - movi(r0, data.i); -} - static void _extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -691,6 +804,86 @@ _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0, r2; +# if __WORDSIZE == 32 + jit_int32_t t1, r3; +# endif + if (jit_unaligned_p()) { + assert(i0 == 4 || i0 == 8); + t0 = jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if (i0 == 4) { + unldr(r2, r1, 4); + movr_w_f(r0, r2); + } + else { +# if __WORDSIZE == 32 + t1 = jit_get_reg(jit_class_gpr); + r3 = rn(t1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + unldr(r2, r1, 4); + addi(r3, r1, 4); + unldr(r3, r3, 4); +# else + unldr(r3, r1, 4); + addi(r2, r1, 4); + unldr(r2, r2, 4); +# endif + movr_ww_d(r0, r2, r3); + jit_unget_reg(t1); +# else + unldr(r2, r1, 8); + movr_w_d(r0, r2); +# endif + } + jit_unget_reg(t0); + } + else + generic_unldr_x(r0, r1, i0); +} + +static void +_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0, r2; +# if __WORDSIZE == 32 + jit_int32_t t1, r3; +# endif + if (jit_unaligned_p()) { + assert(i1 == 4 || i1 == 8); + t0 = jit_get_reg(jit_class_gpr); + r2 = rn(t0); + if (i1 == 4) { + unldi(r2, i0, 4); + movr_w_f(r0, r2); + } + else { +# if __WORDSIZE == 32 + t1 = jit_get_reg(jit_class_gpr); + r3 = rn(t1); +# if __BYTE_ORDER == __LITTLE_ENDIAN + unldi(r2, i0, 4); + unldi(r3, i0 + 4, 4); +# else + unldi(r3, i0, 4); + unldi(r2, i0 + 4, 4); +# endif + movr_ww_d(r0, r3, r2); + jit_unget_reg(t1); +# else + unldi(r2, i0, 8); + movr_w_d(r0, r2); +# endif + } + jit_unget_reg(t0); + } + else + generic_unldi_x(r0, i0, i1); +} + static void _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { @@ -729,6 +922,153 @@ _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } +static void +_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (jit_unaligned_p()) + fallback_unstr_x(r0, r1, i0); + else + generic_unstr_x(r0, r1, i0); +} + +static void +_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) +{ + if (jit_unaligned_p()) + fallback_unsti_x(i0, r0, i1); + else + fallback_unsti_x(i0, r0, i1); +} + + +static void +_fmar_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MADDF_S(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_f(rn(t0), r3); + MADDF_S(rn(t0), r2, r1); + movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else + MADD_S(r0, r3, r2, r1); + } + else { + if (r0 != r3) { + mulr_f(r0, r1, r2); + addr_f(r0, r0, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr); + mulr_f(rn(t0), r1, r2); + addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } + } +} + +static void +_fmsr_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MSUBF_S(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_f(rn(t0), r3); + MSUBF_S(rn(t0), r2, r1); + movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + negr_f(r0, r0); + } + else + MSUB_S(r0, r3, r2, r1); + } + else { + if (r0 != r3) { + mulr_f(r0, r1, r2); + subr_f(r0, r0, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr); + mulr_f(rn(t0), r1, r2); + subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } + } +} + +static void +_fnmar_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MADDF_S(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_f(rn(t0), r3); + MADDF_S(rn(t0), r2, r1); + movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + negr_f(r0, r0); + } + else + NMADD_S(r0, r3, r2, r1); + } + else { + t0 = jit_get_reg(jit_class_fpr); + negr_f(rn(t0), r1); + mulr_f(rn(t0), rn(t0), r2); + subr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_fnmsr_f(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MSUBF_S(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_f(rn(t0), r3); + MSUBF_S(rn(t0), r2, r1); + movr_f(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else + NMSUB_S(r0, r3, r2, r1); + } + else { + t0 = jit_get_reg(jit_class_fpr); + negr_f(rn(t0), r1); + mulr_f(rn(t0), rn(t0), r2); + addr_f(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + static void _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -760,13 +1100,23 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) MTC1(_ZERO_REGNO, r0); } +static void +_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + movr_w_f(r0, rn(reg)); + jit_unget_reg(reg); +} + dopi(add) dopi(sub) dopi(rsb) dopi(mul) dopi(div) -#if NEW_ABI +#if NEW_ABI && __WORDSIZE == 32 /* n32 abi requires 64 bit cpu */ static void _movi64(jit_state_t *_jit, jit_int32_t r0, jit_int64_t i0) @@ -802,57 +1152,68 @@ _movi64(jit_state_t *_jit, jit_int32_t r0, jit_int64_t i0) } static void -_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) +_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_int64_t i0) { - jit_word_t w; union { jit_int64_t l; jit_float64_t d; } data; - if (_jitc->no_data) { - data.d = *i0; - movi64(r0, data.l); - } - else { - w = (jit_word_t)i0; - if (can_sign_extend_short_p(w)) - LD(r0, w, _ZERO_REGNO); - else { - movi(r0, w); - LD(r0, 0, r0); - } - } + data.d = i0; + movi64(r0, data.l); } +#endif -#else +#if __WORDSIZE == 64 || NEW_ABI static void -_movr_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +_movi_w_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { - assert(r1 == r2 - 1); - MTC1(r1, r0 + BE_P); - MTC1(r2, r0 + LE_P); + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi64(rn(reg), i0); + movr_w_d(r0, rn(reg)); + jit_unget_reg(reg); } +#endif +#if __WORDSIZE == 32 static void -_movr_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +_movr_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - assert(r0 == r1 - 1); - MFC1(r0, r2 + BE_P); - MFC1(r1, r2 + LE_P); + if (jit_mips6_p()) { + MTC1(r1, r0); + MTHC1(r2, r0); + } + else { + MTC1(r1, r0 + BE_P); + MTC1(r2, r0 + LE_P); + } } static void -_movi_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) +_movr_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - union { - jit_int32_t i[2]; - jit_int64_t l; - jit_float64_t d; - } data; + if (jit_mips6_p()) { + MFC1(r0, r2); + MFHC1(r1, r2); + } + else { + MFC1(r0, r2 + BE_P); + MFC1(r1, r2 + LE_P); + } +} - data.d = *i0; - movi(r0, data.i[0]); - movi(r1, data.i[1]); +static void +_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0, t1; + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + movi(rn(t0), i0); + movi(rn(t1), i1); + MTC1(rn(t0), r0); + MTC1(rn(t1), r0 + 1); + jit_unget_reg(t1); + jit_unget_reg(t0); } #endif @@ -896,40 +1257,40 @@ _truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { -# if __WORDSIZE == 64 || NEW_ABI - LDC1(r0, 0, r1); -# else - LWC1(r0 + BE_P, 0, r1); - LWC1(r0 + LE_P, 4, r1); -# endif + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) + LDC1(r0, 0, r1); + else { + LWC1(r0 + BE_P, 0, r1); + LWC1(r0 + LE_P, 4, r1); + } } static void _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; -# if __WORDSIZE == 64 || NEW_ABI - if (can_sign_extend_short_p(i0)) - LDC1(r0, i0, _ZERO_REGNO); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - LDC1(r0, 0, rn(reg)); - jit_unget_reg(reg); - } -# else - if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { - LWC1(r0 + BE_P, i0, _ZERO_REGNO); - LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO); + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) { + if (can_sign_extend_short_p(i0)) + LDC1(r0, i0, _ZERO_REGNO); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + LDC1(r0, 0, rn(reg)); + jit_unget_reg(reg); + } } else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - LWC1(r0 + BE_P, 0, rn(reg)); - LWC1(r0 + LE_P, 4, rn(reg)); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { + LWC1(r0 + BE_P, i0, _ZERO_REGNO); + LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_d(r0, rn(reg)); + jit_unget_reg(reg); + } } -# endif } static void @@ -946,52 +1307,60 @@ static void _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; -# if __WORDSIZE == 64 || NEW_ABI - if (can_sign_extend_short_p(i0)) - LDC1(r0, i0, r1); -# else - if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { - LWC1(r0 + BE_P, i0, r1); - LWC1(r0 + LE_P, i0 + 4, r1); + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) { + if (can_sign_extend_short_p(i0)) + LDC1(r0, i0, r1); + else + goto fallback; } -# endif else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_d(r0, rn(reg)); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { + LWC1(r0 + BE_P, i0, r1); + LWC1(r0 + LE_P, i0 + 4, r1); + } + else { + fallback: + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r1, i0); + ldr_d(r0, rn(reg)); + jit_unget_reg(reg); + } } } static void _str_d(jit_state_t *_jit,jit_int32_t r0, jit_int32_t r1) { -# if __WORDSIZE == 64 || NEW_ABI - SDC1(r1, 0, r0); -# else - SWC1(r1 + BE_P, 0, r0); - SWC1(r1 + LE_P, 4, r0); -# endif + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) + SDC1(r1, 0, r0); + else { + SWC1(r1 + BE_P, 0, r0); + SWC1(r1 + LE_P, 4, r0); + } } static void _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; -# if __WORDSIZE == 64 || NEW_ABI - if (can_sign_extend_short_p(i0)) - SDC1(r0, i0, _ZERO_REGNO); -# else - if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { - SWC1(r0 + BE_P, i0, _ZERO_REGNO); - SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO); + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) { + if (can_sign_extend_short_p(i0)) + SDC1(r0, i0, _ZERO_REGNO); + else + goto fallback; } -# endif else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - str_d(rn(reg), r0); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { + SWC1(r0 + BE_P, i0, _ZERO_REGNO); + SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO); + } + else { + fallback: + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + str_d(rn(reg), r0); + jit_unget_reg(reg); + } } } @@ -1009,48 +1378,180 @@ static void _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; -# if __WORDSIZE == 64 || NEW_ABI - if (can_sign_extend_short_p(i0)) - SDC1(r1, i0, r0); -# else - if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { - SWC1(r1 + BE_P, i0, r0); - SWC1(r1 + LE_P, i0 + 4, r0); + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) { + if (can_sign_extend_short_p(i0)) + SDC1(r1, i0, r0); + else + goto fallback; } -# endif else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r0, i0); - str_d(rn(reg), r1); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { + SWC1(r1 + BE_P, i0, r0); + SWC1(r1 + LE_P, i0 + 4, r0); + } + else { + fallback: + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r0, i0); + str_d(rn(reg), r1); + jit_unget_reg(reg); + } } } static void -_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_fmar_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { - if (r0 != r1) - MOV_D(r0, r1); + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MADDF_D(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_d(rn(t0), r3); + MADDF_D(rn(t0), r2, r1); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else + MADD_D(r0, r3, r2, r1); + } + else { + if (r0 != r3) { + mulr_d(r0, r1, r2); + addr_d(r0, r0, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr); + mulr_d(rn(t0), r1, r2); + addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } + } } static void -_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) +_fmsr_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) { - union { - jit_int32_t i[2]; - jit_int64_t l; - jit_float64_t d; - } data; - jit_int32_t reg; - - data.d = *i0; -# if __WORDSIZE == 64 || NEW_ABI - if (data.l) { - if (_jitc->no_data) { - reg = jit_get_reg(jit_class_gpr); - movi64(rn(reg), data.l); - DMTC1(rn(reg), r0); - jit_unget_reg(reg); + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MSUBF_D(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_d(rn(t0), r3); + MSUBF_D(rn(t0), r2, r1); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + negr_d(r0, r0); + } + else + MSUB_D(r0, r3, r2, r1); + } + else { + if (r0 != r3) { + mulr_d(r0, r1, r2); + subr_d(r0, r0, r3); + } + else { + t0 = jit_get_reg(jit_class_fpr); + mulr_d(rn(t0), r1, r2); + subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } + } +} + +static void +_fnmar_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MADDF_D(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_d(rn(t0), r3); + MADDF_D(rn(t0), r2, r1); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + negr_d(r0, r0); + } + else + NMADD_D(r0, r3, r2, r1); + } + else { + t0 = jit_get_reg(jit_class_fpr); + negr_d(rn(t0), r1); + mulr_d(rn(t0), rn(t0), r2); + subr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_fnmsr_d(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3) +{ + jit_int32_t t0; + if (jit_mips2_p()) { + if (jit_mips6_p()) { + if (r0 == r3) + MSUBF_D(r0, r2, r1); + else { + t0 = jit_get_reg(jit_class_fpr); + movr_d(rn(t0), r3); + MSUBF_D(rn(t0), r2, r1); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else + NMSUB_D(r0, r3, r2, r1); + } + else { + t0 = jit_get_reg(jit_class_fpr); + negr_d(rn(t0), r1); + mulr_d(rn(t0), rn(t0), r2); + addr_d(r0, rn(t0), r3); + jit_unget_reg(t0); + } +} + +static void +_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 != r1) + MOV_D(r0, r1); +} + +static void +_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) +{ + union { + jit_int32_t i[2]; + jit_int64_t l; + jit_float64_t d; + } data; + jit_int32_t reg; + + data.d = *i0; +# if __WORDSIZE == 64 || NEW_ABI + if (data.l) { + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); + movi64(rn(reg), data.l); + DMTC1(rn(reg), r0); + jit_unget_reg(reg); } else ldi_d(r0, (jit_word_t)i0); @@ -1058,30 +1559,49 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) else DMTC1(_ZERO_REGNO, r0); # else - if (_jitc->no_data) - reg = jit_get_reg(jit_class_gpr); - if (data.i[0]) { + if (jit_mips6_p()) { if (_jitc->no_data) { - movi(rn(reg), data.i[0]); - MTC1(rn(reg), r0 + BE_P); + reg = jit_get_reg(jit_class_gpr); +# if __WORDSIZE == 64 + movi(rn(reg), data.l); + DMTC1(rn(reg), r0); +# else + movi(rn(reg), data.i[0 + BE_P]); + MTC1(rn(reg), r0); + movi(rn(reg), data.i[0 + LE_P]); + MTHC1(rn(reg), r0); +# endif + jit_unget_reg(reg); } else - ldi_f(r0 + BE_P, (jit_word_t)i0); + ldi_d(r0, (jit_word_t)i0); } - else - MTC1(_ZERO_REGNO, r0 + BE_P); - if (data.i[1]) { - if (_jitc->no_data) { - movi(rn(reg), data.i[1]); - MTC1(rn(reg), r0 + LE_P); + else { + if (_jitc->no_data) + reg = jit_get_reg(jit_class_gpr); + if (data.i[0]) { + if (_jitc->no_data) { + movi(rn(reg), data.i[0]); + MTC1(rn(reg), r0 + BE_P); + } + else + ldi_f(r0 + BE_P, (jit_word_t)i0); } else - ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4); + MTC1(_ZERO_REGNO, r0 + BE_P); + if (data.i[1]) { + if (_jitc->no_data) { + movi(rn(reg), data.i[1]); + MTC1(rn(reg), r0 + LE_P); + } + else + ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4); + } + else + MTC1(_ZERO_REGNO, r0 + LE_P); + if (_jitc->no_data) + jit_unget_reg(reg); } - else - MTC1(_ZERO_REGNO, r0 + LE_P); - if (_jitc->no_data) - jit_unget_reg(reg); # endif } @@ -1089,13 +1609,26 @@ static void _ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LT_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_OLT_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(lt) @@ -1103,13 +1636,26 @@ static void _ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LE_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_OLE_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(le) @@ -1117,13 +1663,26 @@ static void _eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_EQ_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_EQ_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(eq) @@ -1131,13 +1690,26 @@ static void _ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULT_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_ULT_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ge) @@ -1145,13 +1717,26 @@ static void _gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULE_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_ULE_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(gt) @@ -1159,13 +1744,26 @@ static void _ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_EQ_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_EQ_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ne) @@ -1173,13 +1771,26 @@ static void _unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULT_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_ULT_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(unlt) @@ -1187,13 +1798,26 @@ static void _unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULE_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_ULE_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(unle) @@ -1201,13 +1825,26 @@ static void _uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UEQ_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_UEQ_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(uneq) @@ -1215,13 +1852,26 @@ static void _unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LT_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_OLT_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(unge) @@ -1229,13 +1879,26 @@ static void _ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LE_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_OLE_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ungt) @@ -1243,13 +1906,26 @@ static void _ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UEQ_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_UEQ_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ltgt) @@ -1257,13 +1933,26 @@ static void _ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UN_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_UN_S(r1, r2); + flush(); + /* cannot optimize delay slot */ + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ord) @@ -1271,13 +1960,26 @@ static void _unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UN_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_UN_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(unord) @@ -1285,10 +1987,25 @@ static jit_word_t _bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LT_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLT_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(lt) @@ -1297,10 +2014,25 @@ static jit_word_t _bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LE_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLE_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(le) @@ -1309,10 +2041,25 @@ static jit_word_t _beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_EQ_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_EQ_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(eq) @@ -1321,10 +2068,25 @@ static jit_word_t _bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULT_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULT_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ge) @@ -1333,10 +2095,25 @@ static jit_word_t _bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULE_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULE_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(gt) @@ -1345,10 +2122,25 @@ static jit_word_t _bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_EQ_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_EQ_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ne) @@ -1357,10 +2149,25 @@ static jit_word_t _bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULT_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULT_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(unlt) @@ -1369,10 +2176,25 @@ static jit_word_t _bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULE_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULE_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(unle) @@ -1381,10 +2203,25 @@ static jit_word_t _buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UEQ_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UEQ_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(uneq) @@ -1393,10 +2230,25 @@ static jit_word_t _bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LT_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLT_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(unge) @@ -1405,10 +2257,25 @@ static jit_word_t _bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LE_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLE_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ungt) @@ -1417,10 +2284,25 @@ static jit_word_t _bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UEQ_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UEQ_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ltgt) @@ -1429,10 +2311,25 @@ static jit_word_t _bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UN_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UN_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ord) @@ -1441,10 +2338,25 @@ static jit_word_t _bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UN_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UN_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(unord) @@ -1453,13 +2365,26 @@ static void _ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LT_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_OLT_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(lt) @@ -1467,13 +2392,26 @@ static void _ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LE_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_OLE_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(le) @@ -1481,13 +2419,26 @@ static void _eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_EQ_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_EQ_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(eq) @@ -1495,13 +2446,26 @@ static void _ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULT_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_ULT_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ge) @@ -1509,13 +2473,26 @@ static void _gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULE_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_ULE_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(gt) @@ -1523,13 +2500,26 @@ static void _ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_EQ_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_EQ_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ne) @@ -1537,13 +2527,26 @@ static void _unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULT_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_ULT_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(unlt) @@ -1551,13 +2554,26 @@ static void _unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULE_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_ULE_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(unle) @@ -1565,13 +2581,26 @@ static void _uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UEQ_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_UEQ_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(uneq) @@ -1579,13 +2608,26 @@ static void _unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LT_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_OLT_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(unge) @@ -1593,13 +2635,26 @@ static void _ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LE_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_OLE_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ungt) @@ -1607,13 +2662,26 @@ static void _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UEQ_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_UEQ_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ltgt) @@ -1621,13 +2689,26 @@ static void _ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UN_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_UN_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ord) @@ -1635,13 +2716,26 @@ static void _unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UN_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_UN_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(unord) @@ -1649,10 +2743,25 @@ static jit_word_t _bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LT_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLT_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(lt) @@ -1661,10 +2770,25 @@ static jit_word_t _bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LE_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLE_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(le) @@ -1673,10 +2797,25 @@ static jit_word_t _beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_EQ_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_EQ_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(eq) @@ -1685,10 +2824,25 @@ static jit_word_t _bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULT_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULT_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ge) @@ -1697,10 +2851,25 @@ static jit_word_t _bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULE_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULE_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(gt) @@ -1709,10 +2878,25 @@ static jit_word_t _bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_EQ_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_EQ_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ne) @@ -1721,10 +2905,25 @@ static jit_word_t _bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULT_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULT_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(unlt) @@ -1733,10 +2932,25 @@ static jit_word_t _bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULE_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULE_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(unle) @@ -1745,10 +2959,25 @@ static jit_word_t _buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UEQ_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UEQ_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(uneq) @@ -1757,10 +2986,25 @@ static jit_word_t _bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LT_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLT_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(unge) @@ -1769,10 +3013,25 @@ static jit_word_t _bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LE_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLE_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ungt) @@ -1781,10 +3040,25 @@ static jit_word_t _bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UEQ_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UEQ_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ltgt) @@ -1793,10 +3067,25 @@ static jit_word_t _bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UN_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UN_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ord) @@ -1805,10 +3094,25 @@ static jit_word_t _bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UN_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UN_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(unord)