# define sser(c,r0,r1) _sser(_jit,c,r0,r1)
static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
-static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+static void _ssexr(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
# define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
# define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
# define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
-# define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
+# define movdxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1) /* callers in this file pass an SSE register as r0 and a general-purpose register as r1 */
+# define movdrx(r0,r1) ssexr(0x66, X86_SSE_G2X,r0,r1) /* X86_SSE_G2X counterpart of movdxr; NOTE(review): presumably the opposite transfer direction -- confirm */
+# define movqxr(r0,r1) sselxr(0x66, X86_SSE_X2G,r0,r1) /* sselxr flavour of movdxr; sselxr is only defined when __X64 && !__X64_32 */
+# define movqrx(r0,r1) sselxr(0x66, X86_SSE_G2X,r0,r1) /* sselxr flavour of movdrx; same availability restriction as movqxr */
# define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
# define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
# define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
# define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
# define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
-# define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1)
# if __X64 && !__X64_32
# define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
static void
static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
# define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
+# define sse_fmar_f(r0, r1, r2, r3) _sse_fmar_f(_jit, r0, r1, r2, r3) /* float: r0 = (r1 * r2) + r3 */
+static void _sse_fmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fmar_d(r0, r1, r2, r3) _sse_fmar_d(_jit, r0, r1, r2, r3) /* double: r0 = (r1 * r2) + r3 */
+static void _sse_fmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fmsr_f(r0, r1, r2, r3) _sse_fmsr_f(_jit, r0, r1, r2, r3) /* float: r0 = (r1 * r2) - r3 */
+static void _sse_fmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fmsr_d(r0, r1, r2, r3) _sse_fmsr_d(_jit, r0, r1, r2, r3) /* double: r0 = (r1 * r2) - r3 */
+static void _sse_fmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmar_f(r0, r1, r2, r3) _sse_fnmar_f(_jit, r0, r1, r2, r3) /* float: r0 = (-r1 * r2) - r3 */
+static void _sse_fnmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmar_d(r0, r1, r2, r3) _sse_fnmar_d(_jit, r0, r1, r2, r3) /* double: r0 = (-r1 * r2) - r3 */
+static void _sse_fnmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmsr_f(r0, r1, r2, r3) _sse_fnmsr_f(_jit, r0, r1, r2, r3) /* float: r0 = (-r1 * r2) + r3 */
+static void _sse_fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmsr_d(r0, r1, r2, r3) _sse_fnmsr_d(_jit, r0, r1, r2, r3) /* double: r0 = (-r1 * r2) + r3 */
+static void _sse_fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
static void
static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
#define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
+# define sse_movr_w_f(r0,r1) movdxr(r0, r1) /* plain alias: operands are forwarded in the same order */
+# define sse_movr_f_w(r0,r1) movdrx(r1, r0) /* note: the two operands are swapped when forwarded to movdrx */
+#define sse_movi_w_f(r0, i0) _sse_movi_w_f(_jit, r0, i0) /* immediate variant: i0 goes through a scratch GPR, then movdxr into r0 */
+static void _sse_movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
# define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define sse_unldr_x(r0, r1, i0) _sse_unldr_x(_jit, r0, r1, i0) /* load r0 through address register r1; i0 is the size and must be 4 or 8 */
+static void _sse_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define sse_unldi_x(r0, i0, i1) _sse_unldi_x(_jit, r0, i0, i1) /* load r0 from immediate address i0; i1 is the size and must be 4 or 8 */
+static void _sse_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
# define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
# define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define sse_unstr_x(r0, r1, i0) _sse_unstr_x(_jit, r0, r1, i0) /* store r1 through address register r0; i0 is the size and must be 4 or 8 */
+static void _sse_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define sse_unsti_x(i0, r0, i1) _sse_unsti_x(_jit, i0, r0, i1) /* store r0 to immediate address i0; i1 is the size and must be 4 or 8 */
+static void _sse_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
# define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
#define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
+# if __X32 || __X64_32 /* these targets build a double from two separate words */
+# define sse_movr_ww_d(r0, r1, r2) _sse_movr_ww_d(_jit, r0, r1, r2) /* r0 <- double assembled from words r1 (offset 0) and r2 (offset 4) */
+static void _sse_movr_ww_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+# define sse_movr_d_ww(r0, r1, r2) _sse_movr_d_ww(_jit, r0, r1, r2) /* r0 (offset 0) and r1 (offset 4) <- the two words of double r2 */
+static void _sse_movr_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+# define sse_movi_ww_d(r0, i0, i1) _sse_movi_ww_d(_jit, r0, i0, i1) /* r0 <- double assembled from immediates i0 (offset 0) and i1 (offset 4) */
+static void _sse_movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+# else /* otherwise a single word is moved with the movq* macros */
+# define sse_movr_w_d(r0, r1) movqxr(r0, r1) /* plain alias: operands are forwarded in the same order */
+# define sse_movr_d_w(r0, r1) movqrx(r1, r0) /* note: the two operands are swapped when forwarded to movqrx */
+# define sse_movi_w_d(r0, i0) _sse_movi_w_d(_jit, r0, i0) /* immediate variant: i0 goes through a scratch GPR, then movqxr into r0 */
+static void _sse_movi_w_d(jit_state_t*, jit_int32_t, jit_word_t);
+# endif
# define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
# define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
imovi(rn(ireg), 0x80000000);
if (r0 == r1) {
freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
- movdlxr(rn(freg), rn(ireg));
+ movdxr(rn(freg), rn(ireg));
xorpsr(r0, rn(freg));
jit_unget_reg(freg);
}
else {
- movdlxr(r0, rn(ireg));
+ movdxr(r0, rn(ireg));
xorpsr(r0, r1);
}
jit_unget_reg(ireg);
imovi(rn(ireg), 0x80000000);
if (r0 == r1) {
freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
- movdlxr(rn(freg), rn(ireg));
+ movdxr(rn(freg), rn(ireg));
pslq(rn(freg), 32);
xorpdr(r0, rn(freg));
jit_unget_reg(freg);
}
else {
- movdlxr(r0, rn(ireg));
+ movdxr(r0, rn(ireg));
pslq(r0, 32);
xorpdr(r0, r1);
}
jit_unget_reg(ireg);
}
+/* r1 = (r1 * r3) + r2; dbl is 0 for the ss macro and 1 for the sd macro */
+#define vfmadd132ss(r1, r2, r3) _vfmadd132sx(_jit, 0, r1, r2, r3)
+#define vfmadd132sd(r1, r2, r3) _vfmadd132sx(_jit, 1, r1, r2, r3)
+static void
+_vfmadd132sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMADD132SS (dbl == 0) or VFMADD132SD (dbl != 0) */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1); /* NOTE(review): presumably opcode map 2 (0F 38), W = dbl, vvvv = r2, pp = 1 (0x66) -- confirm against vex() */
+ ic(0x99); /* opcode 0x99 is the 132 multiply-add form in the Intel SDM */
+ mrm(0x03, r7(r1), r7(r3)); /* NOTE(review): presumably mod = 3 (register direct), reg = r1, rm = r3 -- confirm */
+}
+
+/* r1 = (r1 * r3) - r2; dbl is 0 for the ss macro and 1 for the sd macro */
+#define vfmsub132ss(r1, r2, r3) _vfmsub132sx(_jit, 0, r1, r2, r3)
+#define vfmsub132sd(r1, r2, r3) _vfmsub132sx(_jit, 1, r1, r2, r3)
+static void
+_vfmsub132sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMSUB132SS (dbl == 0) or VFMSUB132SD (dbl != 0) */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1); /* NOTE(review): presumably opcode map 2 (0F 38), W = dbl, vvvv = r2, pp = 1 (0x66) -- confirm against vex() */
+ ic(0x9b); /* opcode 0x9b is the 132 multiply-subtract form in the Intel SDM */
+ mrm(0x03, r7(r1), r7(r3)); /* NOTE(review): presumably mod = 3 (register direct), reg = r1, rm = r3 -- confirm */
+}
+
+/* r1 = (r1 * r2) + r3; dbl is 0 for the ss macro and 1 for the sd macro */
+#define vfmadd213ss(r1, r2, r3) _vfmadd213sx(_jit, 0, r1, r2, r3)
+#define vfmadd213sd(r1, r2, r3) _vfmadd213sx(_jit, 1, r1, r2, r3)
+static void
+_vfmadd213sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMADD213SS (dbl == 0) or VFMADD213SD (dbl != 0) */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1); /* NOTE(review): presumably opcode map 2 (0F 38), W = dbl, vvvv = r2, pp = 1 (0x66) -- confirm against vex() */
+ ic(0xa9); /* opcode 0xa9 is the 213 multiply-add form in the Intel SDM */
+ mrm(0x03, r7(r1), r7(r3)); /* NOTE(review): presumably mod = 3 (register direct), reg = r1, rm = r3 -- confirm */
+}
+
+/* r1 = (r1 * r2) - r3; dbl is 0 for the ss macro and 1 for the sd macro */
+#define vfmsub213ss(r1, r2, r3) _vfmsub213sx(_jit, 0, r1, r2, r3)
+#define vfmsub213sd(r1, r2, r3) _vfmsub213sx(_jit, 1, r1, r2, r3)
+static void
+_vfmsub213sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMSUB213SS (dbl == 0) or VFMSUB213SD (dbl != 0) */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1); /* NOTE(review): presumably opcode map 2 (0F 38), W = dbl, vvvv = r2, pp = 1 (0x66) -- confirm against vex() */
+ ic(0xab); /* opcode 0xab is the 213 multiply-subtract form in the Intel SDM */
+ mrm(0x03, r7(r1), r7(r3)); /* NOTE(review): presumably mod = 3 (register direct), reg = r1, rm = r3 -- confirm */
+}
+
+/* r1 = (r2 * r3) + r1; dbl is 0 for the ss macro and 1 for the sd macro */
+#define vfmadd231ss(r1, r2, r3) _vfmadd231sx(_jit, 0, r1, r2, r3)
+#define vfmadd231sd(r1, r2, r3) _vfmadd231sx(_jit, 1, r1, r2, r3)
+static void
+_vfmadd231sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMADD231SS (dbl == 0) or VFMADD231SD (dbl != 0) */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1); /* NOTE(review): presumably opcode map 2 (0F 38), W = dbl, vvvv = r2, pp = 1 (0x66) -- confirm against vex() */
+ ic(0xb9); /* opcode 0xb9 is the 231 multiply-add form in the Intel SDM */
+ mrm(0x03, r7(r1), r7(r3)); /* NOTE(review): presumably mod = 3 (register direct), reg = r1, rm = r3 -- confirm */
+}
+
+/* r1 = (r2 * r3) - r1; dbl is 0 for the ss macro and 1 for the sd macro */
+#define vfmsub231ss(r1, r2, r3) _vfmsub231sx(_jit, 0, r1, r2, r3)
+#define vfmsub231sd(r1, r2, r3) _vfmsub231sx(_jit, 1, r1, r2, r3)
+static void
+_vfmsub231sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMSUB231SS (dbl == 0) or VFMSUB231SD (dbl != 0) */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1); /* NOTE(review): presumably opcode map 2 (0F 38), W = dbl, vvvv = r2, pp = 1 (0x66) -- confirm against vex() */
+ ic(0xbb); /* opcode 0xbb is the 231 multiply-subtract form in the Intel SDM */
+ mrm(0x03, r7(r1), r7(r3)); /* NOTE(review): presumably mod = 3 (register direct), reg = r1, rm = r3 -- confirm */
+}
+
+static void
+_sse_fmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit r0 = (r1 * r2) + r3 for single-precision operands. */
+ jit_int32_t t0; /* scratch FP register, acquired only on the paths that need it */
+ if (jit_cpu.fma) { /* fused path: the 213 form computes dst = (dst * r2) + r3 */
+ if (r0 != r2 && r0 != r3) { /* r0 can be preloaded with r1 without clobbering r2 or r3 */
+ sse_movr_f(r0, r1);
+ vfmadd213ss(r0, r2, r3);
+ }
+ else { /* r0 aliases r2 or r3: accumulate in t0, then copy the result to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_movr_f(rn(t0), r1);
+ vfmadd213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* jit_cpu.fma not set: separate multiply followed by add */
+ if (r0 != r3) { /* r0 may hold the product because r3 stays intact */
+ sse_mulr_f(r0, r1, r2);
+ sse_addr_f(r0, r0, r3);
+ }
+ else { /* r0 aliases r3: keep the product in t0 so r3 is still readable */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_mulr_f(rn(t0), r1, r2);
+ sse_addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
+_sse_fmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit r0 = (r1 * r2) + r3 for double-precision operands. */
+ jit_int32_t t0; /* scratch FP register, acquired only on the paths that need it */
+ if (jit_cpu.fma) { /* fused path: the 213 form computes dst = (dst * r2) + r3 */
+ if (r0 != r2 && r0 != r3) { /* r0 can be preloaded with r1 without clobbering r2 or r3 */
+ sse_movr_d(r0, r1);
+ vfmadd213sd(r0, r2, r3);
+ }
+ else { /* r0 aliases r2 or r3: accumulate in t0, then copy the result to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_movr_d(rn(t0), r1);
+ vfmadd213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* jit_cpu.fma not set: separate multiply followed by add */
+ if (r0 != r3) { /* r0 may hold the product because r3 stays intact */
+ sse_mulr_d(r0, r1, r2);
+ sse_addr_d(r0, r0, r3);
+ }
+ else { /* r0 aliases r3: keep the product in t0 so r3 is still readable */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_mulr_d(rn(t0), r1, r2);
+ sse_addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
+_sse_fmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit r0 = (r1 * r2) - r3 for single-precision operands. */
+ jit_int32_t t0; /* scratch FP register, acquired only on the paths that need it */
+ if (jit_cpu.fma) { /* fused path: the 213 form computes dst = (dst * r2) - r3 */
+ if (r0 != r2 && r0 != r3) { /* r0 can be preloaded with r1 without clobbering r2 or r3 */
+ sse_movr_f(r0, r1);
+ vfmsub213ss(r0, r2, r3);
+ }
+ else { /* r0 aliases r2 or r3: accumulate in t0, then copy the result to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_movr_f(rn(t0), r1);
+ vfmsub213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* jit_cpu.fma not set: separate multiply followed by subtract */
+ if (r0 != r3) { /* r0 may hold the product because r3 stays intact */
+ sse_mulr_f(r0, r1, r2);
+ sse_subr_f(r0, r0, r3);
+ }
+ else { /* r0 aliases r3: keep the product in t0 so r3 is still readable */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_mulr_f(rn(t0), r1, r2);
+ sse_subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
+_sse_fmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit r0 = (r1 * r2) - r3 for double-precision operands. */
+ jit_int32_t t0; /* scratch FP register, acquired only on the paths that need it */
+ if (jit_cpu.fma) { /* fused path: the 213 form computes dst = (dst * r2) - r3 */
+ if (r0 != r2 && r0 != r3) { /* r0 can be preloaded with r1 without clobbering r2 or r3 */
+ sse_movr_d(r0, r1);
+ vfmsub213sd(r0, r2, r3);
+ }
+ else { /* r0 aliases r2 or r3: accumulate in t0, then copy the result to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_movr_d(rn(t0), r1);
+ vfmsub213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* jit_cpu.fma not set: separate multiply followed by subtract */
+ if (r0 != r3) { /* r0 may hold the product because r3 stays intact */
+ sse_mulr_d(r0, r1, r2);
+ sse_subr_d(r0, r0, r3);
+ }
+ else { /* r0 aliases r3: keep the product in t0 so r3 is still readable */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_mulr_d(rn(t0), r1, r2);
+ sse_subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
+_sse_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit r0 = (-r1 * r2) - r3 for single-precision operands. */
+ jit_int32_t t0; /* scratch FP register */
+ if (jit_cpu.fma) { /* fused path: negate r1 first, then dst = (dst * r2) - r3 */
+ if (r0 != r2 && r0 != r3) { /* r0 can receive -r1 without clobbering r2 or r3 */
+ sse_negr_f(r0, r1);
+ vfmsub213ss(r0, r2, r3);
+ }
+ else { /* r0 aliases r2 or r3: accumulate in t0, then copy the result to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ vfmsub213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* jit_cpu.fma not set: always go through t0, so no aliasing checks are needed */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1); /* t0 = -r1 */
+ sse_mulr_f(rn(t0), rn(t0), r2); /* t0 = -r1 * r2 */
+ sse_subr_f(r0, rn(t0), r3); /* r0 = t0 - r3 */
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit r0 = (-r1 * r2) - r3 for double-precision operands. */
+ jit_int32_t t0; /* scratch FP register */
+ if (jit_cpu.fma) { /* fused path: negate r1 first, then dst = (dst * r2) - r3 */
+ if (r0 != r2 && r0 != r3) { /* r0 can receive -r1 without clobbering r2 or r3 */
+ sse_negr_d(r0, r1);
+ vfmsub213sd(r0, r2, r3);
+ }
+ else { /* r0 aliases r2 or r3: accumulate in t0, then copy the result to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ vfmsub213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* jit_cpu.fma not set: always go through t0, so no aliasing checks are needed */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1); /* t0 = -r1 */
+ sse_mulr_d(rn(t0), rn(t0), r2); /* t0 = -r1 * r2 */
+ sse_subr_d(r0, rn(t0), r3); /* r0 = t0 - r3 */
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit r0 = (-r1 * r2) + r3 for single-precision operands. */
+ jit_int32_t t0; /* scratch FP register */
+ if (jit_cpu.fma) { /* fused path: negate r1 first, then dst = (dst * r2) + r3 */
+ if (r0 != r2 && r0 != r3) { /* r0 can receive -r1 without clobbering r2 or r3 */
+ sse_negr_f(r0, r1);
+ vfmadd213ss(r0, r2, r3);
+ }
+ else { /* r0 aliases r2 or r3: accumulate in t0, then copy the result to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ vfmadd213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* jit_cpu.fma not set: always go through t0, so no aliasing checks are needed */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1); /* t0 = -r1 */
+ sse_mulr_f(rn(t0), rn(t0), r2); /* t0 = -r1 * r2 */
+ sse_addr_f(r0, rn(t0), r3); /* r0 = t0 + r3 */
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit r0 = (-r1 * r2) + r3 for double-precision operands. */
+ jit_int32_t t0; /* scratch FP register */
+ if (jit_cpu.fma) { /* fused path: negate r1 first, then dst = (dst * r2) + r3 */
+ if (r0 != r2 && r0 != r3) { /* r0 can receive -r1 without clobbering r2 or r3 */
+ sse_negr_d(r0, r1);
+ vfmadd213sd(r0, r2, r3);
+ }
+ else { /* r0 aliases r2 or r3: accumulate in t0, then copy the result to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ vfmadd213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* jit_cpu.fma not set: always go through t0, so no aliasing checks are needed */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1); /* t0 = -r1 */
+ sse_mulr_d(rn(t0), rn(t0), r2); /* t0 = -r1 * r2 */
+ sse_addr_d(r0, rn(t0), r3); /* r0 = t0 + r3 */
+ jit_unget_reg(t0);
+ }
+}
+
static void
_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), data.i);
- movdlxr(r0, rn(reg));
+ movdxr(r0, rn(reg));
jit_unget_reg(reg);
}
}
}
+static void
+_sse_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{ /* Place the immediate word i0 into SSE register r0 by way of a scratch GPR. */
+ jit_int32_t reg; /* scratch general-purpose register */
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ movdxr(r0, rn(reg)); /* NOTE(review): presumably a raw bit transfer (no int-to-float conversion) -- confirm */
+ jit_unget_reg(reg);
+}
+
fopi(lt)
fopi(le)
}
}
+static void
+_sse_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Load r0 through r1, dispatching on the size i0 to the float or double load. */
+ assert(i0 == 4 || i0 == 8); /* only sizes 4 and 8 are supported */
+ if (i0 == 4)
+ sse_ldr_f(r0, r1);
+ else
+ sse_ldr_d(r0, r1); /* NOTE(review): the "un" prefix presumably means unaligned; this simply forwards to the ordinary load */
+}
+
+static void
+_sse_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{ /* Load r0 from immediate address i0, dispatching on the size i1 to the float or double load. */
+ assert(i1 == 4 || i1 == 8); /* only sizes 4 and 8 are supported */
+ if (i1 == 4)
+ sse_ldi_f(r0, i0);
+ else
+ sse_ldi_d(r0, i0); /* NOTE(review): the "un" prefix presumably means unaligned; this simply forwards to the ordinary load */
+}
+
static void
_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
}
}
+static void
+_sse_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Store r1 through r0, dispatching on the size i0 to the float or double store. */
+ assert(i0 == 4 || i0 == 8); /* only sizes 4 and 8 are supported */
+ if (i0 == 4)
+ sse_str_f(r0, r1);
+ else
+ sse_str_d(r0, r1); /* NOTE(review): the "un" prefix presumably means unaligned; this simply forwards to the ordinary store */
+}
+
+static void
+_sse_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{ /* Store r0 to immediate address i0, dispatching on the size i1 to the float or double store. */
+ assert(i1 == 4 || i1 == 8); /* only sizes 4 and 8 are supported */
+ if (i1 == 4)
+ sse_sti_f(i0, r0);
+ else
+ sse_sti_d(i0, r0); /* NOTE(review): the "un" prefix presumably means unaligned; this simply forwards to the ordinary store */
+}
+
static jit_word_t
_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
reg = jit_get_reg(jit_class_gpr);
#if __X64 && !__X64_32
movi(rn(reg), data.w);
- movdqxr(r0, rn(reg));
+ movqxr(r0, rn(reg));
jit_unget_reg(reg);
#else
CHECK_CVT_OFFSET();
}
}
+#if __X32 || __X64_32
+static void
+_sse_movr_ww_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Build a double in r0 from two words by bouncing them through the CVT_OFFSET slot. */
+ CHECK_CVT_OFFSET(); /* NOTE(review): presumably ensures the CVT_OFFSET stack slot is reserved -- confirm */
+ stxi_i(CVT_OFFSET, _RBP_REGNO, r1); /* r1 goes to the lower address */
+ stxi_i(CVT_OFFSET + 4, _RBP_REGNO, r2); /* r2 goes 4 bytes above it */
+ sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); /* reload both words as one double */
+}
+
+static void
+_sse_movr_d_ww(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Split double r2 into two words by bouncing it through the CVT_OFFSET slot. */
+ CHECK_CVT_OFFSET(); /* NOTE(review): presumably ensures the CVT_OFFSET stack slot is reserved -- confirm */
+ sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r2); /* spill the double */
+ ldxi_i(r0, _RBP_REGNO, CVT_OFFSET); /* r0 receives the word at the lower address */
+ ldxi_i(r1, _RBP_REGNO, CVT_OFFSET + 4); /* r1 receives the word 4 bytes above it */
+}
+
+static void
+_sse_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{ /* Build a double in r0 from two immediate words via the CVT_OFFSET slot. */
+ jit_int32_t reg; /* scratch general-purpose register, reused for both words */
+ CHECK_CVT_OFFSET(); /* NOTE(review): presumably ensures the CVT_OFFSET stack slot is reserved -- confirm */
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); /* i0 goes to the lower address */
+ movi(rn(reg), i1);
+ stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); /* i1 goes 4 bytes above it */
+ sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); /* reload both words as one double */
+ jit_unget_reg(reg);
+}
+#else
+static void
+_sse_movi_w_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{ /* Place the immediate word i0 into SSE register r0 by way of a scratch GPR. */
+ jit_int32_t reg; /* scratch general-purpose register */
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ movqxr(r0, rn(reg)); /* NOTE(review): presumably a raw bit transfer (no int-to-double conversion) -- confirm */
+ jit_unget_reg(reg);
+}
+#endif
+
static void
_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{