/*
- * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
*/
#if PROTO
-# if __X32
-# define sse_address_p(i0) 1
-# else
-# if __X64_32
-# define sse_address_p(i0) ((jit_word_t)(i0) >= 0)
-# else
-# define sse_address_p(i0) can_sign_extend_int_p(i0)
-# endif
-# endif
# define _XMM6_REGNO 6
# define _XMM7_REGNO 7
# define _XMM8_REGNO 8
# define sser(c,r0,r1) _sser(_jit,c,r0,r1)
static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
-static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+static void _ssexr(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
# define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
# define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
# define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
-# define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
+# define movdxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
+# define movdrx(r0,r1) ssexr(0x66, X86_SSE_G2X,r0,r1)
+# define movqxr(r0,r1) sselxr(0x66, X86_SSE_X2G,r0,r1)
+# define movqrx(r0,r1) sselxr(0x66, X86_SSE_G2X,r0,r1)
# define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
# define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
# define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
# define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
# define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
-# define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1)
# if __X64 && !__X64_32
# define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
static void
static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
# define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
+# define sse_fmar_f(r0, r1, r2, r3) _sse_fmar_f(_jit, r0, r1, r2, r3)
+static void _sse_fmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fmar_d(r0, r1, r2, r3) _sse_fmar_d(_jit, r0, r1, r2, r3)
+static void _sse_fmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fmsr_f(r0, r1, r2, r3) _sse_fmsr_f(_jit, r0, r1, r2, r3)
+static void _sse_fmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fmsr_d(r0, r1, r2, r3) _sse_fmsr_d(_jit, r0, r1, r2, r3)
+static void _sse_fmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmar_f(r0, r1, r2, r3) _sse_fnmar_f(_jit, r0, r1, r2, r3)
+static void _sse_fnmar_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmar_d(r0, r1, r2, r3) _sse_fnmar_d(_jit, r0, r1, r2, r3)
+static void _sse_fnmar_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmsr_f(r0, r1, r2, r3) _sse_fnmsr_f(_jit, r0, r1, r2, r3)
+static void _sse_fnmsr_f(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define sse_fnmsr_d(r0, r1, r2, r3) _sse_fnmsr_d(_jit, r0, r1, r2, r3)
+static void _sse_fnmsr_d(jit_state_t*,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
static void
static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
#define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
+# define sse_movr_w_f(r0,r1) movdxr(r0, r1)
+# define sse_movr_f_w(r0,r1) movdrx(r1, r0)
+#define sse_movi_w_f(r0, i0) _sse_movi_w_f(_jit, r0, i0)
+static void _sse_movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
# define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define sse_unldr_x(r0, r1, i0) _sse_unldr_x(_jit, r0, r1, i0)
+static void _sse_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define sse_unldi_x(r0, i0, i1) _sse_unldi_x(_jit, r0, i0, i1)
+static void _sse_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
# define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
# define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define sse_unstr_x(r0, r1, i0) _sse_unstr_x(_jit, r0, r1, i0)
+static void _sse_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define sse_unsti_x(i0, r0, i1) _sse_unsti_x(_jit, i0, r0, i1)
+static void _sse_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
# define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
#define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
+# if __X32 || __X64_32
+# define sse_movr_ww_d(r0, r1, r2) _sse_movr_ww_d(_jit, r0, r1, r2)
+static void _sse_movr_ww_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+# define sse_movr_d_ww(r0, r1, r2) _sse_movr_d_ww(_jit, r0, r1, r2)
+static void _sse_movr_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+# define sse_movi_ww_d(r0, i0, i1) _sse_movi_ww_d(_jit, r0, i0, i1)
+static void _sse_movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+# else
+# define sse_movr_w_d(r0, r1) movqxr(r0, r1)
+# define sse_movr_d_w(r0, r1) movqrx(r1, r0)
+# define sse_movi_w_d(r0, i0) _sse_movi_w_d(_jit, r0, i0)
+static void _sse_movi_w_d(jit_state_t*, jit_int32_t, jit_word_t);
+# endif
# define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
# define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
jit_word_t i0, jit_int32_t r0, \
jit_float##size##_t *i1) \
{ \
- jit_word_t word; \
+ jit_word_t w; \
jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \
jit_class_nospill); \
assert(jit_sse_reg_p(reg)); \
sse_movi_##type(rn(reg), i1); \
- word = sse_b##name##r_##type(i0, r0, rn(reg)); \
+ w = sse_b##name##r_##type(i0, r0, rn(reg)); \
jit_unget_reg(reg); \
- return (word); \
+ return (w); \
}
# define fopi(name) fpr_opi(name, f, 32)
# define fbopi(name) fpr_bopi(name, f, 32)
imovi(rn(ireg), 0x80000000);
if (r0 == r1) {
freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
- movdlxr(rn(freg), rn(ireg));
+ movdxr(rn(freg), rn(ireg));
xorpsr(r0, rn(freg));
jit_unget_reg(freg);
}
else {
- movdlxr(r0, rn(ireg));
+ movdxr(r0, rn(ireg));
xorpsr(r0, r1);
}
jit_unget_reg(ireg);
imovi(rn(ireg), 0x80000000);
if (r0 == r1) {
freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
- movdlxr(rn(freg), rn(ireg));
+ movdxr(rn(freg), rn(ireg));
pslq(rn(freg), 32);
xorpdr(r0, rn(freg));
jit_unget_reg(freg);
}
else {
- movdlxr(r0, rn(ireg));
+ movdxr(r0, rn(ireg));
pslq(r0, 32);
xorpdr(r0, r1);
}
jit_unget_reg(ireg);
}
+/* r1 = (r1 * r3) + r2
+ * Scalar fused multiply-add in the "132" operand order.  vfmadd132ss and
+ * vfmadd132sd call the shared emitter with dbl = 0 and dbl = 1. */
+#define vfmadd132ss(r1, r2, r3) _vfmadd132sx(_jit, 0, r1, r2, r3)
+#define vfmadd132sd(r1, r2, r3) _vfmadd132sx(_jit, 1, r1, r2, r3)
+static void
+_vfmadd132sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMADD132SS/SD: dbl reaches vex() normalized to 0/1 by !!dbl; r2 is
+ * passed to vex() only, while r1 and r3 are also used for the mrm()
+ * operands that follow opcode byte 0x99. */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
+ ic(0x99);
+ mrm(0x03, r7(r1), r7(r3));
+}
+
+/* r1 = (r1 * r3) - r2
+ * Scalar fused multiply-subtract in the "132" operand order.  vfmsub132ss
+ * and vfmsub132sd call the shared emitter with dbl = 0 and dbl = 1. */
+#define vfmsub132ss(r1, r2, r3) _vfmsub132sx(_jit, 0, r1, r2, r3)
+#define vfmsub132sd(r1, r2, r3) _vfmsub132sx(_jit, 1, r1, r2, r3)
+static void
+_vfmsub132sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMSUB132SS/SD: dbl reaches vex() normalized to 0/1 by !!dbl; r2 is
+ * passed to vex() only, while r1 and r3 are also used for the mrm()
+ * operands that follow opcode byte 0x9b. */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
+ ic(0x9b);
+ mrm(0x03, r7(r1), r7(r3));
+}
+
+/* r1 = (r1 * r2) + r3
+ * Scalar fused multiply-add in the "213" operand order.  vfmadd213ss and
+ * vfmadd213sd call the shared emitter with dbl = 0 and dbl = 1. */
+#define vfmadd213ss(r1, r2, r3) _vfmadd213sx(_jit, 0, r1, r2, r3)
+#define vfmadd213sd(r1, r2, r3) _vfmadd213sx(_jit, 1, r1, r2, r3)
+static void
+_vfmadd213sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMADD213SS/SD (opcode byte 0xa9): dbl reaches vex() normalized to 0/1
+ * by !!dbl; r2 is passed to vex() only, while r1 and r3 are also used
+ * for the mrm() operands. */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
+ ic(0xa9);
+ mrm(0x03, r7(r1), r7(r3));
+}
+
+/* r1 = (r1 * r2) - r3
+ * Scalar fused multiply-subtract in the "213" operand order.  vfmsub213ss
+ * and vfmsub213sd call the shared emitter with dbl = 0 and dbl = 1. */
+#define vfmsub213ss(r1, r2, r3) _vfmsub213sx(_jit, 0, r1, r2, r3)
+#define vfmsub213sd(r1, r2, r3) _vfmsub213sx(_jit, 1, r1, r2, r3)
+static void
+_vfmsub213sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMSUB213SS/SD (opcode byte 0xab): dbl reaches vex() normalized to 0/1
+ * by !!dbl; r2 is passed to vex() only, while r1 and r3 are also used
+ * for the mrm() operands. */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
+ ic(0xab);
+ mrm(0x03, r7(r1), r7(r3));
+}
+
+/* r1 = (r2 * r3) + r1
+ * Scalar fused multiply-add in the "231" operand order.  vfmadd231ss and
+ * vfmadd231sd call the shared emitter with dbl = 0 and dbl = 1. */
+#define vfmadd231ss(r1, r2, r3) _vfmadd231sx(_jit, 0, r1, r2, r3)
+#define vfmadd231sd(r1, r2, r3) _vfmadd231sx(_jit, 1, r1, r2, r3)
+static void
+_vfmadd231sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMADD231SS/SD: dbl reaches vex() normalized to 0/1 by !!dbl; r2 is
+ * passed to vex() only, while r1 and r3 are also used for the mrm()
+ * operands that follow opcode byte 0xb9. */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
+ ic(0xb9);
+ mrm(0x03, r7(r1), r7(r3));
+}
+
+/* r1 = (r2 * r3) - r1
+ * Scalar fused multiply-subtract in the "231" operand order.  vfmsub231ss
+ * and vfmsub231sd call the shared emitter with dbl = 0 and dbl = 1. */
+#define vfmsub231ss(r1, r2, r3) _vfmsub231sx(_jit, 0, r1, r2, r3)
+#define vfmsub231sd(r1, r2, r3) _vfmsub231sx(_jit, 1, r1, r2, r3)
+static void
+_vfmsub231sx(jit_state_t *_jit, jit_bool_t dbl,
+ jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ /* VFMSUB231SS/SD: dbl reaches vex() normalized to 0/1 by !!dbl; r2 is
+ * passed to vex() only, while r1 and r3 are also used for the mrm()
+ * operands that follow opcode byte 0xbb. */
+ vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
+ ic(0xbb);
+ mrm(0x03, r7(r1), r7(r3));
+}
+
+static void
+_sse_fmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit code for r0 = (r1 * r2) + r3 on single-precision operands.
+ * When jit_cpu.fma is set a vfmadd213ss sequence is used; otherwise a
+ * separate multiply and add are emitted.  A scratch register is taken
+ * whenever writing r0 first would overwrite an operand still to be read. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* FMA path: copy r1 into the destination, then vfmadd213ss */
+ if (r0 != r2 && r0 != r3) { /* r0 is neither r2 nor r3, so it can receive r1 right away */
+ sse_movr_f(r0, r1);
+ vfmadd213ss(r0, r2, r3);
+ }
+ else { /* r0 is r2 or r3: compute in a scratch register, then copy to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_movr_f(rn(t0), r1);
+ vfmadd213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: multiply, then add */
+ if (r0 != r3) { /* the product may land in r0 without overwriting r3 */
+ sse_mulr_f(r0, r1, r2);
+ sse_addr_f(r0, r0, r3);
+ }
+ else { /* r0 is r3: hold the product in a scratch register until the add */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_mulr_f(rn(t0), r1, r2);
+ sse_addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
+_sse_fmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit code for r0 = (r1 * r2) + r3 on double-precision operands.
+ * When jit_cpu.fma is set a vfmadd213sd sequence is used; otherwise a
+ * separate multiply and add are emitted.  A scratch register is taken
+ * whenever writing r0 first would overwrite an operand still to be read. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* FMA path: copy r1 into the destination, then vfmadd213sd */
+ if (r0 != r2 && r0 != r3) { /* r0 is neither r2 nor r3, so it can receive r1 right away */
+ sse_movr_d(r0, r1);
+ vfmadd213sd(r0, r2, r3);
+ }
+ else { /* r0 is r2 or r3: compute in a scratch register, then copy to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_movr_d(rn(t0), r1);
+ vfmadd213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: multiply, then add */
+ if (r0 != r3) { /* the product may land in r0 without overwriting r3 */
+ sse_mulr_d(r0, r1, r2);
+ sse_addr_d(r0, r0, r3);
+ }
+ else { /* r0 is r3: hold the product in a scratch register until the add */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_mulr_d(rn(t0), r1, r2);
+ sse_addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
+_sse_fmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit code for r0 = (r1 * r2) - r3 on single-precision operands.
+ * When jit_cpu.fma is set a vfmsub213ss sequence is used; otherwise a
+ * separate multiply and subtract are emitted.  A scratch register is taken
+ * whenever writing r0 first would overwrite an operand still to be read. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* FMA path: copy r1 into the destination, then vfmsub213ss */
+ if (r0 != r2 && r0 != r3) { /* r0 is neither r2 nor r3, so it can receive r1 right away */
+ sse_movr_f(r0, r1);
+ vfmsub213ss(r0, r2, r3);
+ }
+ else { /* r0 is r2 or r3: compute in a scratch register, then copy to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_movr_f(rn(t0), r1);
+ vfmsub213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: multiply, then subtract */
+ if (r0 != r3) { /* the product may land in r0 without overwriting r3 */
+ sse_mulr_f(r0, r1, r2);
+ sse_subr_f(r0, r0, r3);
+ }
+ else { /* r0 is r3: hold the product in a scratch register until the subtract */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_mulr_f(rn(t0), r1, r2);
+ sse_subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
+_sse_fmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit code for r0 = (r1 * r2) - r3 on double-precision operands.
+ * When jit_cpu.fma is set a vfmsub213sd sequence is used; otherwise a
+ * separate multiply and subtract are emitted.  A scratch register is taken
+ * whenever writing r0 first would overwrite an operand still to be read. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* FMA path: copy r1 into the destination, then vfmsub213sd */
+ if (r0 != r2 && r0 != r3) { /* r0 is neither r2 nor r3, so it can receive r1 right away */
+ sse_movr_d(r0, r1);
+ vfmsub213sd(r0, r2, r3);
+ }
+ else { /* r0 is r2 or r3: compute in a scratch register, then copy to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_movr_d(rn(t0), r1);
+ vfmsub213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: multiply, then subtract */
+ if (r0 != r3) { /* the product may land in r0 without overwriting r3 */
+ sse_mulr_d(r0, r1, r2);
+ sse_subr_d(r0, r0, r3);
+ }
+ else { /* r0 is r3: hold the product in a scratch register until the subtract */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_mulr_d(rn(t0), r1, r2);
+ sse_subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
+static void
+_sse_fnmar_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit code for r0 = (-r1 * r2) - r3 on single-precision operands.
+ * The negation is applied to r1 up front; the FMA path then uses
+ * vfmsub213ss, the fallback a multiply followed by a subtract. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* FMA path: negate r1 into the destination, then vfmsub213ss */
+ if (r0 != r2 && r0 != r3) { /* r0 is neither r2 nor r3, so it can receive -r1 right away */
+ sse_negr_f(r0, r1);
+ vfmsub213ss(r0, r2, r3);
+ }
+ else { /* r0 is r2 or r3: compute in a scratch register, then copy to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ vfmsub213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: this path always works in a scratch register */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ sse_mulr_f(rn(t0), rn(t0), r2);
+ sse_subr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmar_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit code for r0 = (-r1 * r2) - r3 on double-precision operands.
+ * The negation is applied to r1 up front; the FMA path then uses
+ * vfmsub213sd, the fallback a multiply followed by a subtract. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* FMA path: negate r1 into the destination, then vfmsub213sd */
+ if (r0 != r2 && r0 != r3) { /* r0 is neither r2 nor r3, so it can receive -r1 right away */
+ sse_negr_d(r0, r1);
+ vfmsub213sd(r0, r2, r3);
+ }
+ else { /* r0 is r2 or r3: compute in a scratch register, then copy to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ vfmsub213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: this path always works in a scratch register */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ sse_mulr_d(rn(t0), rn(t0), r2);
+ sse_subr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmsr_f(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit code for r0 = (-r1 * r2) + r3 on single-precision operands.
+ * The negation is applied to r1 up front; the FMA path then uses
+ * vfmadd213ss, the fallback a multiply followed by an add. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* FMA path: negate r1 into the destination, then vfmadd213ss */
+ if (r0 != r2 && r0 != r3) { /* r0 is neither r2 nor r3, so it can receive -r1 right away */
+ sse_negr_f(r0, r1);
+ vfmadd213ss(r0, r2, r3);
+ }
+ else { /* r0 is r2 or r3: compute in a scratch register, then copy to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ vfmadd213ss(rn(t0), r2, r3);
+ sse_movr_f(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: this path always works in a scratch register */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_f(rn(t0), r1);
+ sse_mulr_f(rn(t0), rn(t0), r2);
+ sse_addr_f(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_sse_fnmsr_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{ /* Emit code for r0 = (-r1 * r2) + r3 on double-precision operands.
+ * The negation is applied to r1 up front; the FMA path then uses
+ * vfmadd213sd, the fallback a multiply followed by an add. */
+ jit_int32_t t0;
+ if (jit_cpu.fma) { /* FMA path: negate r1 into the destination, then vfmadd213sd */
+ if (r0 != r2 && r0 != r3) { /* r0 is neither r2 nor r3, so it can receive -r1 right away */
+ sse_negr_d(r0, r1);
+ vfmadd213sd(r0, r2, r3);
+ }
+ else { /* r0 is r2 or r3: compute in a scratch register, then copy to r0 */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ vfmadd213sd(rn(t0), r2, r3);
+ sse_movr_d(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else { /* no FMA: this path always works in a scratch register */
+ t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
+ sse_negr_d(rn(t0), r1);
+ sse_mulr_d(rn(t0), rn(t0), r2);
+ sse_addr_d(r0, rn(t0), r3);
+ jit_unget_reg(t0);
+ }
+}
+
static void
_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
ldi = !_jitc->no_data;
#if __X64
/* if will allocate a register for offset, just use immediate */
- if (ldi && !sse_address_p(i0))
+# if CAN_RIP_ADDRESS
+ if (ldi) {
+ jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ ldi = can_sign_extend_int_p(rel);
+ if (!ldi && address_p(i0))
+ ldi = 1;
+ }
+# else
+ if (ldi && !address_p(i0))
ldi = 0;
+# endif
#endif
if (ldi)
sse_ldi_f(r0, (jit_word_t)i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), data.i);
- movdlxr(r0, rn(reg));
+ movdxr(r0, rn(reg));
jit_unget_reg(reg);
}
}
}
+static void
+_sse_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{ /* Load the integer immediate i0 into a scratch general-purpose register
+ * and transfer it to r0 with movdxr.
+ * NOTE(review): presumably this moves the raw 32-bit pattern rather than
+ * converting the integer to float -- confirm against the movd encoding. */
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ movdxr(r0, rn(reg));
+ jit_unget_reg(reg); /* the scratch register is released once r0 holds the value */
+}
+
fopi(lt)
fopi(le)
}
ixorr(reg, reg);
ucomissr(r2, r1);
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_E, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
}
imovi(reg, 1);
ucomissr(r2, r1);
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_NE, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
_sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ if (can_sign_extend_int_p(rel))
+ movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
+ else
+#endif
+ if (address_p(i0))
movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
else {
reg = jit_get_reg(jit_class_gpr);
}
}
+static void
+_sse_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Size-dispatched load into r0 using register operand r1: i0 == 4 goes
+ * through sse_ldr_f, i0 == 8 through sse_ldr_d.
+ * NOTE(review): "unld" presumably means unaligned load; no extra alignment
+ * handling happens here -- confirm that is intended. */
+ assert(i0 == 4 || i0 == 8); /* only 4 and 8 are accepted as sizes */
+ if (i0 == 4)
+ sse_ldr_f(r0, r1);
+ else
+ sse_ldr_d(r0, r1);
+}
+
+static void
+_sse_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{ /* Size-dispatched load into r0 using immediate operand i0: i1 == 4 goes
+ * through sse_ldi_f, i1 == 8 through sse_ldi_d.
+ * NOTE(review): "unld" presumably means unaligned load; no extra alignment
+ * handling happens here -- confirm that is intended. */
+ assert(i1 == 4 || i1 == 8); /* only 4 and 8 are accepted as sizes */
+ if (i1 == 4)
+ sse_ldi_f(r0, i0);
+ else
+ sse_ldi_d(r0, i0);
+}
+
static void
_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ if (can_sign_extend_int_p(rel))
+ movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
+ else
+#endif
+ if (address_p(i0))
movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
else {
reg = jit_get_reg(jit_class_gpr);
}
}
+static void
+_sse_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Size-dispatched store with register operands r0 and r1: i0 == 4 goes
+ * through sse_str_f, i0 == 8 through sse_str_d.
+ * NOTE(review): "unst" presumably means unaligned store; no extra alignment
+ * handling happens here -- confirm that is intended. */
+ assert(i0 == 4 || i0 == 8); /* only 4 and 8 are accepted as sizes */
+ if (i0 == 4)
+ sse_str_f(r0, r1);
+ else
+ sse_str_d(r0, r1);
+}
+
+static void
+_sse_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{ /* Size-dispatched store of r0 using immediate operand i0: i1 == 4 goes
+ * through sse_sti_f, i1 == 8 through sse_sti_d.
+ * NOTE(review): "unst" presumably means unaligned store; no extra alignment
+ * handling happens here -- confirm that is intended. */
+ assert(i1 == 4 || i1 == 8); /* only 4 and 8 are accepted as sizes */
+ if (i1 == 4)
+ sse_sti_f(i0, r0);
+ else
+ sse_sti_d(i0, r0);
+}
+
static jit_word_t
_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r1, r0);
- ja(i0);
- return (_jit->pc.w);
+ return (ja(i0)); /* the compare above takes (r1, r0), so "above" is tested on swapped operands; the result is whatever ja() returns (presumably the jump's patch location -- confirm) */
}
fbopi(lt)
_sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r1, r0);
- jae(i0);
- return (_jit->pc.w);
+ return (jae(i0));
}
fbopi(le)
static jit_word_t
_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* value returned by je(), handed back to the caller */
jit_word_t jp_code;
ucomissr(r0, r1);
- jps(0);
- jp_code = _jit->pc.w;
- je(i0);
- patch_rel_char(jp_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jps(0); /* placeholder-target jump that skips the je; presumably taken on an unordered compare -- confirm */
+ w = je(i0); /* the actual branch to i0 */
+ patch_at(jp_code, _jit->pc.w); /* the skip jump lands right after the je */
+ return (w);
}
fbopi(eq)
_sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jae(i0);
- return (_jit->pc.w);
+ return (jae(i0));
}
fbopi(ge)
_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- ja(i0);
- return (_jit->pc.w);
+ return (ja(i0));
}
fbopi(gt)
static jit_word_t
_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* value returned by jmpi(), handed back to the caller */
jit_word_t jp_code;
jit_word_t jz_code;
ucomissr(r0, r1);
- jps(0);
- jp_code = _jit->pc.w;
- jzs(0);
- jz_code = _jit->pc.w;
- patch_rel_char(jp_code, _jit->pc.w);
- jmpi(i0);
- patch_rel_char(jz_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jps(0); /* placeholder-target jump, patched below to land on the jmpi */
+ jz_code = jzs(0); /* placeholder-target jump, patched below to land after the jmpi */
+ patch_at(jp_code, _jit->pc.w); /* jps target: the jmpi emitted next */
+ w = jmpi(i0); /* the branch to i0; reached by fall-through or via the jps */
+ patch_at(jz_code, _jit->pc.w); /* jzs target: first byte after the jmpi */
+ return (w);
}
fbopi(ne)
_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jnae(i0);
- return (_jit->pc.w);
+ return (jnae(i0));
}
fbopi(unlt)
static jit_word_t
_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* result of whichever jump emitter runs below */
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0); /* same register on both sides: no compare, jump unconditionally */
else {
ucomissr(r0, r1);
- jna(i0);
+ w = jna(i0); /* conditional jump on the flags from the compare above */
}
- return (_jit->pc.w);
+ return (w);
}
fbopi(unle)
static jit_word_t
_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* result of whichever jump emitter runs below */
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0); /* same register on both sides: no compare, jump unconditionally */
else {
ucomissr(r0, r1);
- je(i0);
+ w = je(i0); /* conditional jump on the flags from the compare above */
}
- return (_jit->pc.w);
+ return (w);
}
fbopi(uneq)
static jit_word_t
_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* result of whichever jump emitter runs below */
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0); /* same register on both sides: no compare, jump unconditionally */
else {
ucomissr(r1, r0);
- jna(i0);
+ w = jna(i0); /* conditional jump; the compare above takes the operands swapped (r1, r0) */
}
- return (_jit->pc.w);
+ return (w);
}
fbopi(unge)
_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r1, r0);
- jnae(i0);
- return (_jit->pc.w);
+ return (jnae(i0));
}
fbopi(ungt)
_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jne(i0);
- return (_jit->pc.w);
+ return (jne(i0));
}
fbopi(ltgt)
_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jnp(i0);
- return (_jit->pc.w);
+ return (jnp(i0));
}
fbopi(ord)
_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jp(i0);
- return (_jit->pc.w);
+ return (jp(i0));
}
fbopi(unord)
}
ixorr(reg, reg);
ucomisdr(r2, r1);
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_E, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
}
imovi(reg, 1);
ucomisdr(r2, r1);
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_NE, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
ldi = !_jitc->no_data;
#if __X64
/* if will allocate a register for offset, just use immediate */
- if (ldi && !sse_address_p(i0))
+# if CAN_RIP_ADDRESS
+ if (ldi) {
+ jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ ldi = can_sign_extend_int_p(rel);
+ if (!ldi && address_p(i0))
+ ldi = 1;
+ }
+# else
+ if (ldi && !address_p(i0))
ldi = 0;
+# endif
#endif
if (ldi)
sse_ldi_d(r0, (jit_word_t)i0);
reg = jit_get_reg(jit_class_gpr);
#if __X64 && !__X64_32
movi(rn(reg), data.w);
- movdqxr(r0, rn(reg));
+ movqxr(r0, rn(reg));
jit_unget_reg(reg);
#else
+ CHECK_CVT_OFFSET();
movi(rn(reg), data.ii[0]);
stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
movi(rn(reg), data.ii[1]);
}
}
+#if __X32 || __X64_32
+static void
+_sse_movr_ww_d(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Assemble a double in r0 from the two integer registers r1 and r2 by
+ * storing them side by side in the CVT_OFFSET slot addressed off
+ * _RBP_REGNO and reloading the 8 bytes as one double. */
+ CHECK_CVT_OFFSET(); /* NOTE(review): presumably makes sure the CVT_OFFSET slot exists -- confirm */
+ stxi_i(CVT_OFFSET, _RBP_REGNO, r1); /* r1 goes to the lower address */
+ stxi_i(CVT_OFFSET + 4, _RBP_REGNO, r2); /* r2 goes 4 bytes above it */
+ sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
+}
+
+static void
+_sse_movr_d_ww(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{ /* Split the double in r2 into the two integer registers r0 and r1 by
+ * storing it to the CVT_OFFSET slot addressed off _RBP_REGNO and
+ * reloading the slot as two separate integers. */
+ CHECK_CVT_OFFSET(); /* NOTE(review): presumably makes sure the CVT_OFFSET slot exists -- confirm */
+ sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r2);
+ ldxi_i(r0, _RBP_REGNO, CVT_OFFSET); /* r0 receives the integer at the lower address */
+ ldxi_i(r1, _RBP_REGNO, CVT_OFFSET + 4); /* r1 receives the integer 4 bytes above it */
+}
+
+static void
+_sse_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{ /* Assemble a double in r0 from the two integer immediates i0 and i1:
+ * each is loaded into one scratch register and stored to the CVT_OFFSET
+ * slot addressed off _RBP_REGNO, which is then reloaded as a double. */
+ jit_int32_t reg;
+ CHECK_CVT_OFFSET(); /* NOTE(review): presumably makes sure the CVT_OFFSET slot exists -- confirm */
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); /* i0 goes to the lower address */
+ movi(rn(reg), i1); /* the same scratch register is reused for the second half */
+ stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); /* i1 goes 4 bytes above it */
+ sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
+ jit_unget_reg(reg);
+}
+#else
+static void
+_sse_movi_w_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{ /* Load the word-sized immediate i0 into a scratch general-purpose
+ * register and transfer it to r0 with movqxr.
+ * NOTE(review): presumably this moves the raw 64-bit pattern rather than
+ * converting the integer to double -- confirm against the movq encoding. */
+ jit_int32_t reg;
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ movqxr(r0, rn(reg));
+ jit_unget_reg(reg); /* the scratch register is released once r0 holds the value */
+}
+#endif
+
static void
_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ if (can_sign_extend_int_p(rel))
+ movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
+ else
+#endif
+ if (address_p(i0))
movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
else {
reg = jit_get_reg(jit_class_gpr);
_sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ if (can_sign_extend_int_p(rel))
+ movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
+ else
+#endif
+ if (address_p(i0))
movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
else {
reg = jit_get_reg(jit_class_gpr);
_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r1, r0);
- ja(i0);
- return (_jit->pc.w);
+ return (ja(i0));
}
dbopi(lt)
_sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r1, r0);
- jae(i0);
- return (_jit->pc.w);
+ return (jae(i0));
}
dbopi(le)
static jit_word_t
_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* value returned by je(), handed back to the caller */
jit_word_t jp_code;
ucomisdr(r0, r1);
- jps(0);
- jp_code = _jit->pc.w;
- je(i0);
- patch_rel_char(jp_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jps(0); /* placeholder-target jump that skips the je; presumably taken on an unordered compare -- confirm */
+ w = je(i0); /* the actual branch to i0 */
+ patch_at(jp_code, _jit->pc.w); /* the skip jump lands right after the je */
+ return (w);
}
dbopi(eq)
_sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jae(i0);
- return (_jit->pc.w);
+ return (jae(i0));
}
dbopi(ge)
_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- ja(i0);
- return (_jit->pc.w);
+ return (ja(i0));
}
dbopi(gt)
static jit_word_t
_sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* value returned by jmpi(), handed back to the caller */
jit_word_t jp_code;
jit_word_t jz_code;
ucomisdr(r0, r1);
- jps(0);
- jp_code = _jit->pc.w;
- jzs(0);
- jz_code = _jit->pc.w;
- patch_rel_char(jp_code, _jit->pc.w);
- jmpi(i0);
- patch_rel_char(jz_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jps(0); /* placeholder-target jump, patched below to land on the jmpi */
+ jz_code = jzs(0); /* placeholder-target jump, patched below to land after the jmpi */
+ patch_at(jp_code, _jit->pc.w); /* jps target: the jmpi emitted next */
+ w = jmpi(i0); /* the branch to i0; reached by fall-through or via the jps */
+ patch_at(jz_code, _jit->pc.w); /* jzs target: first byte after the jmpi */
+ return (w);
}
dbopi(ne)
_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jnae(i0);
- return (_jit->pc.w);
+ return (jnae(i0));
}
dbopi(unlt)
static jit_word_t
_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* result of whichever jump emitter runs below */
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0); /* same register on both sides: no compare, jump unconditionally */
else {
ucomisdr(r0, r1);
- jna(i0);
+ w = jna(i0); /* conditional jump on the flags from the compare above */
}
- return (_jit->pc.w);
+ return (w);
}
dbopi(unle)
static jit_word_t
_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* result of whichever jump emitter runs below */
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0); /* same register on both sides: no compare, jump unconditionally */
else {
ucomisdr(r0, r1);
- je(i0);
+ w = je(i0); /* conditional jump on the flags from the compare above */
}
- return (_jit->pc.w);
+ return (w);
}
dbopi(uneq)
static jit_word_t
_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w; /* result of whichever jump emitter runs below */
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0); /* same register on both sides: no compare, jump unconditionally */
else {
ucomisdr(r1, r0);
- jna(i0);
+ w = jna(i0); /* conditional jump; the compare above takes the operands swapped (r1, r0) */
}
- return (_jit->pc.w);
+ return (w);
}
dbopi(unge)
_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r1, r0);
- jnae(i0);
- return (_jit->pc.w);
+ return (jnae(i0));
}
dbopi(ungt)
_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jne(i0);
- return (_jit->pc.w);
+ return (jne(i0));
}
dbopi(ltgt)
_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jnp(i0);
- return (_jit->pc.w);
+ return (jnp(i0));
}
dbopi(ord)
_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jp(i0);
- return (_jit->pc.w);
+ return (jp(i0));
}
dbopi(unord)
# undef fopi