/*
- * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#define USE_INC_DEC 0
#if PROTO
+# if __WORDSIZE == 64 && _WIN32
+# define ONE 1LL
+# else
+# define ONE 1L
+# endif
# if __X32 || __X64_32
# define WIDE 0
# define ldi(u, v) ldi_i(u, v)
# define ldr(u, v) ldr_i(u, v)
# define ldxr(u, v, w) ldxr_i(u, v, w)
# define ldxi(u, v, w) ldxi_i(u, v, w)
+# define str(u, v) str_i(u, v)
# define sti(u, v) sti_i(u, v)
+# define stxr(u, v, w) stxr_i(u, v, w)
# define stxi(u, v, w) stxi_i(u, v, w)
# define can_sign_extend_int_p(im) 1
# define can_zero_extend_int_p(im) 1
# define ldr(u, v) ldr_l(u, v)
# define ldxr(u, v, w) ldxr_l(u, v, w)
# define ldxi(u, v, w) ldxi_l(u, v, w)
+# define str(u, v) str_l(u, v)
# define sti(u, v) sti_l(u, v)
+# define stxr(u, v, w) stxr_l(u, v, w)
# define stxi(u, v, w) stxi_l(u, v, w)
# define can_sign_extend_int_p(im) \
- (((im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \
- ((im) < 0 && (long long)(im) > -0x80000000LL))
+ (((long long)(im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \
+ ((long long)(im) < 0 && (long long)(im) > -0x80000000LL))
# define can_zero_extend_int_p(im) \
((im) >= 0 && (im) < 0x80000000LL)
# define fits_uint32_p(im) (((im) & 0xffffffff00000000LL) == 0)
# else
# define il(l) ii(l)
# endif
-# define patch_abs(instr, label) \
- *(jit_word_t *)(instr - sizeof(jit_word_t)) = label
-# define patch_rel(instr, label) \
- *(jit_int32_t *)(instr - 4) = label - instr
-# define patch_rel_char(instr, label) \
- *(jit_int8_t *)(instr - 1) = label - instr
# define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b)
static void
_rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define rx(rd, md, rb, ri, ms) _rx(_jit, rd, md, rb, ri, ms)
static void
_rx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+/*
+ * prefix 8 bits 0xc4 Three byte VEX
+ * 0xc5 Two byte VEX
+ * 0x8f Three byte XOP
+ * ~R 1 bit Inverted REX.R
+ * ~X 1 bit Inverted REX.X
+ * ~B 1 bit Inverted REX.B
+ * map 5 bits Opcode map to use
+ * W 1 bit REX.W for integer, otherwise opcode extension
+ * ~vvvv 4 bits Inverted XMM or YMM registers
+ * L 1 bit 128 bit vector if 0, 256 otherwise
+ * pp 2 bits Mandatory prefix
+ * 00 none
+ * 01 0x66
+ * 10 0xf3
+ * 11 0xf2
+ *
+ * Three byte VEX:
+ * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
+ * | 1 1 0 0 0 1 0 0 | |~R |~X |~B | map | | W | ~vvvv | L | pp |
+ * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
+ * Three byte XOP:
+ * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
+ * | 1 0 0 0 1 1 1 1 | |~R |~X |~B | map | | W | ~vvvv | L | pp |
+ * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
+ * Two byte VEX:
+ * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
+ * | 1 1 0 0 0 1 0 1 | |~R | ~vvvv | L | pp |
+ * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
+ */
+# define vex(r,x,b,map,w,vvvv,l,pp) _vex(_jit,r,x,b,map,w,vvvv,l,pp)
+static void
+_vex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define nop(n) _nop(_jit, n)
static void _nop(jit_state_t*, jit_int32_t);
# define emms() is(0x770f)
static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
#define addci(r0, r1, i0) _addci(_jit, r0, r1, i0)
static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-# define iaddxr(r0, r1) alur(X86_ADC, r0, r1)
+# define iaddxr(r0, r1) _iaddxr(_jit, r0, r1)
+static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t);
# define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2)
static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define iaddxi(r0, i0) alui(X86_ADC, r0, i0)
static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 1)
+# define hmulr_u(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 0)
+# define hmuli(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 1)
+# define hmuli_u(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 0)
# define umulr(r0) unr(X86_IMUL, r0)
# define umulr_u(r0) unr(X86_MUL, r0)
# define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1)
static void
_rotshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
# define lshr(r0, r1, r2) rotshr(X86_SHL, r0, r1, r2)
+# define qlshr(r0, r1, r2, r3) xlshr(1, r0, r1, r2, r3)
+# define xlshr(s, r0, r1, r2, r3) _xlshr(_jit, s, r0, r1, r2, r3)
+static void
+_xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define lshi(r0, r1, i0) _lshi(_jit, r0, r1, i0)
static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0)
+# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0)
+static void
+_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+# define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3)
+# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0)
# define rshr(r0, r1, r2) rotshr(X86_SAR, r0, r1, r2)
# define rshi(r0, r1, i0) rotshi(X86_SAR, r0, r1, i0)
# define rshr_u(r0, r1, r2) rotshr(X86_SHR, r0, r1, r2)
# define rshi_u(r0, r1, i0) rotshi(X86_SHR, r0, r1, i0)
+# define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3)
+# define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3)
+# define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3)
+static void
+_xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0)
+# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0)
+# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0)
+static void
+_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+# define lrotr(r0, r1, r2) rotshr(X86_ROL, r0, r1, r2)
+# define lroti(r0, r1, i0) rotshi(X86_ROL, r0, r1, i0)
+# define rrotr(r0, r1, r2) rotshr(X86_ROR, r0, r1, r2)
+# define rroti(r0, r1, i0) rotshi(X86_ROR, r0, r1, i0)
# define unr(code, r0) _unr(_jit, code, r0)
static void _unr(jit_state_t*, jit_int32_t, jit_int32_t);
# define inegr(r0) unr(X86_NEG, r0)
# define decr(r0, r1) _decr(_jit, r0, r1)
static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
# endif
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define rbitr(r0, r1) _rbitr(_jit, r0, r1)
+static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define popcntr(r0, r1) _popcntr(_jit, r0, r1)
+static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t);
# define cr(code, r0, r1, r2) _cr(_jit, code, r0, r1, r2)
static void
_cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
# define imovi(r0, i0) _imovi(_jit, r0, i0)
static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
# define movi(r0, i0) _movi(_jit, r0, i0)
-static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
+static
+# if CAN_RIP_ADDRESS
+jit_word_t
+# else
+void
+# endif
+_movi(jit_state_t*, jit_int32_t, jit_word_t);
# define movi_p(r0, i0) _movi_p(_jit, r0, i0)
static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
# define movcr(r0, r1) _movcr(_jit, r0, r1)
static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
# define movsr_u(r0, r1) _movsr_u(_jit, r0, r1)
static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
#define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2)
static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
#define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2)
# define movir_u(r0, r1) _movir_u(_jit, r0, r1)
static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
# endif
-# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
# if __X64 && !__X64_32
-#define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1)
-static void _htonr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
#endif
+# define extr(r0, r1, i0, i1) _extr(_jit, r0, r1, i0, i1)
+static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
+# define extr_u(r0, r1, i0, i1) _extr_u(_jit, r0, r1, i0, i1)
+static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
+# define depr(r0, r1, i0, i1) _depr(_jit, r0, r1, i0, i1)
+static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
# define extr_c(r0, r1) _extr_c(_jit, r0, r1)
static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_uc(r0, r1) _extr_uc(_jit, r0, r1)
static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# endif
# endif
+# define unldr(r0, r1, i0) generic_unldr(r0, r1, i0)
+# define unldi(r0, i0, i1) generic_unldi(r0, i0, i1)
+# define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0)
+# define unldi_u(r0, i0, i1) generic_unldi_u(r0, i0, i1)
# define str_c(r0, r1) _str_c(_jit, r0, r1)
static void _str_c(jit_state_t*, jit_int32_t, jit_int32_t);
# define sti_c(i0, r0) _sti_c(_jit, i0, r0)
# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1)
static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
# endif
+#define unstr(r0, r1, i0) generic_unstr(r0, r1, i0)
+#define unsti(i0, r0, i1) generic_unsti(i0, r0, i1)
# define jcc(code, i0) _jcc(_jit, code, i0)
# define jo(i0) jcc(X86_CC_O, i0)
# define jno(i0) jcc(X86_CC_NO, i0)
# define jng(i0) jcc(X86_CC_NG, i0)
# define jg(i0) jcc(X86_CC_G, i0)
# define jnle(i0) jcc(X86_CC_NLE, i0)
-static void _jcc(jit_state_t*, jit_int32_t, jit_word_t);
+static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t);
# define jccs(code, i0) _jccs(_jit, code, i0)
# define jos(i0) jccs(X86_CC_O, i0)
# define jnos(i0) jccs(X86_CC_NO, i0)
# define jngs(i0) jccs(X86_CC_NG, i0)
# define jgs(i0) jccs(X86_CC_G, i0)
# define jnles(i0) jccs(X86_CC_NLE, i0)
-static void _jccs(jit_state_t*, jit_int32_t, jit_word_t);
+static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t);
# define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1)
-static void _jcr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _jcr(jit_state_t*,
+ jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
# define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1)
-static void _jci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+static jit_word_t _jci(jit_state_t*,
+ jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
# define jci0(code, i0, r0) _jci0(_jit, code, i0, r0)
-static void _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
+static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
# define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1)
static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
# define blti(i0, r0, i1) _blti(_jit, i0, r0, i1)
# define jmpi_p(i0) jmpi(i0)
# endif
# define jmpsi(i0) _jmpsi(_jit, i0)
-static void _jmpsi(jit_state_t*, jit_uint8_t);
+static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t);
# define prolog(node) _prolog(_jit, node)
static void _prolog(jit_state_t*, jit_node_t*);
# define epilog(node) _epilog(_jit, node)
static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
# define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0)
static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
-# define patch_at(node, instr, label) _patch_at(_jit, node, instr, label)
-static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
+# define patch_at(instr, label) _patch_at(_jit, instr, label)
+static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
# if !defined(HAVE_FFSL)
# if __X32
# define ffsl(i) __builtin_ffs(i)
{
if (ri == _NOREG) {
if (rb == _NOREG) {
-#if __X32
- mrm(0x00, r7(rd), 0x05);
-#else
- mrm(0x00, r7(rd), 0x04);
- sib(_SCL1, 0x04, 0x05);
+ /* Use ms == _SCL8 to tell it is a %rip relative displacement */
+#if __X64
+ if (ms == _SCL8)
+#endif
+ mrm(0x00, r7(rd), 0x05);
+#if __X64
+ else {
+ mrm(0x00, r7(rd), 0x04);
+ sib(_SCL1, 0x04, 0x05);
+ }
#endif
ii(md);
}
}
static void
-_nop(jit_state_t *_jit, jit_int32_t count)
+_vex(jit_state_t *_jit, jit_int32_t r, jit_int32_t x, jit_int32_t b,
+ jit_int32_t map, jit_int32_t w, jit_int32_t vvvv, jit_int32_t l,
+ jit_int32_t pp)
{
- switch (count) {
- case 0:
- break;
- case 1: /* NOP */
- ic(0x90); break;
- case 2: /* 66 NOP */
- ic(0x66); ic(0x90);
- break;
- case 3: /* NOP DWORD ptr [EAX] */
- ic(0x0f); ic(0x1f); ic(0x00);
- break;
- case 4: /* NOP DWORD ptr [EAX + 00H] */
- ic(0x0f); ic(0x1f); ic(0x40); ic(0x00);
- break;
- case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
- ic(0x0f); ic(0x1f); ic(0x44); ic(0x00);
- ic(0x00);
- break;
- case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
- ic(0x66); ic(0x0f); ic(0x1f); ic(0x44);
- ic(0x00); ic(0x00);
- break;
- case 7: /* NOP DWORD ptr [EAX + 00000000H] */
- ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000);
- break;
- case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
- ic(0x0f); ic(0x1f); ic(0x84); ic(0x00);
- ii(0x0000);
- break;
- case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
- ic(0x66); ic(0x0f); ic(0x1f); ic(0x84);
- ic(0x00); ii(0x0000);
- break;
- default:
- abort();
+ jit_int32_t v;
+ if (r == _NOREG) r = 0;
+ if (x == _NOREG) x = 0;
+ if (b == _NOREG) b = 0;
+ if (map == 1 && w == 0 && ((x|b) & 8) == 0) {
+ /* Two byte prefix */
+ ic(0xc5);
+ /* ~R */
+ v = (r & 8) ? 0 : 0x80;
}
+ else {
+ /* Three byte prefix */
+ if (map >= 8)
+ ic(0x8f);
+ else
+ ic(0xc4);
+ /* map_select */
+ v = map;
+ /* ~R */
+ if (!(r & 8)) v |= 0x80;
+ /* ~X */
+ if (!(x & 8)) v |= 0x40;
+ /* ~B */
+ if (!(b & 8)) v |= 0x20;
+ ic(v);
+ /* W */
+ v = w ? 0x80 : 0;
+ }
+ /* ~vvvv */
+ v |= (~vvvv & 0x0f) << 3;
+ /* L */
+ if (l) v |= 0x04;
+ /* pp */
+ v |= pp;
+ ic(v);
}
+/*
+ * Emit no-op padding for `count'.
+ *
+ * Each pass of the loop selects the encoding for min(count, 9) -- the
+ * case label is the size that encoding is meant to occupy -- and then
+ * subtracts that amount from count, so requests larger than 9 are
+ * split into several instructions.
+ * A negative count matches no case; the subtraction then brings count
+ * to zero and nothing is emitted.
+ */
+static void
+_nop(jit_state_t *_jit, jit_int32_t count)
+{
+ jit_int32_t i;
+ while (count) {
+ /* Longest single encoding handled below is the 9 byte form */
+ if (count > 9)
+ i = 9;
+ else
+ i = count;
+ switch (i) {
+ case 0:
+ break;
+ case 1: /* NOP */
+ ic(0x90); break;
+ case 2: /* 66 NOP */
+ ic(0x66); ic(0x90);
+ break;
+ case 3: /* NOP DWORD ptr [EAX] */
+ ic(0x0f); ic(0x1f); ic(0x00);
+ break;
+ case 4: /* NOP DWORD ptr [EAX + 00H] */
+ ic(0x0f); ic(0x1f); ic(0x40); ic(0x00);
+ break;
+ case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
+ ic(0x0f); ic(0x1f); ic(0x44); ic(0x00);
+ ic(0x00);
+ break;
+ case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
+ ic(0x66); ic(0x0f); ic(0x1f); ic(0x44);
+ ic(0x00); ic(0x00);
+ break;
+ case 7: /* NOP DWORD ptr [EAX + 00000000H] */
+ ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000);
+ break;
+ case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
+ ic(0x0f); ic(0x1f); ic(0x84); ic(0x00);
+ ii(0x0000);
+ break;
+ case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
+ ic(0x66); ic(0x0f); ic(0x1f); ic(0x84);
+ ic(0x00); ii(0x0000);
+ break;
+ }
+ /* Account for what was just selected and loop for the remainder */
+ count -= i;
+ }
+}
static void
_lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
}
}
+/*
+ * Emit the in-place add-with-carry of r1 into r0.
+ *
+ * Only the plain ADC form, alur(X86_ADC, r0, r1), is currently emitted;
+ * the ADCX alternative is compiled out with #if 0 for the reason in the
+ * FIXME below.
+ */
+static void
+_iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ /* FIXME: this is not doing what I expected for the simple test case:
+ * mov $0xffffffffffffffff, %rax -- rax = 0xffffffffffffffff (-1)
+ * mov $0xffffffffffffffff, %r10 -- r10 = 0xffffffffffffffff (-1)
+ * mov $0x1, %r11d -- r11 = 1
+ * xor %rbx, %rbx -- rbx = 0
+ * (gdb) p $eflags
+ * $1 = [ PF ZF IF ]
+ * add %r11, %rax -- rax = 0x10000000000000000 (0)
+ * does not fit in 64 bit ^
+ * (gdb) p $eflags
+ * $2 = [ CF PF AF ZF IF ]
+ * adcx %r10, %rbx -- r10 = 0xffffffffffffffff (-1)
+ * (gdb) p $eflags
+ * $3 = [ CF PF AF ZF IF ]
+ * (gdb) p/x $r10
+ * $4 = 0xffffffffffffffff
+ * but, r10 should be zero, as it is:
+ * -1 (%r10) + 0 (%rbx) + carry (!!eflags.CF)
+ * FIXME: maybe should only use ADCX in the third operation onward, that
+ * is, after the first ADC? In either case, the add -1+0+carry should
+ * have used and consumed the carry? At least this is what is expected
+ * in Lightning...
+ */
+ /* NOTE(review): ADCX (66 REX.W 0F 38 F6 /r) takes its destination from
+ * ModRM.reg and its source from ModRM.r/m. The disabled sequence below
+ * passes r1 as the reg field and r0 as r/m, so it would appear to
+ * accumulate into r1 instead of r0 -- which matches the transcript
+ * above, where the first listed register (%r10) is left unchanged.
+ * Swapping r0/r1 in the rex() and mrm() calls looks like the fix;
+ * confirm against the instruction reference before enabling. */
+#if 0
+ /* Significantly longer instruction, but avoid cpu stalls as only
+ * the carry flag is used in a sequence. */
+ if (jit_cpu.adx) {
+ /* ADCX */
+ ic(0x66);
+ rex(0, WIDE, r1, _NOREG, r0);
+ ic(0x0f);
+ ic(0x38);
+ ic(0xf6);
+ mrm(0x03, r7(r1), r7(r0));
+ }
+ else
+#endif
+ alur(X86_ADC, r0, r1);
+}
+
static void
_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+ if (
+#if 0
+ /* Do not mix ADC and ADCX */
+ !jit_cpu.adx &&
+#endif
+ can_sign_extend_int_p(i0)) {
movr(r0, r1);
iaddxi(r0, i0);
}
}
#define savset(rn) \
- if (r0 != rn) { \
- sav |= 1 << rn; \
- if (r1 != rn && r2 != rn) \
- set |= 1 << rn; \
- }
+ do { \
+ if (r0 != rn) { \
+ sav |= 1 << rn; \
+ if (r1 != rn && r2 != rn) \
+ set |= 1 << rn; \
+ } \
+ } while (0)
#define isavset(rn) \
- if (r0 != rn) { \
- sav |= 1 << rn; \
- if (r1 != rn) \
- set |= 1 << rn; \
- }
+ do { \
+ if (r0 != rn) { \
+ sav |= 1 << rn; \
+ if (r1 != rn) \
+ set |= 1 << rn; \
+ } \
+ } while (0)
#define qsavset(rn) \
- if (r0 != rn && r1 != rn) { \
- sav |= 1 << rn; \
- if (r2 != rn && r3 != rn) \
- set |= 1 << rn; \
- }
+ do { \
+ if (r0 != rn && r1 != rn) { \
+ sav |= 1 << rn; \
+ if (r2 != rn && r3 != rn) \
+ set |= 1 << rn; \
+ } \
+ } while (0)
#define allocr(rn, rv) \
- if (set & (1 << rn)) \
- (void)jit_get_reg(rv|jit_class_gpr|jit_class_named); \
- if (sav & (1 << rn)) { \
- if ( jit_regset_tstbit(&_jitc->regsav, rv) || \
- !jit_regset_tstbit(&_jitc->reglive, rv)) \
- sav &= ~(1 << rn); \
- else \
- save(rv); \
- }
+ do { \
+ if (set & (1 << rn)) \
+ (void)jit_get_reg(rv|jit_class_gpr|jit_class_named); \
+ if (sav & (1 << rn)) { \
+ if ( jit_regset_tstbit(&_jitc->regsav, rv) || \
+ !jit_regset_tstbit(&_jitc->reglive, rv)) \
+ sav &= ~(1 << rn); \
+ else \
+ save(rv); \
+ } \
+ } while (0)
#define clear(rn, rv) \
- if (set & (1 << rn)) \
- jit_unget_reg(rv); \
- if (sav & (1 << rn)) \
- load(rv);
+ do { \
+ if (set & (1 << rn)) \
+ jit_unget_reg(rv); \
+ if (sav & (1 << rn)) \
+ load(rv); \
+ } while (0)
+
static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
else
umulr_u(mul);
- if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
- xchgr(_RAX_REGNO, _RDX_REGNO);
+ if (r0 != JIT_NOREG) {
+ if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
+ xchgr(_RAX_REGNO, _RDX_REGNO);
+ else {
+ if (r0 != _RDX_REGNO)
+ movr(r0, _RAX_REGNO);
+ movr(r1, _RDX_REGNO);
+ if (r0 == _RDX_REGNO)
+ movr(r0, _RAX_REGNO);
+ }
+ }
else {
- if (r0 != _RDX_REGNO)
- movr(r0, _RAX_REGNO);
+ assert(r1 != JIT_NOREG);
movr(r1, _RDX_REGNO);
- if (r0 == _RDX_REGNO)
- movr(r0, _RAX_REGNO);
}
clear(_RDX_REGNO, _RDX);
qdivr_u(r0, r1, r2, rn(reg));
jit_unget_reg(reg);
}
-#undef clear
-#undef allocr
-#undef savset
static void
_andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
irotshi(code, r0, i0);
}
+static void
+_xlshr(jit_state_t *_jit, jit_bool_t sign,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t sav, set;
+ jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
+ jit_word_t over, zero, over_done, done;
+ sav = set = 0;
+ /* %RCX must be used for shift. */
+ qsavset(_RCX_REGNO);
+ allocr(_RCX_REGNO, _RCX);
+ /* Almost certainly not %RCX */
+ t1 = r1;
+ if (r0 == _RCX_REGNO) {
+ s0 = jit_get_reg(jit_class_gpr);
+ t0 = rn(s0);
+ }
+ else {
+ t0 = r0;
+ /* r0 == r1 is undefined behavior */
+ if (r1 == _RCX_REGNO) {
+ s1 = jit_get_reg(jit_class_gpr);
+ t1 = rn(s1);
+ }
+ }
+ /* Allocate a temporary if a register is used more than once, or if
+ * the value to shift is %RCX */
+ if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
+ s2 = jit_get_reg(jit_class_gpr);
+ t2 = rn(s2);
+ movr(t2, r2);
+ }
+ else
+ t2 = r2;
+ /* Allocate temporary if shift is also one of the outputs */
+ if (r0 == r3 || r1 == r3) {
+ s3 = jit_get_reg(jit_class_gpr);
+ t3 = rn(s3);
+ movr(t3, r3);
+ }
+ else
+ t3 = r3;
+ /* Bits to shift right */
+ movi(t1, 0);
+ /* Shift in %RCX */
+ /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
+ movr(_RCX_REGNO, t3);
+ /* Copy value to low register */
+ movr(t0, t2);
+ /* SHLD shifts t0 left pulling extra bits in the right from t1.
+ * It is very handy to shift bignums, but lightning does not support
+ * these, nor 128 bit integers. The use of q{l,r}sh{r,i} is to verify
+ * if there is precision loss in a shift and/or have it as a quick way
+ * to multiply or divide by powers of two. */
+ /* SHLD */
+ rex(0, WIDE, t1, _NOREG, t0);
+ ic(0xf);
+ ic(0xa5);
+ mrm(0x03, r7(t1), r7(t0));
+ /* Must swap results if shift value is __WORDSIZE */
+ alui(X86_CMP, t3, __WORDSIZE);
+ over = jes(_jit->pc.w);
+ /* Calculate bits to shift right and fill high register */
+ rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
+ if (sign)
+ rshr(t1, t2, _RCX_REGNO);
+ else
+ rshr_u(t1, t2, _RCX_REGNO);
+ /* FIXME t3 == %rcx only happens in 32 bit as %a3 (JIT_A3) is not
+ * available -- it might be made available at some point, to
+ * allow optimizing usage of arguments in registers. For now
+ * keep the code, as one might cheat and use _RCX directly,
+ * which is not officially supported, but *must* work. */
+ /* Need to sign extend high register if shift value is zero */
+ if (t3 == _RCX_REGNO)
+ alui(X86_CMP, t3, __WORDSIZE);
+ else
+ alui(X86_CMP, t3, 0);
+ /* Finished. */
+ zero = jes(_jit->pc.w);
+ done = jmpsi(_jit->pc.w);
+ /* Swap registers if shift is __WORDSIZE */
+ patch_at(over, _jit->pc.w);
+ xchgr(t0, t1);
+ over_done = jmpsi(_jit->pc.w);
+ /* If shift value is zero */
+ patch_at(zero, _jit->pc.w);
+ if (sign)
+ rshi(t1, t2, __WORDSIZE - 1);
+ else
+ movi(t1, 0);
+ patch_at(over_done, _jit->pc.w);
+ patch_at(done, _jit->pc.w);
+ /* Release %RCX (if spilled) after branches */
+ clear(_RCX_REGNO, _RCX);
+ if (t3 != r3)
+ jit_unget_reg(s3);
+ if (t2 != r2)
+ jit_unget_reg(s2);
+ if (t1 != r1) {
+ movr(r1, t1);
+ jit_unget_reg(s1);
+ }
+ if (t0 != r0) {
+ movr(r0, t0);
+ jit_unget_reg(s0);
+ }
+}
+
static void
_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
rotshi(X86_SHL, r0, r1, i0);
}
+/*
+ * Left shift of r2 by the immediate i0 with a two register result:
+ * r0 receives r2 << i0 and r1 receives the bits shifted out on the
+ * left, obtained as r2 >> (__WORDSIZE - i0) using an arithmetic shift
+ * when `sign' is set and a logical shift otherwise.
+ *
+ * i0 == 0 and i0 == __WORDSIZE are handled apart so that no shift by
+ * __WORDSIZE is ever requested from rshi()/lshi().
+ */
+static void
+_xlshi(jit_state_t *_jit, jit_bool_t sign,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+ if (i0 == 0) {
+ /* Nothing shifted out: r0 is a copy, r1 is sign fill or zero */
+ movr(r0, r2);
+ if (sign)
+ rshi(r1, r2, __WORDSIZE - 1);
+ else
+ movi(r1, 0);
+ }
+ else if (i0 == __WORDSIZE) {
+ /* Whole word shifted out: it all lands in r1, r0 is zero */
+ movr(r1, r2);
+ movi(r0, 0);
+ }
+ else {
+ /* Only 0 < i0 < __WORDSIZE is valid here; presumably jit_uword_t
+ * is unsigned so that a negative i0 also fails this check */
+ assert((jit_uword_t)i0 <= __WORDSIZE);
+ /* NOTE(review): r1 is written before r2 is read again by lshi();
+ * this assumes r1 != r2 -- confirm callers guarantee it */
+ if (sign)
+ rshi(r1, r2, __WORDSIZE - i0);
+ else
+ rshi_u(r1, r2, __WORDSIZE - i0);
+ lshi(r0, r2, i0);
+ }
+}
+
+static void
+_xrshr(jit_state_t *_jit, jit_bool_t sign,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+ jit_int32_t sav, set;
+ jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
+ jit_word_t over, zero, done;
+ sav = set = 0;
+ /* %RCX must be used for shift. */
+ qsavset(_RCX_REGNO);
+ allocr(_RCX_REGNO, _RCX);
+ /* Almost certainly not %RCX */
+ t1 = r1;
+ if (r0 == _RCX_REGNO) {
+ s0 = jit_get_reg(jit_class_gpr);
+ t0 = rn(s0);
+ }
+ else {
+ t0 = r0;
+ /* r0 == r1 is undefined behavior */
+ if (r1 == _RCX_REGNO) {
+ s1 = jit_get_reg(jit_class_gpr);
+ t1 = rn(s1);
+ }
+ }
+ /* Allocate a temporary if a register is used more than once, or if
+ * the value to shift is %RCX */
+ if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
+ s2 = jit_get_reg(jit_class_gpr);
+ t2 = rn(s2);
+ movr(t2, r2);
+ }
+ else
+ t2 = r2;
+ /* Allocate temporary if shift is also one of the outputs */
+ if (r0 == r3 || r1 == r3) {
+ s3 = jit_get_reg(jit_class_gpr);
+ t3 = rn(s3);
+ movr(t3, r3);
+ }
+ else
+ t3 = r3;
+ /* Bits to shift left */
+ if (sign)
+ rshi(t1, t2, __WORDSIZE - 1);
+ else
+ movi(t1, 0);
+ /* Shift in %RCX */
+ /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
+ movr(_RCX_REGNO, t3);
+ /* Copy value to low register */
+ movr(t0, t2);
+ /* SHRD shifts t0 right pulling extra bits in the left from t1 */
+ /* SHRD */
+ rex(0, WIDE, t1, _NOREG, t0);
+ ic(0xf);
+ ic(0xad);
+ mrm(0x03, r7(t1), r7(t0));
+ /* Must swap results if shift value is __WORDSIZE */
+ alui(X86_CMP, t3, __WORDSIZE);
+ over = jes(_jit->pc.w);
+ /* Already zero or sign extended if shift value is zero */
+ alui(X86_CMP, t3, 0);
+ zero = jes(_jit->pc.w);
+ /* Calculate bits to shift left and fill high register */
+ rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
+ lshr(t1, t2, _RCX_REGNO);
+ done = jmpsi(_jit->pc.w);
+ /* Swap registers if shift is __WORDSIZE */
+ patch_at(over, _jit->pc.w);
+ xchgr(t0, t1);
+ /* If shift value is zero */
+ patch_at(zero, _jit->pc.w);
+ patch_at(done, _jit->pc.w);
+ /* Release %RCX (if spilled) after branches */
+ clear(_RCX_REGNO, _RCX);
+ if (t3 != r3)
+ jit_unget_reg(s3);
+ if (t2 != r2)
+ jit_unget_reg(s2);
+ if (t1 != r1) {
+ movr(r1, t1);
+ jit_unget_reg(s1);
+ }
+ if (t0 != r0) {
+ movr(r0, t0);
+ jit_unget_reg(s0);
+ }
+}
+
+/*
+ * Right shift of r2 by the immediate i0 with a two register result:
+ * r0 receives r2 >> i0 (arithmetic when `sign' is set, logical
+ * otherwise) and r1 receives the bits shifted out on the right,
+ * obtained as r2 << (__WORDSIZE - i0).
+ *
+ * i0 == 0 and i0 == __WORDSIZE are handled apart so that no shift by
+ * __WORDSIZE is ever requested from rshi()/lshi().
+ */
+static void
+_xrshi(jit_state_t *_jit, jit_bool_t sign,
+ jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+ if (i0 == 0) {
+ /* Nothing shifted out: r0 is a copy, r1 is sign fill or zero */
+ movr(r0, r2);
+ if (sign)
+ rshi(r1, r2, __WORDSIZE - 1);
+ else
+ movi(r1, 0);
+ }
+ else if (i0 == __WORDSIZE) {
+ /* Whole word shifted out: it all lands in r1, r0 is sign fill
+ * or zero */
+ movr(r1, r2);
+ if (sign)
+ rshi(r0, r2, __WORDSIZE - 1);
+ else
+ movi(r0, 0);
+ }
+ else {
+ /* Only 0 < i0 < __WORDSIZE is valid here; presumably jit_uword_t
+ * is unsigned so that a negative i0 also fails this check */
+ assert((jit_uword_t)i0 <= __WORDSIZE);
+ /* NOTE(review): r1 is written before r2 is read again by the
+ * right shift; this assumes r1 != r2 -- confirm callers
+ * guarantee it */
+ lshi(r1, r2, __WORDSIZE - i0);
+ if (sign)
+ rshi(r0, r2, i0);
+ else
+ rshi_u(r0, r2, i0);
+ }
+}
+
static void
_unr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
{
}
#endif
+/*
+ * Count leading ones of r1 into r0, implemented as the leading zero
+ * count of the value produced by comr() (presumably the bitwise
+ * complement of r1). r0 is used as the intermediate.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1);
+ clzr(r0, r0);
+}
+
+/*
+ * Count leading zeros of r1 into r0.
+ *
+ * With jit_cpu.abm set, an 0xf3 prefix is emitted so that the 0f bd
+ * opcode is LZCNT and nothing else is needed. Otherwise the same opcode
+ * is BSR, and a fixup sequence converts its result and supplies
+ * __WORDSIZE for a zero input.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ /* w: forward branch taken for a zero input, x: branch over the
+ * zero input fixup */
+ jit_word_t w, x;
+ /* LZCNT */
+ if (jit_cpu.abm)
+ ic(0xf3);
+ /* else BSR */
+ rex(0, WIDE, r0, _NOREG, r1);
+ ic(0x0f);
+ ic(0xbd);
+ mrm(0x3, r7(r0), r7(r1));
+ if (!jit_cpu.abm) {
+ /* jump if undefined: r1 == 0 */
+ w = jccs(X86_CC_E, _jit->pc.w);
+ /* count leading zeros */
+ rsbi(r0, r0, __WORDSIZE - 1);
+ /* done */
+ x = jmpsi(_jit->pc.w);
+ /* if r1 == 0 */
+ patch_at(w, _jit->pc.w);
+ movi(r0, __WORDSIZE);
+ /* not undefined */
+ patch_at(x, _jit->pc.w);
+ }
+ /* LZCNT has defined behavior for value zero and count leading zeros */
+}
+
+/*
+ * Count trailing ones of r1 into r0, implemented as the trailing zero
+ * count of the value produced by comr() (presumably the bitwise
+ * complement of r1). r0 is used as the intermediate.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1);
+ ctzr(r0, r0);
+}
+
+/*
+ * Count trailing zeros of r1 into r0.
+ *
+ * With jit_cpu.abm set, an 0xf3 prefix is emitted so that the 0f bc
+ * opcode is TZCNT and nothing else is needed. Otherwise the same opcode
+ * is BSF, which reports a zero source through ZF and leaves no usable
+ * result, so r0 must then be replaced by __WORDSIZE:
+ *  - with conditional moves and a free temporary, the temporary is
+ *    preloaded with __WORDSIZE and CMOVE copies it when ZF is set;
+ *  - otherwise a short branch skips the fixup when the source was
+ *    not zero.
+ *
+ * NOTE(review): TZCNT is enumerated by the BMI1 CPUID bit while LZCNT
+ * is the ABM one; confirm that jit_cpu.abm being set really implies
+ * TZCNT is available, since without it the prefixed opcode runs as BSF.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_word_t w;
+ jit_int32_t t0;
+ if (!jit_cpu.abm) {
+ /* jit_class_chk: the request may fail, leaving t0 == _NOREG */
+ if (jit_cmov_p())
+ t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk);
+ else
+ t0 = _NOREG;
+ if (t0 != _NOREG)
+ movi(rn(t0), __WORDSIZE);
+ }
+ /* TZCNT */
+ if (jit_cpu.abm)
+ ic(0xf3);
+ /* else BSF */
+ rex(0, WIDE, r0, _NOREG, r1);
+ ic(0x0f);
+ ic(0xbc);
+ mrm(0x3, r7(r0), r7(r1));
+ if (!jit_cpu.abm) {
+ /* No conditional move or need spill/reload a temporary */
+ if (t0 == _NOREG) {
+ /* ZF is set only for a zero source, so the fixup must be
+ * skipped when ZF is clear. Branching on E here would
+ * overwrite every valid result with __WORDSIZE and leave
+ * the zero case unhandled. */
+ w = jccs(X86_CC_NE, _jit->pc.w);
+ movi(r0, __WORDSIZE);
+ patch_at(w, _jit->pc.w);
+ }
+ else {
+ /* CMOVE */
+ rex(0, WIDE, r0, _NOREG, rn(t0));
+ ic(0x0f);
+ ic(0x44);
+ mrm(0x3, r7(r0), r7(rn(t0)));
+ jit_unget_reg(t0);
+ }
+ }
+ /* TZCNT has defined behavior for value zero */
+}
+
+static void
+_rbitr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_word_t loop;
+ jit_int32_t sav, set;
+ jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
+ static const unsigned char swap_tab[256] = {
+ 0, 128, 64, 192, 32, 160, 96, 224,
+ 16, 144, 80, 208, 48, 176, 112, 240,
+ 8, 136, 72, 200, 40, 168, 104, 232,
+ 24, 152, 88, 216 ,56, 184, 120, 248,
+ 4, 132, 68, 196, 36, 164, 100, 228,
+ 20, 148, 84, 212, 52, 180, 116, 244,
+ 12, 140, 76, 204, 44, 172, 108, 236,
+ 28, 156, 92, 220, 60, 188, 124, 252,
+ 2, 130, 66, 194, 34, 162, 98, 226,
+ 18, 146, 82, 210, 50, 178, 114, 242,
+ 10, 138, 74, 202, 42, 170, 106, 234,
+ 26, 154, 90, 218, 58, 186, 122, 250,
+ 6, 134, 70, 198, 38, 166, 102, 230,
+ 22, 150, 86, 214, 54, 182, 118, 246,
+ 14, 142, 78, 206, 46, 174, 110, 238,
+ 30, 158, 94, 222, 62, 190, 126, 254,
+ 1, 129, 65, 193, 33, 161, 97, 225,
+ 17, 145, 81, 209, 49, 177, 113, 241,
+ 9, 137, 73, 201, 41, 169, 105, 233,
+ 25, 153, 89, 217, 57, 185, 121, 249,
+ 5, 133, 69, 197, 37, 165, 101, 229,
+ 21, 149, 85, 213, 53, 181, 117, 245,
+ 13, 141, 77, 205, 45, 173, 109, 237,
+ 29, 157, 93, 221, 61, 189, 125, 253,
+ 3, 131, 67, 195, 35, 163, 99, 227,
+ 19, 147, 83, 211, 51, 179, 115, 243,
+ 11, 139, 75, 203, 43, 171, 107, 235,
+ 27, 155, 91, 219, 59, 187, 123, 251,
+ 7, 135, 71, 199, 39, 167, 103, 231,
+ 23, 151, 87, 215, 55, 183, 119, 247,
+ 15, 143, 79, 207, 47, 175, 111, 239,
+ 31, 159, 95, 223, 63, 191, 127, 255
+ };
+ sav = set = 0;
+ isavset(_RCX_REGNO);
+ allocr(_RCX_REGNO, _RCX);
+ if (r0 == _RCX_REGNO) {
+ t0 = jit_get_reg(jit_class_gpr);
+ r0_reg = rn(t0);
+ }
+ else {
+ t0 = JIT_NOREG;
+ r0_reg = r0;
+ }
+ if (r1 == _RCX_REGNO || r0 == r1) {
+ t1 = jit_get_reg(jit_class_gpr);
+ r1_reg = rn(t1);
+ movr(r1_reg, r1);
+ }
+ else {
+ t1 = JIT_NOREG;
+ r1_reg = r1;
+ }
+ t2 = jit_get_reg(jit_class_gpr);
+ t3 = jit_get_reg(jit_class_gpr);
+#if __WORDSIZE == 32
+ /* Avoid condition that causes running out of registers */
+ if (!reg8_p(r1_reg)) {
+ movi(rn(t2), 0xff);
+ andr(rn(t2), r1_reg, rn(t2));
+ }
+ else
+#endif
+ extr_uc(rn(t2), r1_reg);
+ movi(rn(t3), (jit_word_t)swap_tab);
+ ldxr_uc(r0_reg, rn(t3), rn(t2));
+ movi(_RCX_REGNO, 8);
+ loop = _jit->pc.w;
+ rshr(rn(t2), r1_reg, _RCX_REGNO);
+ extr_uc(rn(t2), rn(t2));
+ lshi(r0_reg, r0_reg, 8);
+ ldxr_uc(rn(t2), rn(t3), rn(t2));
+ orr(r0_reg, r0_reg, rn(t2));
+ addi(_RCX_REGNO, _RCX_REGNO, 8);
+ alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
+ jls(loop);
+ clear(_RCX_REGNO, _RCX);
+ jit_unget_reg(t3);
+ jit_unget_reg(t2);
+ if (t1 != JIT_NOREG)
+ jit_unget_reg(t1);
+ if (t0 != JIT_NOREG) {
+ movr(r0, r0_reg);
+ jit_unget_reg(t0);
+ }
+}
+
+static void
+_popcntr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
+{
+ if (jit_cpu.abm) {
+ ic(0xf3);
+ rex(0, WIDE, r0, _NOREG, r1);
+ ic(0x0f);
+ ic(0xb8);
+ mrm(0x3, r7(r0), r7(r1));
+ }
+ else {
+ jit_word_t loop;
+ jit_int32_t sav, set;
+ jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
+ static const unsigned char pop_tab[256] = {
+ 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
+ 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
+ };
+ sav = set = 0;
+ isavset(_RCX_REGNO);
+ allocr(_RCX_REGNO, _RCX);
+ if (r0 == _RCX_REGNO) {
+ t0 = jit_get_reg(jit_class_gpr);
+ r0_reg = rn(t0);
+ }
+ else {
+ t0 = JIT_NOREG;
+ r0_reg = r0;
+ }
+ if (r1 == _RCX_REGNO || r0 == r1) {
+ t1 = jit_get_reg(jit_class_gpr);
+ r1_reg = rn(t1);
+ movr(r1_reg, r1);
+ }
+ else {
+ t1 = JIT_NOREG;
+ r1_reg = r1;
+ }
+ t2 = jit_get_reg(jit_class_gpr);
+ t3 = jit_get_reg(jit_class_gpr);
+#if __WORDSIZE == 32
+ /* Avoid condition that causes running out of registers */
+ if (!reg8_p(r1_reg)) {
+ movi(rn(t2), 0xff);
+ andr(rn(t2), r1_reg, rn(t2));
+ }
+ else
+#endif
+ extr_uc(rn(t2), r1_reg);
+ movi(rn(t3), (jit_word_t)pop_tab);
+ ldxr_uc(r0_reg, rn(t3), rn(t2));
+ movi(_RCX_REGNO, 8);
+ loop = _jit->pc.w;
+ rshr(rn(t2), r1_reg, _RCX_REGNO);
+ extr_uc(rn(t2), rn(t2));
+ ldxr_uc(rn(t2), rn(t3), rn(t2));
+ addr(r0_reg, r0_reg, rn(t2));
+ addi(_RCX_REGNO, _RCX_REGNO, 8);
+ alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
+ jls(loop);
+ clear(_RCX_REGNO, _RCX);
+ jit_unget_reg(t3);
+ jit_unget_reg(t2);
+ if (t1 != JIT_NOREG)
+ jit_unget_reg(t1);
+ if (t0 != JIT_NOREG) {
+ movr(r0, r0_reg);
+ jit_unget_reg(t0);
+ }
+ }
+}
+
static void
_cr(jit_state_t *_jit,
jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
ii(i0);
# if !__X64_32
}
+ else if (can_sign_extend_int_p(i0)) {
+ rex(0, 1, _NOREG, _NOREG, r0);
+ ic(0xc7);
+ ic(0xc0 | r7(r0));
+ ii(i0);
+ }
else {
rex(0, 1, _NOREG, _NOREG, r0);
ic(0xb8 | r7(r0));
#endif
}
+#if CAN_RIP_ADDRESS /* in this configuration _movi reports a code address */
+static jit_word_t
+#else
static void
+#endif
_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
+#if CAN_RIP_ADDRESS
+ jit_word_t w, rel;
+ w = _jit->pc.w; /* value returned when the lea form is not used */
+ rel = i0 - (w + 8);
+ rel = rel < 0 ? rel - 8 : rel + 8; /* widen the distance by 8 before the range test; presumably a safety margin for the not yet known instruction length */
+ if (can_sign_extend_int_p(rel)) {
+ /* lea rel(%rip), %r0: build i0 as an offset from the code address instead of a full immediate */
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ w = _jit->pc.w; /* returned address is the 0x8d opcode byte, after the rex() call */
+ ic(0x8d);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); /* offset is relative to pc + 5 at this point */
+ }
+ else
+#endif
if (i0)
imovi(r0, i0);
else
ixorr(r0, r0);
+#if CAN_RIP_ADDRESS
+ return (w);
+#endif
}
static jit_word_t
_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
+ jit_word_t w;
rex(0, WIDE, _NOREG, _NOREG, r0);
+ w = _jit->pc.w;
ic(0xb8 | r7(r0));
il(i0);
- return (_jit->pc.w);
+ return (w);
}
static void
mrm(0x03, r7(r0), r7(r1));
}
+/*
+ * Emit an atomic compare and swap built on "lock cmpxchg" (0xf0 0x0f 0xb1).
+ *   r0  register that receives the outcome of the comparison
+ *   r1  register holding the memory address, or _NOREG when the
+ *       address is the immediate i0
+ *   r2  expected value; it is copied to %rax before the instruction
+ *   r3  register operand of the instruction (the replacement value)
+ *   i0  immediate address, only used when r1 is _NOREG
+ * %rax is saved in a scratch register and restored afterwards unless
+ * it is the result register or already holds the expected value.
+ */
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+    jit_int32_t		save_rax, restore_rax;
+    jit_int32_t		ascasr_reg, ascasr_use;
+    /* %rax is overwritten below; preserve it only when its current
+     * value is neither the result nor the expected value. */
+    restore_rax = r0 != _RAX_REGNO && r2 != _RAX_REGNO;
+    if (restore_rax) {
+        save_rax = jit_get_reg(jit_class_gpr);
+        movr(rn(save_rax), _RAX_REGNO);
+    }
+    if (r2 != _RAX_REGNO)
+        movr(_RAX_REGNO, r2);
+    /* An immediate address that does not fit a sign extended 32 bit
+     * value is first loaded in a scratch register. */
+    ascasr_use = r1 == _NOREG && !can_sign_extend_int_p(i0);
+    if (ascasr_use) {
+        ascasr_reg = jit_get_reg(jit_class_gpr);
+        if (ascasr_reg == _RAX) {
+            /* %rax already carries the expected value; take another
+             * register and only then release %rax. */
+            ascasr_reg = jit_get_reg(jit_class_gpr);
+            jit_unget_reg(_RAX);
+        }
+        movi(rn(ascasr_reg), i0);
+    }
+    ic(0xf0);				/* lock */
+    if (ascasr_use)
+        rex(0, WIDE, r3, _NOREG, rn(ascasr_reg));
+    else
+        rex(0, WIDE, r3, _NOREG, r1);
+    ic(0x0f);
+    ic(0xb1);
+    if (r1 != _NOREG)			/* casr */
+        rx(r3, 0, r1, _NOREG, _SCL1);
+    else if (ascasr_use)		/* casi, address in reg */
+        rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1);
+    else				/* casi, address in offset */
+        rx(r3, i0, _NOREG, _NOREG, _SCL1);
+    /* Materialize the E condition in %rax and copy it from there.
+     * Emitting it directly in r0 would be lost, because the copy
+     * below overwrites r0 with %rax whenever r0 is not %rax. */
+    /* NOTE(review): confirm how cc() treats the bits above the low
+     * byte; that helper is not visible from here. */
+    cc(X86_CC_E, _RAX_REGNO);
+    if (r0 != _RAX_REGNO)
+        movr(r0, _RAX_REGNO);
+    if (restore_rax) {
+        movr(_RAX_REGNO, rn(save_rax));
+        jit_unget_reg(save_rax);
+    }
+    if (ascasr_use)
+        jit_unget_reg(ascasr_reg);
+}
+
static void
_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
#endif
static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
extr_us(r0, r1);
ic(0x66);
}
static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
movr(r0, r1);
rex(0, 0, _NOREG, _NOREG, r0);
#if __X64 && !__X64_32
static void
-_htonr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
movr(r0, r1);
rex(0, 1, _NOREG, _NOREG, r0);
}
#endif
+/*
+ * Extract the i1 bit wide field of r1 that starts at bit i0 into r0,
+ * using the left shift / rshi pair below (rshi, as opposed to the
+ * rshi_u used by the unsigned variant, presumably replicates the
+ * top bit of the field).
+ *   i0  first bit of the field, 0 <= i0
+ *   i1  field width, 1 <= i1 and i0 + i1 <= __WORDSIZE
+ */
+static void
+_extr(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
+{
+    assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+    if (i1 == __WORDSIZE)
+        movr(r0, r1);			/* the field is the whole word */
+    else {
+        /* number of bits above the field */
+        jit_word_t	above = __WORDSIZE - (i0 + i1);
+        if (above) {
+            /* move the top bit of the field to the top of the word,
+             * then bring the field down to bit zero */
+            lshi(r0, r1, above);
+            rshi(r0, r0, __WORDSIZE - i1);
+        }
+        else
+            /* field already ends at the top bit: one shift is enough */
+            rshi(r0, r1, __WORDSIZE - i1);
+    }
+}
+
+/*
+ * Unsigned bit field extract: r0 receives the i1 bit wide field of r1
+ * that starts at bit i0, with all bits above the field cleared.
+ *   i0  first bit of the field, 0 <= i0
+ *   i1  field width, 1 <= i1 and i0 + i1 <= __WORDSIZE
+ */
+static void
+_extr_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
+{
+    jit_int32_t		t0;
+    jit_word_t		mask;
+    assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+    if (i1 == __WORDSIZE)
+        movr(r0, r1);
+    /* Only cheaper in code size or number of instructions if i0 is not zero */
+    /* Number of cpu cycles not tested */
+    else if (i0 && jit_cpu.bmi2) {
+        /* mask selects the field in place; PEXT packs the selected
+         * bits of r1 at the bottom of r0 */
+        mask = ((ONE << i1) - 1) << i0;
+        t0 = jit_get_reg(jit_class_gpr);
+        movi(rn(t0), mask);
+        /* PEXT */
+        vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 2);
+        ic(0xf5);
+        mrm(0x03, r7(r0), r7(rn(t0)));
+        jit_unget_reg(t0);
+    }
+    else {
+        mask = (ONE << i1) - 1;
+        if (i0) {
+            rshi_u(r0, r1, i0);
+            andi(r0, r0, mask);
+        }
+        else
+            /* No shift was emitted, so r0 does not hold the source
+             * yet: the field must be masked out of r1 itself. */
+            andi(r0, r1, mask);
+    }
+}
+
+/*
+ * Bit field deposit: replace the i1 bit wide field of r0 that starts
+ * at bit i0 with the low i1 bits of r1; the other bits of r0 are kept.
+ *   i0  first bit of the field, 0 <= i0
+ *   i1  field width, 1 <= i1 and i0 + i1 <= __WORDSIZE
+ */
+static void
+_depr(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
+{
+    jit_word_t		field;
+    jit_int32_t		bits, keep;
+    assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+    if (i1 == __WORDSIZE) {
+        /* the field is the whole word */
+        movr(r0, r1);
+        return;
+    }
+    /* i1 low bits set: the field before it is moved in place */
+    field = (ONE << i1) - 1;
+    /* Only cheaper in code size or number of instructions if i0 is not zero */
+    /* Number of cpu cycles not tested */
+    if (i0 && jit_cpu.bmi2) {
+        field <<= i0;
+        bits = jit_get_reg(jit_class_gpr);
+        keep = jit_get_reg(jit_class_gpr);
+        movi(rn(bits), field);
+        /* remember the previous r0, it is overwritten by PDEP */
+        movr(rn(keep), r0);
+        /* PDEP */
+        vex(r0, _NOREG, rn(bits), 2, WIDE, r1, 0, 3);
+        ic(0xf5);
+        mrm(0x03, r7(r0), r7(rn(bits)));
+        /* merge back the bits of the previous r0 outside the field */
+        andi(rn(keep), rn(keep), ~field);
+        orr(r0, r0, rn(keep));
+        jit_unget_reg(keep);
+        jit_unget_reg(bits);
+        return;
+    }
+    /* generic sequence: isolate the new bits, align, clear, merge */
+    bits = jit_get_reg(jit_class_gpr);
+    andi(rn(bits), r1, field);
+    if (i0)
+        lshi(rn(bits), rn(bits), i0);
+    andi(r0, r0, ~(field << i0));
+    orr(r0, r0, rn(bits));
+    jit_unget_reg(bits);
+}
+
static void
_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x0f);
+ ic(0xbe);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x0f);
ic(0xbe);
_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x0f);
+ ic(0xb6);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x0f);
ic(0xb6);
_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x0f);
+ ic(0xbf);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x0f);
ic(0xbf);
_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x0f);
+ ic(0xb7);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x0f);
ic(0xb7);
_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x63);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
#if __X64
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x63);
_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+# if !__X64_32
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, 0, r0, _NOREG, _NOREG);
+ ic(0x63);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, 0, r0, _NOREG, _NOREG);
ic(0x63);
rx(r0, i0, _NOREG, _NOREG, _SCL1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
+# if __X64_32
+ ldr_i(r0, rn(reg));
+# else
ldr_ui(r0, rn(reg));
+# endif
jit_unget_reg(reg);
}
}
_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
- rex(0, 1, r0, _NOREG, _NOREG);
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x8b);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else if (can_sign_extend_int_p(i0)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x8b);
rx(r0, i0, _NOREG, _NOREG, _SCL1);
}
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
+# if __X64_32
+ ldxr_i(r0, r1, rn(reg));
+# else
ldxr_ui(r0, r1, rn(reg));
+# endif
jit_unget_reg(reg);
}
}
_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 16 : rel + 16;
+ if (can_sign_extend_int_p(rel)) {
+ if (reg8_p(r0)) {
+ rex(0, 0, r0, _NOREG, _NOREG);
+ ic(0x88);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else {
+ reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
+ movr(rn(reg), r0);
+ rex(0, 0, rn(reg), _NOREG, _NOREG);
+ ic(0x88);
+ rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ jit_unget_reg(reg);
+ }
+ }
+ else
+#endif
+ if (address_p(i0)) {
if (reg8_p(r0)) {
rex(0, 0, r0, _NOREG, _NOREG);
ic(0x88);
_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ ic(0x66);
+ rex(0, 0, r0, _NOREG, _NOREG);
+ ic(0x89);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
ic(0x66);
rex(0, 0, r0, _NOREG, _NOREG);
ic(0x89);
_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, 0, r0, _NOREG, _NOREG);
+ ic(0x89);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, 0, r0, _NOREG, _NOREG);
ic(0x89);
rx(r0, i0, _NOREG, _NOREG, _SCL1);
_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x89);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
if (can_sign_extend_int_p(i0)) {
- rex(0, 1, r0, _NOREG, _NOREG);
+ rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x89);
rx(r0, i0, _NOREG, _NOREG, _SCL1);
}
}
#endif
-static void
+static jit_word_t
_jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
{
+ jit_word_t d;
jit_word_t w;
+ w = _jit->pc.w;
+ d = i0 - (w + 2);
ic(0x70 | code);
- w = i0 - (_jit->pc.w + 1);
- ic(w);
+ ic(d);
+ return (w);
}
-static void
+static jit_word_t
_jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
{
+ jit_word_t d;
jit_word_t w;
+ w = _jit->pc.w;
ic(0x0f);
+ d = i0 - (w + 6);
ic(0x80 | code);
- w = i0 - (_jit->pc.w + 4);
- ii(w);
+ ii(d);
+ return (w);
}
-static void
+static jit_word_t
_jcr(jit_state_t *_jit,
jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
alur(X86_CMP, r0, r1);
- jcc(code, i0);
+ return (jcc(code, i0));
}
-static void
+static jit_word_t
_jci(jit_state_t *_jit,
jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
alui(X86_CMP, r0, i1);
- jcc(code, i0);
+ return (jcc(code, i0));
}
-static void
+static jit_word_t
_jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
{
testr(r0, r0);
- jcc(code, i0);
+ return (jcc(code, i0));
}
static jit_word_t
_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_L, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_L, i0, r0, r1));
}
static jit_word_t
_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_L, i0, r0, i1);
- else jci0(X86_CC_S, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_L, i0, r0, i1);
+ else w = jci0(X86_CC_S, i0, r0);
+ return (w);
}
static jit_word_t
_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_B, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_B, i0, r0, r1));
}
static jit_word_t
_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_B, i0, r0, i1);
- else jci0(X86_CC_B, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_B, i0, r0, i1);
+ else w = jci0(X86_CC_B, i0, r0);
+ return (w);
}
static jit_word_t
_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_LE, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_LE, i0, r0, r1);
+ return (w);
}
static jit_word_t
_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_LE, i0, r0, i1);
- else jci0(X86_CC_LE, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_LE, i0, r0, i1);
+ else w = jci0(X86_CC_LE, i0, r0);
+ return (w);
}
static jit_word_t
_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_BE, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_BE, i0, r0, r1);
+ return (w);
}
static jit_word_t
_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_BE, i0, r0, i1);
- else jci0(X86_CC_BE, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_BE, i0, r0, i1);
+ else w = jci0(X86_CC_BE, i0, r0);
+ return (w);
}
static jit_word_t
_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_E, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_E, i0, r0, r1);
+ return (w);
}
static jit_word_t
_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_E, i0, r0, i1);
- else jci0(X86_CC_E, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_E, i0, r0, i1);
+ else w = jci0(X86_CC_E, i0, r0);
+ return (w);
}
static jit_word_t
_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_GE, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_GE, i0, r0, r1);
+ return (w);
}
static jit_word_t
_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_GE, i0, r0, i1);
- else jci0(X86_CC_NS, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_GE, i0, r0, i1);
+ else w = jci0(X86_CC_NS, i0, r0);
+ return (w);
}
static jit_word_t
_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_AE, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_AE, i0, r0, r1);
+ return (w);
}
static jit_word_t
_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_AE, i0, r0, i1);
- else jmpi(i0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_AE, i0, r0, i1);
+ else w = jmpi(i0);
+ return (w);
}
static jit_word_t
_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_G, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_G, i0, r0, r1));
}
static jit_word_t
_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- jci(X86_CC_G, i0, r0, i1);
- return (_jit->pc.w);
+ return (jci(X86_CC_G, i0, r0, i1));
}
static jit_word_t
_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_A, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_A, i0, r0, r1));
}
static jit_word_t
_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_A, i0, r0, i1);
- else jci0(X86_CC_NE, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_A, i0, r0, i1);
+ else w = jci0(X86_CC_NE, i0, r0);
+ return (w);
}
static jit_word_t
_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_NE, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_NE, i0, r0, r1));
}
static jit_word_t
_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_NE, i0, r0, i1);
- else jci0(X86_CC_NE, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_NE, i0, r0, i1);
+ else w = jci0(X86_CC_NE, i0, r0);
+ return (w);
}
static jit_word_t
_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
testr(r0, r1);
- jnz(i0);
- return (_jit->pc.w);
+ return (jnz(i0));
}
static jit_word_t
testr(r0, rn(reg));
jit_unget_reg(reg);
}
- jnz(i0);
- return (_jit->pc.w);
+ return (jnz(i0));
}
static jit_word_t
_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
testr(r0, r1);
- jz(i0);
- return (_jit->pc.w);
+ return (jz(i0));
}
static jit_word_t
testr(r0, rn(reg));
jit_unget_reg(reg);
}
- jz(i0);
- return (_jit->pc.w);
+ return (jz(i0));
}
static jit_word_t
_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
iaddr(r0, r1);
- jo(i0);
- return (_jit->pc.w);
+ return (jo(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
iaddi(r0, i1);
- jo(i0);
- return (_jit->pc.w);
+ return (jo(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
iaddr(r0, r1);
- jc(i0);
- return (_jit->pc.w);
+ return (jc(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
iaddi(r0, i1);
- jc(i0);
- return (_jit->pc.w);
+ return (jc(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
iaddr(r0, r1);
- jno(i0);
- return (_jit->pc.w);
+ return (jno(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
iaddi(r0, i1);
- jno(i0);
- return (_jit->pc.w);
+ return (jno(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
iaddr(r0, r1);
- jnc(i0);
- return (_jit->pc.w);
+ return (jnc(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
iaddi(r0, i1);
- jnc(i0);
- return (_jit->pc.w);
+ return (jnc(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
isubr(r0, r1);
- jo(i0);
- return (_jit->pc.w);
+ return (jo(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
isubi(r0, i1);
- jo(i0);
- return (_jit->pc.w);
+ return (jo(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
isubr(r0, r1);
- jc(i0);
- return (_jit->pc.w);
+ return (jc(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
isubi(r0, i1);
- jc(i0);
- return (_jit->pc.w);
+ return (jc(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
isubr(r0, r1);
- jno(i0);
- return (_jit->pc.w);
+ return (jno(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
isubi(r0, i1);
- jno(i0);
- return (_jit->pc.w);
+ return (jno(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
isubr(r0, r1);
- jnc(i0);
- return (_jit->pc.w);
+ return (jnc(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
isubi(r0, i1);
- jnc(i0);
- return (_jit->pc.w);
+ return (jnc(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t word;
jit_word_t w;
+ jit_word_t d;
+ jit_word_t l = _jit->pc.w + 5;
+ d = i0 - l;
#if __X64
- w = i0 - (_jit->pc.w + 5);
- if ((jit_int32_t)w == w) {
+ if (
+# if __X64_32
+ !((d < 0) ^ (l < 0)) &&
+# endif
+ (jit_int32_t)d == d) {
#endif
+ w = _jit->pc.w;
ic(0xe8);
- w = i0 - (_jit->pc.w + 4);
- ii(w);
- word = _jit->pc.w;
+ ii(d);
#if __X64
}
else
- word = calli_p(i0);
+ w = calli_p(i0);
#endif
- return (word);
+ return (w);
}
#if __X64
static jit_word_t
_calli_p(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t word;
+ jit_word_t w;
jit_int32_t reg;
reg = jit_get_reg(jit_class_gpr);
- word = movi_p(rn(reg), i0);
+ w = movi_p(rn(reg), i0);
callr(rn(reg));
jit_unget_reg(reg);
- return (word);
+ return (w);
}
#endif
static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t word;
jit_word_t w;
+ jit_word_t d;
+ jit_word_t l = _jit->pc.w + 5;
+ d = i0 - l;
#if __X64
- w = i0 - (_jit->pc.w + 5);
- if ((jit_int32_t)w == w) {
+ if (
+# if __X64_32
+ !((d < 0) ^ (l < 0)) &&
+# endif
+ (jit_int32_t)d == d) {
#endif
+ w = _jit->pc.w;
ic(0xe9);
- w = i0 - (_jit->pc.w + 4);
- ii(w);
- word = _jit->pc.w;
+ ii(d);
#if __X64
}
else
- word = jmpi_p(i0);
+ w = jmpi_p(i0);
#endif
- return (word);
+ return (w);
}
#if __X64
static jit_word_t
_jmpi_p(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t word;
+ jit_word_t w;
jit_int32_t reg;
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- word = movi_p(rn(reg), i0);
+ w = movi_p(rn(reg), i0);
jmpr(rn(reg));
jit_unget_reg(reg);
- return (word);
+ return (w);
}
#endif
-static void
+static jit_word_t
_jmpsi(jit_state_t *_jit, jit_uint8_t i0)
{
+ jit_word_t w = _jit->pc.w;
ic(0xeb);
ic(i0);
+ return (w);
}
+#undef clear
+#undef allocr
+#undef savset
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t reg;
+ jit_int32_t reg, offs;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
if (_jitc->function->assume_frame)
return;
(_jitc->function->self.alen > 32 ?
_jitc->function->self.alen : 32) -
/* align stack at 16 bytes */
- _jitc->function->self.aoff) + 15) & -16) +
- stack_adjust;
+ _jitc->function->self.aoff) + 15) & -16);
#else
_jitc->function->stack = (((_jitc->function->self.alen -
- _jitc->function->self.aoff) + 15) & -16) +
- stack_adjust;
+ _jitc->function->self.aoff) + 15) & -16);
#endif
- subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+ if (_jitc->function->stack)
+ _jitc->function->need_stack = 1;
+
+ if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+ /* check if any callee save register needs to be saved */
+ for (reg = 0; reg < _jitc->reglen; ++reg)
+ if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+ (_rvs[reg].spec & jit_class_sav)) {
+ _jitc->function->need_stack = 1;
+ break;
+ }
+ }
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ subi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
/* callee save registers */
-#if __X32
- if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
- stxi(12, _RSP_REGNO, _RDI_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
- stxi( 8, _RSP_REGNO, _RSI_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- stxi( 4, _RSP_REGNO, _RBX_REGNO);
-#else
-# if __CYGWIN__ || _WIN32
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
- sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
- sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
- sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
- sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
- sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
- sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
- sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
- sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
- sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
- sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R15))
- stxi(56, _RSP_REGNO, _R15_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R14))
- stxi(48, _RSP_REGNO, _R14_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R13))
- stxi(40, _RSP_REGNO, _R13_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R12))
- stxi(32, _RSP_REGNO, _R12_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
- stxi(24, _RSP_REGNO, _RSI_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
- stxi(16, _RSP_REGNO, _RDI_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- stxi( 8, _RSP_REGNO, _RBX_REGNO);
-# else
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- stxi(40, _RSP_REGNO, _RBX_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R12))
- stxi(32, _RSP_REGNO, _R12_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R13))
- stxi(24, _RSP_REGNO, _R13_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R14))
- stxi(16, _RSP_REGNO, _R14_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R15))
- stxi( 8, _RSP_REGNO, _R15_REGNO);
-# endif
+ for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ stxi(offs, _RSP_REGNO, rn(iregs[reg]));
+ offs += REAL_WORDSIZE;
+ }
+ }
+#if __X64 && (__CYGWIN__ || _WIN32)
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg]));
+ offs += sizeof(jit_float64_t);
+ }
+ }
#endif
- stxi(0, _RSP_REGNO, _RBP_REGNO);
- movr(_RBP_REGNO, _RSP_REGNO);
+
+ if (_jitc->function->need_frame) {
+ stxi(0, _RSP_REGNO, _RBP_REGNO);
+ movr(_RBP_REGNO, _RSP_REGNO);
+ }
/* alloca */
- subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
+ if (_jitc->function->stack)
+ subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), _jitc->function->self.aoff);
/* test %al, %al */
ic(0x84);
ic(0xc0);
- jes(0);
- nofp_code = _jit->pc.w;
+ nofp_code = jes(0);
/* Save fp registers in the save area, if any is a vararg */
/* Note that the full 16 byte xmm is not saved, because
sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
- patch_rel_char(nofp_code, _jit->pc.w);
+ patch_at(nofp_code, _jit->pc.w);
}
}
#endif
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
+ jit_int32_t reg, offs;
if (_jitc->function->assume_frame)
return;
+ if (_jitc->function->need_frame)
+ movr(_RSP_REGNO, _RBP_REGNO);
+
/* callee save registers */
- movr(_RSP_REGNO, _RBP_REGNO);
-#if __X32
- if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
- ldxi(_RDI_REGNO, _RSP_REGNO, 12);
- if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
- ldxi(_RSI_REGNO, _RSP_REGNO, 8);
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- ldxi(_RBX_REGNO, _RSP_REGNO, 4);
-#else
-# if __CYGWIN__ || _WIN32
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
- sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
- sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
- sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
- sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
- sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
- sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
- sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
- sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
- sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
- sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64);
- if (jit_regset_tstbit(&_jitc->function->regset, _R15))
- ldxi(_R15_REGNO, _RSP_REGNO, 56);
- if (jit_regset_tstbit(&_jitc->function->regset, _R14))
- ldxi(_R14_REGNO, _RSP_REGNO, 48);
- if (jit_regset_tstbit(&_jitc->function->regset, _R13))
- ldxi(_R13_REGNO, _RSP_REGNO, 40);
- if (jit_regset_tstbit(&_jitc->function->regset, _R12))
- ldxi(_R12_REGNO, _RSP_REGNO, 32);
- if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
- ldxi(_RSI_REGNO, _RSP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
- ldxi(_RDI_REGNO, _RSP_REGNO, 16);
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- ldxi(_RBX_REGNO, _RSP_REGNO, 8);
-# else
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- ldxi(_RBX_REGNO, _RSP_REGNO, 40);
- if (jit_regset_tstbit(&_jitc->function->regset, _R12))
- ldxi(_R12_REGNO, _RSP_REGNO, 32);
- if (jit_regset_tstbit(&_jitc->function->regset, _R13))
- ldxi(_R13_REGNO, _RSP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _R14))
- ldxi(_R14_REGNO, _RSP_REGNO, 16);
- if (jit_regset_tstbit(&_jitc->function->regset, _R15))
- ldxi(_R15_REGNO, _RSP_REGNO, 8);
-# endif
+ for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ ldxi(rn(iregs[reg]), _RSP_REGNO, offs);
+ offs += REAL_WORDSIZE;
+ }
+ }
+#if __X64 && (__CYGWIN__ || _WIN32)
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs);
+ offs += sizeof(jit_float64_t);
+ }
+ }
#endif
- ldxi(_RBP_REGNO, _RSP_REGNO, 0);
- addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+ if (_jitc->function->need_frame) {
+ ldxi(_RBP_REGNO, _RSP_REGNO, 0);
+ addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
+ }
+ /* This condition does not happen as much as expected because
+ * it is not safe to not create a frame pointer if any function
+ * is called, even jit functions, as those might call external
+ * functions. */
+ else if (_jitc->function->need_stack)
+ addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
ic(0xc3);
}
{
#if __X32 || __CYGWIN__ || _WIN32
assert(_jitc->function->self.call & jit_call_varargs);
- addi(r0, _RBP_REGNO, _jitc->function->self.size);
+ addi(r0, _RBP_REGNO, jit_selfsize());
#else
jit_int32_t reg;
stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
/* Initialize overflow pointer to the first stack argument. */
- addi(rn(reg), _RBP_REGNO, _jitc->function->self.size);
+ addi(rn(reg), _RBP_REGNO, jit_selfsize());
stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
/* Initialize register save area pointer. */
/* Jump over if there are no remaining arguments in the save area. */
icmpi(rn(rg0), va_gp_max_offset);
- jaes(0);
- ge_code = _jit->pc.w;
+ ge_code = jaes(0);
/* Load the save area pointer in the second temporary. */
ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
jit_unget_reg(rg1);
/* Jump over overflow code. */
- jmpsi(0);
- lt_code = _jit->pc.w;
+ lt_code = jmpsi(0);
/* Where to land if argument is in overflow area. */
- patch_rel_char(ge_code, _jit->pc.w);
+ patch_at(ge_code, _jit->pc.w);
/* Load overflow pointer. */
ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
/* Where to land if argument is in save area. */
- patch_rel_char(lt_code, _jit->pc.w);
+ patch_at(lt_code, _jit->pc.w);
jit_unget_reg(rg0);
#endif
/* Jump over if there are no remaining arguments in the save area. */
icmpi(rn(rg0), va_fp_max_offset);
- jaes(0);
- ge_code = _jit->pc.w;
+ ge_code = jaes(0);
/* Load the save area pointer in the second temporary. */
ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
jit_unget_reg(rg1);
/* Jump over overflow code. */
- jmpsi(0);
- lt_code = _jit->pc.w;
+ lt_code = jmpsi(0);
/* Where to land if argument is in overflow area. */
- patch_rel_char(ge_code, _jit->pc.w);
+ patch_at(ge_code, _jit->pc.w);
/* Load overflow pointer. */
ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
/* Where to land if argument is in save area. */
- patch_rel_char(lt_code, _jit->pc.w);
+ patch_at(lt_code, _jit->pc.w);
jit_unget_reg(rg0);
#endif
}
static void
-_patch_at(jit_state_t *_jit, jit_node_t *node,
- jit_word_t instr, jit_word_t label)
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
{
- switch (node->code) {
-# if __X64
- case jit_code_calli:
- case jit_code_jmpi:
-# endif
- case jit_code_movi:
- patch_abs(instr, label);
+ jit_word_t disp;
+ jit_uint8_t *code = (jit_uint8_t *)instr; /* instr is the address of the opcode byte to inspect */
+ ++instr; /* step over the opcode byte */
+ switch (code[0]) { /* the patch form is chosen from the emitted opcode */
+ /* movi_p: the word following the opcode is replaced by the absolute label */
+ case 0xb8 ... 0xbf:
+ *(jit_word_t *)instr = label;
break;
- default:
- patch_rel(instr, label);
+ /* forward pc relative address known to be in range */
+#if CAN_RIP_ADDRESS
+ /* movi (opcode 0x8d) */
+ case 0x8d:
+ ++instr; /* one more byte precedes the 32 bit offset */
+ goto apply;
+#endif
+ /* jcc: 0x0f followed by a second opcode byte in 0x80..0x8f */
+ case 0x0f:
+ ++instr; /* step over the second opcode byte */
+ if (code[1] < 0x80 || code[1] > 0x8f)
+ goto fail; /* any other second byte is rejected; otherwise fall through to the 32 bit patch */
+ /* calli */
+ case 0xe8:
+ /* jmpi */
+ case 0xe9:
+#if CAN_RIP_ADDRESS
+ apply:
+#endif
+ disp = label - (instr + 4); /* relative to the end of the 4 byte field */
+ assert((jit_int32_t)disp == disp); /* must fit in 32 bits */
+ *(jit_int32_t *)instr = disp;
break;
+ /* jccs */
+ case 0x70 ... 0x7f:
+ /* jmpsi */
+ case 0xeb:
+ disp = label - (instr + 1); /* relative to the end of the 1 byte field */
+ assert((jit_int8_t)disp == disp); /* must fit in 8 bits */
+ *(jit_int8_t *)instr = disp;
+ break;
+ default:
+ fail:
+ abort(); /* opcode not handled by the cases above */
}
}
#endif