git subrepo pull (merge) --force deps/lightning
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86-cpu.c
CommitLineData
4a71579b 1/*
79bfeef6 2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4a71579b
PC
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
/* Avoid emitting INC/DEC: they cause partial stalls. */
21#define USE_INC_DEC 0
22
23#if PROTO
ba86ff93
PC
/*
 * ONE: the integer constant 1, spelled 1LL when __WORDSIZE is 64 and
 * _WIN32 is set, and 1L otherwise.
 * NOTE(review): presumably chosen so that ONE is as wide as jit_word_t
 * (64-bit Windows keeps `long' at 32 bits) -- confirm against users.
 */
#  if __WORDSIZE == 64 && _WIN32
#    define ONE                         1LL
#  else
#    define ONE                         1L
#  endif
4a71579b
PC
/*
 * Word-size dependent aliases and immediate-range predicates.
 *
 * When __X32 or __X64_32 is set, WIDE is 0, the unsuffixed load/store
 * names expand to the `_i' variants and every range predicate is
 * unconditionally true.  Otherwise WIDE is 1, the names expand to the
 * `_l' variants and the predicates check the immediate:
 *
 *   can_sign_extend_int_p(im)  -0x80000000 <  im <= 0x7fffffff
 *                              (the lower bound is exclusive, so
 *                              -0x80000000 itself is rejected)
 *   can_zero_extend_int_p(im)  0 <= im < 0x80000000
 *   fits_uint32_p(im)          bits 32..63 of im are all zero
 */
#  if __X32 || __X64_32
#    define WIDE                        0
#    define ldi(u, v)                   ldi_i(u, v)
#    define ldr(u, v)                   ldr_i(u, v)
#    define ldxr(u, v, w)               ldxr_i(u, v, w)
#    define ldxi(u, v, w)               ldxi_i(u, v, w)
#    define str(u, v)                   str_i(u, v)
#    define sti(u, v)                   sti_i(u, v)
#    define stxr(u, v, w)               stxr_i(u, v, w)
#    define stxi(u, v, w)               stxi_i(u, v, w)
#    define can_sign_extend_int_p(im)   1
#    define can_zero_extend_int_p(im)   1
#    define fits_uint32_p(im)           1
#  else
#    define WIDE                        1
#    define ldi(u, v)                   ldi_l(u, v)
#    define ldr(u, v)                   ldr_l(u, v)
#    define ldxr(u, v, w)               ldxr_l(u, v, w)
#    define ldxi(u, v, w)               ldxi_l(u, v, w)
#    define str(u, v)                   str_l(u, v)
#    define sti(u, v)                   sti_l(u, v)
#    define stxr(u, v, w)               stxr_l(u, v, w)
#    define stxi(u, v, w)               stxi_l(u, v, w)
#    define can_sign_extend_int_p(im)                                   \
        ((long long)(im) >  -0x80000000LL &&                            \
         (long long)(im) <=  0x7fffffffLL)
#    define can_zero_extend_int_p(im)                                   \
        ((im) >= 0 && (im) < 0x80000000LL)
#    define fits_uint32_p(im)           (((im) & 0xffffffff00000000LL) == 0)
#  endif
/*
 * reg8_p(rn): holds only for _RAX_REGNO.._RBX_REGNO (0..3) when any of
 * __X32, __CYGWIN__, __X64_32 or _WIN32 is set, and for every register
 * number otherwise.
 * NOTE(review): presumably "rn is usable as a byte register" -- confirm
 * against the callers.
 */
#  if __X32 || __CYGWIN__ || __X64_32 || _WIN32
#    define reg8_p(rn)                                                  \
        ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
#  else
#    define reg8_p(rn)                  1
#  endif

/* Register numbers. */
#  define _RAX_REGNO                    0
#  define _RCX_REGNO                    1
#  define _RDX_REGNO                    2
#  define _RBX_REGNO                    3
#  define _RSP_REGNO                    4
#  define _RBP_REGNO                    5
#  define _RSI_REGNO                    6
#  define _RDI_REGNO                    7
#  define _R8_REGNO                     8
#  define _R9_REGNO                     9
#  define _R10_REGNO                    10
#  define _R11_REGNO                    11
#  define _R12_REGNO                    12
#  define _R13_REGNO                    13
#  define _R14_REGNO                    14
#  define _R15_REGNO                    15

/* r7() keeps the low three bits of a register number, r8() the low four. */
#  define r7(reg)                       ((reg) & 7)
#  define r8(reg)                       ((reg) & 15)
/* Scale selectors: the value n stands for a scale factor of 1 << n. */
#  define _SCL1                         0x00
#  define _SCL2                         0x01
#  define _SCL4                         0x02
#  define _SCL8                         0x03

/*
 * Two-operand ALU selectors, already shifted left by three bits.
 * Each expansion is parenthesized so that it stays a single operand in
 * any surrounding expression (e.g. X86_OR + 1 is 9, not 1 << 4).
 */
#  define X86_ADD                       0
#  define X86_OR                        (1 << 3)
#  define X86_ADC                       (2 << 3)
#  define X86_SBB                       (3 << 3)
#  define X86_AND                       (4 << 3)
#  define X86_SUB                       (5 << 3)
#  define X86_XOR                       (6 << 3)
#  define X86_CMP                       (7 << 3)

/* Rotate and shift selectors. */
#  define X86_ROL                       0
#  define X86_ROR                       1
#  define X86_RCL                       2
#  define X86_RCR                       3
#  define X86_SHL                       4
#  define X86_SHR                       5
#  define X86_SAR                       7

/* Single-operand selectors. */
#  define X86_NOT                       2
#  define X86_NEG                       3
#  define X86_MUL                       4
#  define X86_IMUL                      5
#  define X86_DIV                       6
#  define X86_IDIV                      7

/* Condition codes; names sharing a value are aliases of one another. */
#  define X86_CC_O                      0x0
#  define X86_CC_NO                     0x1
#  define X86_CC_NAE                    0x2
#  define X86_CC_B                      0x2
#  define X86_CC_C                      0x2
#  define X86_CC_AE                     0x3
#  define X86_CC_NB                     0x3
#  define X86_CC_NC                     0x3
#  define X86_CC_E                      0x4
#  define X86_CC_Z                      0x4
#  define X86_CC_NE                     0x5
#  define X86_CC_NZ                     0x5
#  define X86_CC_BE                     0x6
#  define X86_CC_NA                     0x6
#  define X86_CC_A                      0x7
#  define X86_CC_NBE                    0x7
#  define X86_CC_S                      0x8
#  define X86_CC_NS                     0x9
#  define X86_CC_P                      0xa
#  define X86_CC_PE                     0xa
#  define X86_CC_NP                     0xb
#  define X86_CC_PO                     0xb
#  define X86_CC_L                      0xc
#  define X86_CC_NGE                    0xc
#  define X86_CC_GE                     0xd
#  define X86_CC_NL                     0xd
#  define X86_CC_LE                     0xe
#  define X86_CC_NG                     0xe
#  define X86_CC_G                      0xf
#  define X86_CC_NLE                    0xf
/*
 * Code emitters.  Each one stores a value through a member of _jit->pc
 * and post-increments that member:
 *
 *   mrm(md, r, m)   stores (md << 6) | (r << 3) | m  through pc.uc
 *   sib(sc, i, b)   stores (sc << 6) | (i << 3) | b  through pc.uc
 *   ic(c)           stores c through pc.uc
 *   is(s)           stores s through pc.us
 *   ii(i)           stores i through pc.ui
 *   il(l)           stores l through pc.ul when __X64 && !__X64_32,
 *                   otherwise identical to ii(l)
 *
 * Every argument and every expansion is parenthesized, so an argument
 * such as `a | b' is shifted as a whole instead of being split by
 * operator precedence.  All of them expect a `_jit' in scope.
 */
#  define mrm(md, r, m)                                                 \
        (*_jit->pc.uc++ = (((md) << 6) | ((r) << 3) | (m)))
#  define sib(sc, i, b)                                                 \
        (*_jit->pc.uc++ = (((sc) << 6) | ((i) << 3) | (b)))
#  define ic(c)                         (*_jit->pc.uc++ = (c))
#  define is(s)                         (*_jit->pc.us++ = (s))
#  define ii(i)                         (*_jit->pc.ui++ = (i))
#  if __X64 && !__X64_32
#    define il(l)                       (*_jit->pc.ul++ = (l))
#  else
#    define il(l)                       ii(l)
#  endif
4a71579b
PC
148# define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b)
149static void
150_rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
151# define rx(rd, md, rb, ri, ms) _rx(_jit, rd, md, rb, ri, ms)
152static void
153_rx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
ba86ff93
PC
/*
 *      prefix  8 bits  0xc4    Three byte VEX
 *                      0xc5    Two byte VEX
 *                      0x8f    Three byte XOP
 *      ~R      1 bit   Inverted REX.R
 *      ~X      1 bit   Inverted REX.X
 *      ~B      1 bit   Inverted REX.B
 *      map     5 bits  Opcode map to use
 *      W       1 bit   REX.W for integer, otherwise opcode extension
 *      ~vvvv   4 bits  Inverted XMM or YMM registers
 *      L       1 bit   128 bit vector if 0, 256 otherwise
 *      pp      2 bits  Mandatory prefix
 *                      00      none
 *                      01      0x66
 *                      10      0xf3
 *                      11      0xf2
 *
 * Three byte VEX:
 * +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+
 * | 1   1   0   0   0   1   0   0 |  |~R |~X |~B |        map        |  | W |     ~vvvv     | L |   pp  |
 * +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+
 * Three byte XOP:
 * +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+
 * | 1   0   0   0   1   1   1   1 |  |~R |~X |~B |        map        |  | W |     ~vvvv     | L |   pp  |
 * +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+
 * Two byte VEX:
 * +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+
 * | 1   1   0   0   0   1   0   1 |  |~R |     ~vvvv     | L |   pp  |
 * +---+---+---+---+---+---+---+---+  +---+---+---+---+---+---+---+---+
 */
184# define vex(r,x,b,map,w,vvvv,l,pp) _vex(_jit,r,x,b,map,w,vvvv,l,pp)
185static void
186_vex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
187 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
188# define nop(n) _nop(_jit, n)
189static void _nop(jit_state_t*, jit_int32_t);
190# define emms() is(0x770f)
191# define lea(md, rb, ri, ms, rd) _lea(_jit, md, rb, ri, ms, rd)
192static void
193_lea(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
194# define pushr(r0) _pushr(_jit, r0)
195static void _pushr(jit_state_t*, jit_int32_t) maybe_unused;
196# define popr(r0) _popr(_jit, r0)
197static void _popr(jit_state_t*, jit_int32_t) maybe_unused;
198# define xchgr(r0, r1) _xchgr(_jit, r0, r1)
199static void _xchgr(jit_state_t*, jit_int32_t, jit_int32_t);
200# define testr(r0, r1) _testr(_jit, r0, r1)
201static void _testr(jit_state_t*, jit_int32_t, jit_int32_t);
202# define testi(r0, i0) _testi(_jit, r0, i0)
203static void _testi(jit_state_t*, jit_int32_t, jit_word_t);
204# define cc(code, r0) _cc(_jit, code, r0)
205static void _cc(jit_state_t*, jit_int32_t, jit_int32_t);
206# define icmpr(r0, r1) alur(X86_CMP, r0, r1)
207# define alur(code, r0, r1) _alur(_jit, code, r0, r1)
208static void _alur(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
209# define icmpi(r0, i0) alui(X86_CMP, r0, i0)
210# define alui(code, r0, i0) _alui(_jit, code, r0, i0)
211static void _alui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
212# define iaddr(r0, r1) alur(X86_ADD, r0, r1)
213# define save(r0) _save(_jit, r0)
214static void _save(jit_state_t*, jit_int32_t);
215# define load(r0) _load(_jit, r0)
216static void _load(jit_state_t*, jit_int32_t);
217# define addr(r0, r1, r2) _addr(_jit, r0, r1, r2)
218static void _addr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
219# define iaddi(r0, i0) alui(X86_ADD, r0, i0)
220# define addi(r0, r1, i0) _addi(_jit, r0, r1, i0)
221static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
222#define addcr(r0, r1, r2) _addcr(_jit, r0, r1, r2)
223static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
224#define addci(r0, r1, i0) _addci(_jit, r0, r1, i0)
225static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
79bfeef6
PC
226# define iaddxr(r0, r1) _iaddxr(_jit, r0, r1)
227static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t);
4a71579b
PC
228# define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2)
229static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
230# define iaddxi(r0, i0) alui(X86_ADC, r0, i0)
231# define addxi(r0, r1, i0) _addxi(_jit, r0, r1, i0)
232static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
233# define isubr(r0, r1) alur(X86_SUB, r0, r1)
234# define subr(r0, r1, r2) _subr(_jit, r0, r1, r2)
235static void _subr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
236# define isubi(r0, i0) alui(X86_SUB, r0, i0)
237# define subi(r0, r1, i0) _subi(_jit, r0, r1, i0)
238static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
239# define subcr(r0, r1, r2) _subcr(_jit, r0, r1, r2)
240static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
241# define subci(r0, r1, i0) _subci(_jit, r0, r1, i0)
242static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
243# define isubxr(r0, r1) alur(X86_SBB, r0, r1)
244# define subxr(r0, r1, r2) _subxr(_jit, r0, r1, r2)
245static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
246# define isubxi(r0, i0) alui(X86_SBB, r0, i0)
247# define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0)
248static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
249# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0)
250static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
251# define imulr(r0, r1) _imulr(_jit, r0, r1)
252static void _imulr(jit_state_t*, jit_int32_t, jit_int32_t);
253# define imuli(r0, r1, i0) _imuli(_jit, r0, r1, i0)
254static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
255# define mulr(r0, r1, r2) _mulr(_jit, r0, r1, r2)
256static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
257# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
258static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
ba86ff93
PC
259# define hmulr(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 1)
260# define hmulr_u(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 0)
261# define hmuli(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 1)
262# define hmuli_u(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 0)
4a71579b
PC
263# define umulr(r0) unr(X86_IMUL, r0)
264# define umulr_u(r0) unr(X86_MUL, r0)
265# define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1)
266# define qmulr_u(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 0)
267# define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
268static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t,
269 jit_int32_t,jit_int32_t, jit_bool_t);
270# define qmuli(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 1)
271# define qmuli_u(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 0)
272# define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign)
273static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t,
274 jit_int32_t,jit_word_t, jit_bool_t);
275# define sign_extend_rdx_rax() _sign_extend_rdx_rax(_jit)
276static void _sign_extend_rdx_rax(jit_state_t*);
277# define idivr(r0) unr(X86_IDIV, r0)
278# define idivr_u(r0) unr(X86_DIV, r0)
279# define divremr(r0, r1, r2, i0, i1) _divremr(_jit, r0, r1, r2, i0, i1)
280static void
281_divremr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
282 jit_bool_t,jit_bool_t);
283# define divremi(r0, r1, i0, i1, i2) _divremi(_jit, r0, r1, i0, i1, i2)
284static void
285_divremi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_bool_t,jit_bool_t);
286# define divr(r0, r1, r2) divremr(r0, r1, r2, 1, 1)
287# define divi(r0, r1, i0) divremi(r0, r1, i0, 1, 1)
288# define divr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 1)
289# define divi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 1)
290# define qdivr(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 1)
291# define qdivr_u(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 0)
292# define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign)
293static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t,
294 jit_int32_t,jit_int32_t, jit_bool_t);
295# define qdivi(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 1)
296# define qdivi_u(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 0)
297# define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign)
298static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t,
299 jit_int32_t,jit_word_t, jit_bool_t);
300# define remr(r0, r1, r2) divremr(r0, r1, r2, 1, 0)
301# define remi(r0, r1, i0) divremi(r0, r1, i0, 1, 0)
302# define remr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 0)
303# define remi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 0)
304# define iandr(r0, r1) alur(X86_AND, r0, r1)
305# define andr(r0, r1, r2) _andr(_jit, r0, r1, r2)
306static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
307# define iandi(r0, i0) alui(X86_AND, r0, i0)
308# define andi(r0, r1, i0) _andi(_jit, r0, r1, i0)
309static void _andi(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
310# define iorr(r0, r1) alur(X86_OR, r0, r1)
311# define orr(r0, r1, r2) _orr(_jit, r0, r1, r2)
312static void _orr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
313# define iori(r0, i0) alui(X86_OR, r0, i0)
314# define ori(r0, r1, i0) _ori(_jit, r0, r1, i0)
315static void _ori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
316# define ixorr(r0, r1) alur(X86_XOR, r0, r1)
317# define xorr(r0, r1, r2) _xorr(_jit, r0, r1, r2)
318static void _xorr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
319# define ixori(r0, i0) alui(X86_XOR, r0, i0)
320# define xori(r0, r1, i0) _xori(_jit, r0, r1, i0)
321static void _xori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
322# define irotshr(code, r0) _irotshr(_jit, code, r0)
323static void _irotshr(jit_state_t*, jit_int32_t, jit_int32_t);
324# define rotshr(code, r0, r1, r2) _rotshr(_jit, code, r0, r1, r2)
325static void
326_rotshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
327# define irotshi(code, r0, i0) _irotshi(_jit, code, r0, i0)
328static void _irotshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
329# define rotshi(code, r0, r1, i0) _rotshi(_jit, code, r0, r1, i0)
330static void
331_rotshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
332# define lshr(r0, r1, r2) rotshr(X86_SHL, r0, r1, r2)
ba86ff93
PC
333# define qlshr(r0, r1, r2, r3) xlshr(1, r0, r1, r2, r3)
334# define xlshr(s, r0, r1, r2, r3) _xlshr(_jit, s, r0, r1, r2, r3)
335static void
336_xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
337# define lshi(r0, r1, i0) _lshi(_jit, r0, r1, i0)
338static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
ba86ff93
PC
339# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0)
340# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0)
341static void
342_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
343# define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3)
344# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0)
4a71579b
PC
345# define rshr(r0, r1, r2) rotshr(X86_SAR, r0, r1, r2)
346# define rshi(r0, r1, i0) rotshi(X86_SAR, r0, r1, i0)
347# define rshr_u(r0, r1, r2) rotshr(X86_SHR, r0, r1, r2)
348# define rshi_u(r0, r1, i0) rotshi(X86_SHR, r0, r1, i0)
ba86ff93
PC
349# define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3)
350# define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3)
351# define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3)
352static void
353_xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
354# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0)
355# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0)
356# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0)
357static void
358_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
359# define lrotr(r0, r1, r2) rotshr(X86_ROL, r0, r1, r2)
360# define lroti(r0, r1, i0) rotshi(X86_ROL, r0, r1, i0)
361# define rrotr(r0, r1, r2) rotshr(X86_ROR, r0, r1, r2)
362# define rroti(r0, r1, i0) rotshi(X86_ROR, r0, r1, i0)
4a71579b
PC
363# define unr(code, r0) _unr(_jit, code, r0)
364static void _unr(jit_state_t*, jit_int32_t, jit_int32_t);
365# define inegr(r0) unr(X86_NEG, r0)
366# define negr(r0, r1) _negr(_jit, r0, r1)
367static void _negr(jit_state_t*, jit_int32_t, jit_int32_t);
368# define icomr(r0) unr(X86_NOT, r0)
369# define comr(r0, r1) _comr(_jit, r0, r1)
370static void _comr(jit_state_t*, jit_int32_t, jit_int32_t);
371# if USE_INC_DEC
372# define incr(r0, r1) _incr(_jit, r0, r1)
373static void _incr(jit_state_t*, jit_int32_t, jit_int32_t);
374# define decr(r0, r1) _decr(_jit, r0, r1)
375static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
376# endif
79bfeef6
PC
377# define clor(r0, r1) _clor(_jit, r0, r1)
378static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
379# define clzr(r0, r1) _clzr(_jit, r0, r1)
380static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
381# define ctor(r0, r1) _ctor(_jit, r0, r1)
382static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
383# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
384static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
ba86ff93
PC
385# define rbitr(r0, r1) _rbitr(_jit, r0, r1)
386static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t);
387# define popcntr(r0, r1) _popcntr(_jit, r0, r1)
388static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t);
4a71579b
PC
389# define cr(code, r0, r1, r2) _cr(_jit, code, r0, r1, r2)
390static void
391_cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
392# define ci(code, r0, r1, i0) _ci(_jit, code, r0, r1, i0)
393static void
394_ci(jit_state_t *_jit, jit_int32_t, jit_int32_t, jit_int32_t, jit_word_t);
395# define ci0(code, r0, r1) _ci0(_jit, code, r0, r1)
396static void _ci0(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
397# define ltr(r0, r1, r2) _ltr(_jit, r0, r1, r2)
398static void _ltr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
399# define lti(r0, r1, i0) _lti(_jit, r0, r1, i0)
400static void _lti(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
401# define ltr_u(r0, r1, r2) _ltr_u(_jit, r0, r1, r2)
402static void _ltr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
403# define lti_u(r0, r1, i0) ci(X86_CC_B, r0, r1, i0)
404# define ler(r0, r1, r2) _ler(_jit, r0, r1, r2)
405static void _ler(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
406# define lei(r0, r1, i0) ci(X86_CC_LE, r0, r1, i0)
407# define ler_u(r0, r1, r2) _ler_u(_jit, r0, r1, r2)
408static void _ler_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
409# define lei_u(r0, r1, i0) _lei_u(_jit, r0, r1, i0)
410static void _lei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
411# define eqr(r0, r1, r2) _eqr(_jit, r0, r1, r2)
412static void _eqr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
413# define eqi(r0, r1, i0) _eqi(_jit, r0, r1, i0)
414static void _eqi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
415# define ger(r0, r1, r2) _ger(_jit, r0, r1, r2)
416static void _ger(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
417# define gei(r0, r1, i0) _gei(_jit, r0, r1, i0)
418static void _gei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
419# define ger_u(r0, r1, r2) _ger_u(_jit, r0, r1, r2)
420static void _ger_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
421# define gei_u(r0, r1, i0) _gei_u(_jit, r0, r1, i0)
422static void _gei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
423# define gtr(r0, r1, r2) _gtr(_jit, r0, r1, r2)
424static void _gtr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
425# define gti(r0, r1, i0) _ci(_jit, X86_CC_G, r0, r1, i0)
426# define gtr_u(r0, r1, r2) _gtr_u(_jit, r0, r1, r2)
427static void _gtr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
428# define gti_u(r0, r1, i0) _gti_u(_jit, r0, r1, i0)
429static void _gti_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
430# define ner(r0, r1, r2) _ner(_jit, r0, r1, r2)
431static void _ner(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
432# define nei(r0, r1, i0) _nei(_jit, r0, r1, i0)
433static void _nei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
434# define movr(r0, r1) _movr(_jit, r0, r1)
435static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
436# define imovi(r0, i0) _imovi(_jit, r0, i0)
437static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
438# define movi(r0, i0) _movi(_jit, r0, i0)
79bfeef6
PC
439static
440# if CAN_RIP_ADDRESS
441jit_word_t
442# else
443void
444# endif
445_movi(jit_state_t*, jit_int32_t, jit_word_t);
4a71579b
PC
446# define movi_p(r0, i0) _movi_p(_jit, r0, i0)
447static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
448# define movcr(r0, r1) _movcr(_jit, r0, r1)
449static void _movcr(jit_state_t*,jit_int32_t,jit_int32_t);
450# define movcr_u(r0, r1) _movcr_u(_jit, r0, r1)
451static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
452# define movsr(r0, r1) _movsr(_jit, r0, r1)
453static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
454# define movsr_u(r0, r1) _movsr_u(_jit, r0, r1)
455static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
ba3814c1
PC
456# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
457static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
458 jit_int32_t,jit_int32_t,jit_word_t);
459#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
460#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
1f22b268
PC
461#define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2)
462static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
463#define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2)
464static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
4a71579b
PC
465# if __X64 && !__X64_32
466# define movir(r0, r1) _movir(_jit, r0, r1)
467static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
468# define movir_u(r0, r1) _movir_u(_jit, r0, r1)
469static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
470# endif
40a44dcb
PC
471# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
472static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
473# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
474static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
4a71579b 475# if __X64 && !__X64_32
40a44dcb
PC
476#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1)
477static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
4a71579b 478#endif
ba86ff93
PC
479# define extr(r0, r1, i0, i1) _extr(_jit, r0, r1, i0, i1)
480static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
481# define extr_u(r0, r1, i0, i1) _extr_u(_jit, r0, r1, i0, i1)
482static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
483# define depr(r0, r1, i0, i1) _depr(_jit, r0, r1, i0, i1)
484static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
4a71579b
PC
485# define extr_c(r0, r1) _extr_c(_jit, r0, r1)
486static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
487# define extr_uc(r0, r1) _extr_uc(_jit, r0, r1)
488static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
489# define extr_s(r0, r1) movsr(r0, r1)
490# define extr_us(r0, r1) movsr_u(r0, r1)
491# if __X64 && !__X64_32
492# define extr_i(r0, r1) movir(r0, r1)
493# define extr_ui(r0, r1) movir_u(r0, r1)
494# endif
495# define ldr_c(r0, r1) _ldr_c(_jit, r0, r1)
496static void _ldr_c(jit_state_t*, jit_int32_t, jit_int32_t);
497# define ldi_c(r0, i0) _ldi_c(_jit, r0, i0)
498static void _ldi_c(jit_state_t*, jit_int32_t, jit_word_t);
499# define ldr_uc(r0, r1) _ldr_uc(_jit, r0, r1)
500static void _ldr_uc(jit_state_t*, jit_int32_t, jit_int32_t);
501# define ldi_uc(r0, i0) _ldi_uc(_jit, r0, i0)
502static void _ldi_uc(jit_state_t*, jit_int32_t, jit_word_t);
503# define ldr_s(r0, r1) _ldr_s(_jit, r0, r1)
504static void _ldr_s(jit_state_t*, jit_int32_t, jit_int32_t);
505# define ldi_s(r0, i0) _ldi_s(_jit, r0, i0)
506static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t);
507# define ldr_us(r0, r1) _ldr_us(_jit, r0, r1)
508static void _ldr_us(jit_state_t*, jit_int32_t, jit_int32_t);
509# define ldi_us(r0, i0) _ldi_us(_jit, r0, i0)
510static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t);
511# if __X32 || !__X64_32
512# define ldr_i(r0, r1) _ldr_i(_jit, r0, r1)
513static void _ldr_i(jit_state_t*, jit_int32_t, jit_int32_t);
514# define ldi_i(r0, i0) _ldi_i(_jit, r0, i0)
515static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t);
516# endif
517# if __X64
518# if __X64_32
519# define ldr_i(r0, r1) _ldr_ui(_jit, r0, r1)
520# define ldi_i(r0, i0) _ldi_ui(_jit, r0, i0)
521# else
522# define ldr_ui(r0, r1) _ldr_ui(_jit, r0, r1)
523# define ldi_ui(r0, i0) _ldi_ui(_jit, r0, i0)
524# endif
525static void _ldr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
526static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t);
527# if !__X64_32
528# define ldr_l(r0, r1) _ldr_l(_jit, r0, r1)
529static void _ldr_l(jit_state_t*, jit_int32_t, jit_int32_t);
530# define ldi_l(r0, i0) _ldi_l(_jit, r0, i0)
531static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t);
532# endif
533# endif
534# define ldxr_c(r0, r1, r2) _ldxr_c(_jit, r0, r1, r2)
535static void _ldxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
536# define ldxi_c(r0, r1, i0) _ldxi_c(_jit, r0, r1, i0)
537static void _ldxi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
538# define ldxr_uc(r0, r1, r2) _ldxr_uc(_jit, r0, r1, r2)
539static void _ldxr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
540# define ldxi_uc(r0, r1, i0) _ldxi_uc(_jit, r0, r1, i0)
541static void _ldxi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
542# define ldxr_s(r0, r1, r2) _ldxr_s(_jit, r0, r1, r2)
543static void _ldxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
544# define ldxi_s(r0, r1, i0) _ldxi_s(_jit, r0, r1, i0)
545static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
546# define ldxr_us(r0, r1, r2) _ldxr_us(_jit, r0, r1, r2)
547static void _ldxr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
548# define ldxi_us(r0, r1, i0) _ldxi_us(_jit, r0, r1, i0)
549static void _ldxi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
550# if __X32 || !__X64_32
551# define ldxr_i(r0, r1, r2) _ldxr_i(_jit, r0, r1, r2)
552static void _ldxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
553# define ldxi_i(r0, r1, i0) _ldxi_i(_jit, r0, r1, i0)
554static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
555# endif
556# if __X64
557# if __X64_32
558# define ldxr_i(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2)
559# define ldxi_i(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0)
560# else
561# define ldxr_ui(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2)
562# define ldxi_ui(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0)
563# endif
564static void _ldxr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
565static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
566# if !__X64_32
567# define ldxr_l(r0, r1, r2) _ldxr_l(_jit, r0, r1, r2)
568static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
569# define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0)
570static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
571# endif
572# endif
ba86ff93
PC
573# define unldr(r0, r1, i0) generic_unldr(r0, r1, i0)
574# define unldi(r0, i0, i1) generic_unldi(r0, i0, i1)
575# define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0)
576# define unldi_u(r0, i0, i1) generic_unldi_u(r0, i0, i1)
4a71579b
PC
577# define str_c(r0, r1) _str_c(_jit, r0, r1)
578static void _str_c(jit_state_t*, jit_int32_t, jit_int32_t);
579# define sti_c(i0, r0) _sti_c(_jit, i0, r0)
580static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t);
581# define str_s(r0, r1) _str_s(_jit, r0, r1)
582static void _str_s(jit_state_t*, jit_int32_t, jit_int32_t);
583# define sti_s(i0, r0) _sti_s(_jit, i0, r0)
584static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t);
585# define str_i(r0, r1) _str_i(_jit, r0, r1)
586static void _str_i(jit_state_t*, jit_int32_t, jit_int32_t);
587# define sti_i(i0, r0) _sti_i(_jit, i0, r0)
588static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t);
589# if __X64 && !__X64_32
590# define str_l(r0, r1) _str_l(_jit, r0, r1)
591static void _str_l(jit_state_t*, jit_int32_t, jit_int32_t);
592# define sti_l(i0, r0) _sti_l(_jit, i0, r0)
593static void _sti_l(jit_state_t*, jit_word_t, jit_int32_t);
594# endif
595# define stxr_c(r0, r1, r2) _stxr_c(_jit, r0, r1, r2)
596static void _stxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
597# define stxi_c(i0, r0, r1) _stxi_c(_jit, i0, r0, r1)
598static void _stxi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
599# define stxr_s(r0, r1, r2) _stxr_s(_jit, r0, r1, r2)
600static void _stxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
601# define stxi_s(i0, r0, r1) _stxi_s(_jit, i0, r0, r1)
602static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
603# define stxr_i(r0, r1, r2) _stxr_i(_jit, r0, r1, r2)
604static void _stxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
605# define stxi_i(i0, r0, r1) _stxi_i(_jit, i0, r0, r1)
606static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
607# if __X64 && !__X64_32
608# define stxr_l(r0, r1, r2) _stxr_l(_jit, r0, r1, r2)
609static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
610# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1)
611static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
612# endif
ba86ff93
PC
613#define unstr(r0, r1, i0) generic_unstr(r0, r1, i0)
614#define unsti(i0, r0, i1) generic_unsti(i0, r0, i1)
4a71579b
PC
615# define jcc(code, i0) _jcc(_jit, code, i0)
616# define jo(i0) jcc(X86_CC_O, i0)
617# define jno(i0) jcc(X86_CC_NO, i0)
618# define jnae(i0) jcc(X86_CC_NAE, i0)
619# define jb(i0) jcc(X86_CC_B, i0)
620# define jc(i0) jcc(X86_CC_C, i0)
621# define jae(i0) jcc(X86_CC_AE, i0)
622# define jnb(i0) jcc(X86_CC_NB, i0)
623# define jnc(i0) jcc(X86_CC_NC, i0)
624# define je(i0) jcc(X86_CC_E, i0)
625# define jz(i0) jcc(X86_CC_Z, i0)
626# define jne(i0) jcc(X86_CC_NE, i0)
627# define jnz(i0) jcc(X86_CC_NZ, i0)
628# define jbe(i0) jcc(X86_CC_BE, i0)
629# define jna(i0) jcc(X86_CC_NA, i0)
630# define ja(i0) jcc(X86_CC_A, i0)
631# define jnbe(i0) jcc(X86_CC_NBE, i0)
632# define js(i0) jcc(X86_CC_S, i0)
633# define jns(i0) jcc(X86_CC_NS, i0)
634# define jp(i0) jcc(X86_CC_P, i0)
635# define jpe(i0) jcc(X86_CC_PE, i0)
636# define jnp(i0) jcc(X86_CC_NP, i0)
637# define jpo(i0) jcc(X86_CC_PO, i0)
638# define jl(i0) jcc(X86_CC_L, i0)
639# define jnge(i0) jcc(X86_CC_NGE, i0)
640# define jge(i0) jcc(X86_CC_GE, i0)
641# define jnl(i0) jcc(X86_CC_NL, i0)
642# define jle(i0) jcc(X86_CC_LE, i0)
643# define jng(i0) jcc(X86_CC_NG, i0)
644# define jg(i0) jcc(X86_CC_G, i0)
645# define jnle(i0) jcc(X86_CC_NLE, i0)
79bfeef6 646static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t);
4a71579b
PC
647# define jccs(code, i0) _jccs(_jit, code, i0)
648# define jos(i0) jccs(X86_CC_O, i0)
649# define jnos(i0) jccs(X86_CC_NO, i0)
650# define jnaes(i0) jccs(X86_CC_NAE, i0)
651# define jbs(i0) jccs(X86_CC_B, i0)
652# define jcs(i0) jccs(X86_CC_C, i0)
653# define jaes(i0) jccs(X86_CC_AE, i0)
654# define jnbs(i0) jccs(X86_CC_NB, i0)
655# define jncs(i0) jccs(X86_CC_NC, i0)
656# define jes(i0) jccs(X86_CC_E, i0)
657# define jzs(i0) jccs(X86_CC_Z, i0)
658# define jnes(i0) jccs(X86_CC_NE, i0)
659# define jnzs(i0) jccs(X86_CC_NZ, i0)
660# define jbes(i0) jccs(X86_CC_BE, i0)
661# define jnas(i0) jccs(X86_CC_NA, i0)
662# define jas(i0) jccs(X86_CC_A, i0)
663# define jnbes(i0) jccs(X86_CC_NBE, i0)
664# define jss(i0) jccs(X86_CC_S, i0)
665# define jnss(i0) jccs(X86_CC_NS, i0)
666# define jps(i0) jccs(X86_CC_P, i0)
667# define jpes(i0) jccs(X86_CC_PE, i0)
668# define jnps(i0) jccs(X86_CC_NP, i0)
669# define jpos(i0) jccs(X86_CC_PO, i0)
670# define jls(i0) jccs(X86_CC_L, i0)
671# define jnges(i0) jccs(X86_CC_NGE, i0)
672# define jges(i0) jccs(X86_CC_GE, i0)
673# define jnls(i0) jccs(X86_CC_NL, i0)
674# define jles(i0) jccs(X86_CC_LE, i0)
675# define jngs(i0) jccs(X86_CC_NG, i0)
676# define jgs(i0) jccs(X86_CC_G, i0)
677# define jnles(i0) jccs(X86_CC_NLE, i0)
79bfeef6 678static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t);
4a71579b 679# define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1)
79bfeef6
PC
680static jit_word_t _jcr(jit_state_t*,
681 jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
4a71579b 682# define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1)
79bfeef6
PC
683static jit_word_t _jci(jit_state_t*,
684 jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
4a71579b 685# define jci0(code, i0, r0) _jci0(_jit, code, i0, r0)
79bfeef6 686static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
4a71579b
PC
687# define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1)
688static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
689# define blti(i0, r0, i1) _blti(_jit, i0, r0, i1)
690static jit_word_t _blti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
691# define bltr_u(i0, r0, r1) _bltr_u(_jit, i0, r0, r1)
692static jit_word_t _bltr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
693# define blti_u(i0, r0, i1) _blti_u(_jit, i0, r0, i1)
694static jit_word_t _blti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
695# define bler(i0, r0, r1) _bler(_jit, i0, r0, r1)
696static jit_word_t _bler(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
697# define blei(i0, r0, i1) _blei(_jit, i0, r0, i1)
698static jit_word_t _blei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
699# define bler_u(i0, r0, r1) _bler_u(_jit, i0, r0, r1)
700static jit_word_t _bler_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
701# define blei_u(i0, r0, i1) _blei_u(_jit, i0, r0, i1)
702static jit_word_t _blei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
703# define beqr(i0, r0, r1) _beqr(_jit, i0, r0, r1)
704static jit_word_t _beqr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
705# define beqi(i0, r0, i1) _beqi(_jit, i0, r0, i1)
706static jit_word_t _beqi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
707# define bger(i0, r0, r1) _bger(_jit, i0, r0, r1)
708static jit_word_t _bger(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
709# define bgei(i0, r0, i1) _bgei(_jit, i0, r0, i1)
710static jit_word_t _bgei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
711# define bger_u(i0, r0, r1) _bger_u(_jit, i0, r0, r1)
712static jit_word_t _bger_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
713# define bgei_u(i0, r0, i1) _bgei_u(_jit, i0, r0, i1)
714static jit_word_t _bgei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
715# define bgtr(i0, r0, r1) _bgtr(_jit, i0, r0, r1)
716static jit_word_t _bgtr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
717# define bgti(i0, r0, i1) _bgti(_jit, i0, r0, i1)
718static jit_word_t _bgti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
719# define bgtr_u(i0, r0, r1) _bgtr_u(_jit, i0, r0, r1)
720static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
721# define bgti_u(i0, r0, i1) _bgti_u(_jit, i0, r0, i1)
722static jit_word_t _bgti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
723# define bner(i0, r0, r1) _bner(_jit, i0, r0, r1)
724static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
725# define bnei(i0, r0, i1) _bnei(_jit, i0, r0, i1)
726static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
727# define bmsr(i0, r0, r1) _bmsr(_jit, i0, r0, r1)
728static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
729# define bmsi(i0, r0, i1) _bmsi(_jit, i0, r0, i1)
730static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
731# define bmcr(i0, r0, r1) _bmcr(_jit, i0, r0, r1)
732static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
733# define bmci(i0, r0, i1) _bmci(_jit, i0, r0, i1)
734static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
735# define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1)
736static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
737# define boaddi(i0, r0, i1) _boaddi(_jit, i0, r0, i1)
738static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
739# define boaddr_u(i0, r0, r1) _boaddr_u(_jit, i0, r0, r1)
740static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
741# define boaddi_u(i0, r0, i1) _boaddi_u(_jit, i0, r0, i1)
742static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
743# define bxaddr(i0, r0, r1) _bxaddr(_jit, i0, r0, r1)
744static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
745# define bxaddi(i0, r0, i1) _bxaddi(_jit, i0, r0, i1)
746static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
747# define bxaddr_u(i0, r0, r1) _bxaddr_u(_jit, i0, r0, r1)
748static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
749# define bxaddi_u(i0, r0, i1) _bxaddi_u(_jit, i0, r0, i1)
750static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
751# define bosubr(i0, r0, r1) _bosubr(_jit, i0, r0, r1)
752static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
753# define bosubi(i0, r0, i1) _bosubi(_jit, i0, r0, i1)
754static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
755# define bosubr_u(i0, r0, r1) _bosubr_u(_jit, i0, r0, r1)
756static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
757# define bosubi_u(i0, r0, i1) _bosubi_u(_jit, i0, r0, i1)
758static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
759# define bxsubr(i0, r0, r1) _bxsubr(_jit, i0, r0, r1)
760static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
761# define bxsubi(i0, r0, i1) _bxsubi(_jit, i0, r0, i1)
762static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
763# define bxsubr_u(i0, r0, r1) _bxsubr_u(_jit, i0, r0, r1)
764static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
765# define bxsubi_u(i0, r0, i1) _bxsubi_u(_jit, i0, r0, i1)
766static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
767# define callr(r0) _callr(_jit, r0)
768static void _callr(jit_state_t*, jit_int32_t);
769# define calli(i0) _calli(_jit, i0)
770static jit_word_t _calli(jit_state_t*, jit_word_t);
519a9ea1
PC
771# if __X64
772# define calli_p(i0) _calli_p(_jit, i0)
773static jit_word_t _calli_p(jit_state_t*, jit_word_t);
774# else
775# define calli_p(i0) calli(i0)
776# endif
4a71579b
PC
777# define jmpr(r0) _jmpr(_jit, r0)
778static void _jmpr(jit_state_t*, jit_int32_t);
779# define jmpi(i0) _jmpi(_jit, i0)
780static jit_word_t _jmpi(jit_state_t*, jit_word_t);
519a9ea1
PC
781# if __X64
782# define jmpi_p(i0) _jmpi_p(_jit, i0)
783static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
784# else
785# define jmpi_p(i0) jmpi(i0)
786# endif
4a71579b 787# define jmpsi(i0) _jmpsi(_jit, i0)
79bfeef6 788static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t);
4a71579b
PC
789# define prolog(node) _prolog(_jit, node)
790static void _prolog(jit_state_t*, jit_node_t*);
791# define epilog(node) _epilog(_jit, node)
792static void _epilog(jit_state_t*, jit_node_t*);
793# define vastart(r0) _vastart(_jit, r0)
794static void _vastart(jit_state_t*, jit_int32_t);
795# define vaarg(r0, r1) _vaarg(_jit, r0, r1)
796static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
797# define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0)
798static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
79bfeef6
PC
799# define patch_at(instr, label) _patch_at(_jit, instr, label)
800static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
4a71579b
PC
/*
 * Fallback ffsl() for platforms without one (HAVE_FFSL undefined).
 *
 * The argument is a jit_word_t in the callers visible in this file, so the
 * builtin must cover the full word width:
 *   - __X32:  word is 32 bits, use the int variant;
 *   - Win64:  long is only 32 bits there (the same condition this file uses
 *             to pick 1LL for ONE), so the long long variant is required or
 *             set bits above bit 31 would be missed;
 *   - others: long is used as the word-sized type.
 */
# if !defined(HAVE_FFSL)
#  if __X32
#   define ffsl(i)			__builtin_ffs(i)
#  elif __WORDSIZE == 64 && _WIN32
#   define ffsl(l)			__builtin_ffsll(l)
#  else
#   define ffsl(l)			__builtin_ffsl(l)
#  endif
# endif
/* True when the cmov flag is set in jit_cpu. */
# define jit_cmov_p()			jit_cpu.cmov
4a71579b
PC
809#endif
810
811#if CODE
812static void
813_rex(jit_state_t *_jit, jit_int32_t l, jit_int32_t w,
814 jit_int32_t r, jit_int32_t x, jit_int32_t b)
815{
816#if __X64
817 jit_int32_t v = 0x40 | (w << 3);
818
819 if (r != _NOREG)
820 v |= (r & 8) >> 1;
821 if (x != _NOREG)
822 v |= (x & 8) >> 2;
823 if (b != _NOREG)
824 v |= (b & 8) >> 3;
825 if (l || v != 0x40)
826 ic(v);
827#endif
828}
829
830static void
831_rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
832 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
833{
834 if (ri == _NOREG) {
835 if (rb == _NOREG) {
79bfeef6
PC
836 /* Use ms == _SCL8 to tell it is a %rip relative displacement */
837#if __X64
838 if (ms == _SCL8)
839#endif
840 mrm(0x00, r7(rd), 0x05);
841#if __X64
842 else {
843 mrm(0x00, r7(rd), 0x04);
844 sib(_SCL1, 0x04, 0x05);
845 }
4a71579b
PC
846#endif
847 ii(md);
848 }
849 else if (r7(rb) == _RSP_REGNO) {
850 if (md == 0) {
851 mrm(0x00, r7(rd), 0x04);
852 sib(ms, 0x04, 0x04);
853 }
854 else if ((jit_int8_t)md == md) {
855 mrm(0x01, r7(rd), 0x04);
856 sib(ms, 0x04, 0x04);
857 ic(md);
858 }
859 else {
860 mrm(0x02, r7(rd), 0x04);
861 sib(ms, 0x04, 0x04);
862 ii(md);
863 }
864 }
865 else {
866 if (md == 0 && r7(rb) != _RBP_REGNO)
867 mrm(0x00, r7(rd), r7(rb));
868 else if ((jit_int8_t)md == md) {
869 mrm(0x01, r7(rd), r7(rb));
870 ic(md);
871 }
872 else {
873 mrm(0x02, r7(rd), r7(rb));
874 ii(md);
875 }
876 }
877 }
878 else if (rb == _NOREG) {
879 mrm(0x00, r7(rd), 0x04);
880 sib(ms, r7(ri), 0x05);
881 ii(md);
882 }
883 else if (r8(ri) != _RSP_REGNO) {
884 if (md == 0 && r7(rb) != _RBP_REGNO) {
885 mrm(0x00, r7(rd), 0x04);
886 sib(ms, r7(ri), r7(rb));
887 }
888 else if ((jit_int8_t)md == md) {
889 mrm(0x01, r7(rd), 0x04);
890 sib(ms, r7(ri), r7(rb));
891 ic(md);
892 }
893 else {
894 mrm(0x02, r7(rd), 0x04);
895 sib(ms, r7(ri), r7(rb));
896 ic(md);
897 }
898 }
899 else {
900 fprintf(stderr, "illegal index register");
901 abort();
902 }
903}
904
ba86ff93
PC
905static void
906_vex(jit_state_t *_jit, jit_int32_t r, jit_int32_t x, jit_int32_t b,
907 jit_int32_t map, jit_int32_t w, jit_int32_t vvvv, jit_int32_t l,
908 jit_int32_t pp)
909{
910 jit_int32_t v;
911 if (r == _NOREG) r = 0;
912 if (x == _NOREG) x = 0;
913 if (b == _NOREG) b = 0;
914 if (map == 1 && w == 0 && ((x|b) & 8) == 0) {
915 /* Two byte prefix */
916 ic(0xc5);
917 /* ~R */
918 v = (r & 8) ? 0 : 0x80;
919 }
920 else {
921 /* Three byte prefix */
922 if (map >= 8)
923 ic(0x8f);
924 else
925 ic(0xc4);
926 /* map_select */
927 v = map;
928 /* ~R */
929 if (!(r & 8)) v |= 0x80;
930 /* ~X */
931 if (!(x & 8)) v |= 0x40;
932 /* ~B */
933 if (!(b & 8)) v |= 0x20;
934 ic(v);
935 /* W */
936 v = w ? 0x80 : 0;
937 }
938 /* ~vvvv */
939 v |= (~vvvv & 0x0f) << 3;
940 /* L */
941 if (l) v |= 0x04;
942 /* pp */
943 v |= pp;
944 ic(v);
945}
946
4a71579b
PC
947static void
948_nop(jit_state_t *_jit, jit_int32_t count)
949{
c0c16242
PC
950 jit_int32_t i;
951 while (count) {
952 if (count > 9)
953 i = 9;
954 else
955 i = count;
956 switch (i) {
957 case 0:
958 break;
959 case 1: /* NOP */
960 ic(0x90); break;
961 case 2: /* 66 NOP */
962 ic(0x66); ic(0x90);
963 break;
964 case 3: /* NOP DWORD ptr [EAX] */
965 ic(0x0f); ic(0x1f); ic(0x00);
966 break;
967 case 4: /* NOP DWORD ptr [EAX + 00H] */
968 ic(0x0f); ic(0x1f); ic(0x40); ic(0x00);
969 break;
970 case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
971 ic(0x0f); ic(0x1f); ic(0x44); ic(0x00);
972 ic(0x00);
973 break;
974 case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
975 ic(0x66); ic(0x0f); ic(0x1f); ic(0x44);
976 ic(0x00); ic(0x00);
977 break;
978 case 7: /* NOP DWORD ptr [EAX + 00000000H] */
979 ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000);
980 break;
981 case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
982 ic(0x0f); ic(0x1f); ic(0x84); ic(0x00);
983 ii(0x0000);
984 break;
985 case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
986 ic(0x66); ic(0x0f); ic(0x1f); ic(0x84);
987 ic(0x00); ii(0x0000);
988 break;
989 }
990 count -= i;
4a71579b
PC
991 }
992}
4a71579b
PC
993static void
994_lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
995 jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
996{
997 rex(0, WIDE, rd, ri, rb);
998 ic(0x8d);
999 rx(rd, md, rb, ri, ms);
1000}
1001
1002static void
1003_pushr(jit_state_t *_jit, jit_int32_t r0)
1004{
1005 rex(0, WIDE, 0, 0, r0);
1006 ic(0x50 | r7(r0));
1007}
1008
1009static void
1010_popr(jit_state_t *_jit, jit_int32_t r0)
1011{
1012 rex(0, WIDE, 0, 0, r0);
1013 ic(0x58 | r7(r0));
1014}
1015
1016static void
1017_xchgr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1018{
1019 rex(0, WIDE, r1, _NOREG, r0);
1020 ic(0x87);
1021 mrm(0x03, r7(r1), r7(r0));
1022}
1023
1024static void
1025_testr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1026{
1027 rex(0, WIDE, r1, _NOREG, r0);
1028 ic(0x85);
1029 mrm(0x03, r7(r1), r7(r0));
1030}
1031
1032static void
1033_testi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1034{
1035 rex(0, WIDE, _NOREG, _NOREG, r0);
1036 if (r0 == _RAX_REGNO)
1037 ic(0xa9);
1038 else {
1039 ic(0xf7);
1040 mrm(0x03, 0x00, r7(r0));
1041 }
1042 ii(i0);
1043}
1044
1045static void
1046_cc(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1047{
1048 rex(0, 0, _NOREG, _NOREG, r0);
1049 ic(0x0f);
1050 ic(0x90 | code);
1051 mrm(0x03, 0x00, r7(r0));
1052}
1053
1054static void
1055_alur(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
1056{
1057 rex(0, WIDE, r1, _NOREG, r0);
1058 ic(code | 0x01);
1059 mrm(0x03, r7(r1), r7(r0));
1060}
1061
1062static void
1063_alui(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
1064{
1065 jit_int32_t reg;
1066 if (can_sign_extend_int_p(i0)) {
1067 rex(0, WIDE, _NOREG, _NOREG, r0);
1068 if ((jit_int8_t)i0 == i0) {
1069 ic(0x83);
1070 ic(0xc0 | code | r7(r0));
1071 ic(i0);
1072 }
1073 else {
1074 if (r0 == _RAX_REGNO)
1075 ic(code | 0x05);
1076 else {
1077 ic(0x81);
1078 ic(0xc0 | code | r7(r0));
1079 }
1080 ii(i0);
1081 }
1082 }
1083 else {
1084 reg = jit_get_reg(jit_class_gpr);
1085 movi(rn(reg), i0);
1086 alur(code, r0, rn(reg));
1087 jit_unget_reg(reg);
1088 }
1089}
1090
1091static void
1092_save(jit_state_t *_jit, jit_int32_t r0)
1093{
1094 if (!_jitc->function->regoff[r0]) {
1095 _jitc->function->regoff[r0] = jit_allocai(sizeof(jit_word_t));
1096 _jitc->again = 1;
1097 }
1098 assert(!jit_regset_tstbit(&_jitc->regsav, r0));
1099 jit_regset_setbit(&_jitc->regsav, r0);
1100 stxi(_jitc->function->regoff[r0], _RBP_REGNO, r0);
1101}
1102
1103static void
1104_load(jit_state_t *_jit, jit_int32_t r0)
1105{
1106 assert(_jitc->function->regoff[r0]);
1107 assert(jit_regset_tstbit(&_jitc->regsav, r0));
1108 jit_regset_clrbit(&_jitc->regsav, r0);
1109 ldxi(r0, _RBP_REGNO, _jitc->function->regoff[r0]);
1110}
1111
1112static void
1113_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1114{
1115 if (r0 == r1)
1116 iaddr(r0, r2);
1117 else if (r0 == r2)
1118 iaddr(r0, r1);
1119 else
1120 lea(0, r1, r2, _SCL1, r0);
1121}
1122
1123static void
1124_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1125{
1126 jit_int32_t reg;
1127 if (i0 == 0)
1128 movr(r0, r1);
1129#if USE_INC_DEC
1130 else if (i0 == 1)
1131 incr(r0, r1);
1132 else if (i0 == -1)
1133 decr(r0, r1);
1134#endif
1135 else if (can_sign_extend_int_p(i0)) {
1136 if (r0 == r1)
1137 iaddi(r0, i0);
1138 else
1139 lea(i0, r1, _NOREG, _SCL1, r0);
1140 }
1141 else if (r0 != r1) {
1142 movi(r0, i0);
1143 iaddr(r0, r1);
1144 }
1145 else {
1146 reg = jit_get_reg(jit_class_gpr);
1147 movi(rn(reg), i0);
1148 iaddr(r0, rn(reg));
1149 jit_unget_reg(reg);
1150 }
1151}
1152
1153static void
1154_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1155{
1156 if (r0 == r2)
1157 iaddr(r0, r1);
1158 else {
1159 movr(r0, r1);
1160 iaddr(r0, r2);
1161 }
1162}
1163
1164static void
1165_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1166{
1167 jit_int32_t reg;
1168 if (can_sign_extend_int_p(i0)) {
1169 movr(r0, r1);
1170 iaddi(r0, i0);
1171 }
1172 else if (r0 == r1) {
1173 reg = jit_get_reg(jit_class_gpr);
1174 movi(rn(reg), i0);
1175 iaddr(r0, rn(reg));
1176 jit_unget_reg(reg);
1177 }
1178 else {
1179 movi(r0, i0);
1180 iaddr(r0, r1);
1181 }
1182}
1183
79bfeef6
PC
1184static void
1185_iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1186{
1187 /* FIXME: this is not doing what I did expect for the simple test case:
1188 * mov $0xffffffffffffffff, %rax -- rax = 0xffffffffffffffff (-1)
1189 * mov $0xffffffffffffffff, %r10 -- r10 = 0xffffffffffffffff (-1)
1190 * mov $0x1, %r11d -- r11 = 1
1191 * xor %rbx, %rbx -- rbx = 0
1192 * (gdb) p $eflags
1193 * $1 = [ PF ZF IF ]
1194 * add %r11, %rax -- r11 = 0x10000000000000000 (0)
1195 * does not fit in 64 bit ^
1196 * (gdb) p $eflags
1197 * $2 = [ CF PF AF ZF IF ]
1198 * adcx %r10, %rbx -- r10 = 0xffffffffffffffff (-1)
1199 * (gdb) p $eflags
1200 * $3 = [ CF PF AF ZF IF ]
1201 * (gdb) p/x $r10
1202 * $4 = 0xffffffffffffffff
1203 * but, r10 should be zero, as it is:
1204 * -1 (%r10) + 0 (%rbx) + carry (!!eflags.CF)
1205 * FIXME: maybe should only use ADCX in the third operation onward, that
1206 * is, after the first ADC? In either case, the add -1+0+carry should
1207 * have used and consumed the carry? At least this is what is expected
1208 * in Lightning...
1209 */
1210#if 0
1211 /* Significantly longer instruction, but avoid cpu stalls as only
1212 * the carry flag is used in a sequence. */
1213 if (jit_cpu.adx) {
1214 /* ADCX */
1215 ic(0x66);
1216 rex(0, WIDE, r1, _NOREG, r0);
1217 ic(0x0f);
1218 ic(0x38);
1219 ic(0xf6);
1220 mrm(0x03, r7(r1), r7(r0));
1221 }
1222 else
1223#endif
1224 alur(X86_ADC, r0, r1);
1225}
1226
4a71579b
PC
1227static void
1228_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1229{
1230 if (r0 == r2)
1231 iaddxr(r0, r1);
1232 else {
1233 movr(r0, r1);
1234 iaddxr(r0, r2);
1235 }
1236}
1237
1238static void
1239_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1240{
1241 jit_int32_t reg;
79bfeef6
PC
1242 if (
1243#if 0
1244 /* Do not mix ADC and ADCX */
1245 !jit_cpu.adx &&
1246#endif
1247 can_sign_extend_int_p(i0)) {
4a71579b
PC
1248 movr(r0, r1);
1249 iaddxi(r0, i0);
1250 }
1251 else if (r0 == r1) {
1252 reg = jit_get_reg(jit_class_gpr);
1253 movi(rn(reg), i0);
1254 iaddxr(r0, rn(reg));
1255 jit_unget_reg(reg);
1256 }
1257 else {
1258 movi(r0, i0);
1259 iaddxr(r0, r1);
1260 }
1261}
1262
1263static void
1264_subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1265{
1266 if (r1 == r2)
1267 ixorr(r0, r0);
1268 else if (r0 == r2) {
1269 isubr(r0, r1);
1270 inegr(r0);
1271 }
1272 else {
1273 movr(r0, r1);
1274 isubr(r0, r2);
1275 }
1276}
1277
1278static void
1279_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1280{
1281 jit_int32_t reg;
1282 if (i0 == 0)
1283 movr(r0, r1);
1284#if USE_INC_DEC
1285 else if (i0 == 1)
1286 decr(r0, r1);
1287 else if (i0 == -1)
1288 incr(r0, r1);
1289#endif
1290 else if (can_sign_extend_int_p(i0)) {
1291 if (r0 == r1)
1292 isubi(r0, i0);
1293 else
1294 lea(-i0, r1, _NOREG, _SCL1, r0);
1295 }
1296 else if (r0 != r1) {
1297 movi(r0, -i0);
1298 iaddr(r0, r1);
1299 }
1300 else {
1301 reg = jit_get_reg(jit_class_gpr);
1302 movi(rn(reg), i0);
1303 isubr(r0, rn(reg));
1304 jit_unget_reg(reg);
1305 }
1306}
1307
1308static void
1309_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1310{
1311 jit_int32_t reg;
1312 if (r0 == r2 && r0 != r1) {
1313 reg = jit_get_reg(jit_class_gpr);
1314 movr(rn(reg), r0);
1315 movr(r0, r1);
1316 isubr(r0, rn(reg));
1317 jit_unget_reg(reg);
1318 }
1319 else {
1320 movr(r0, r1);
1321 isubr(r0, r2);
1322 }
1323}
1324
1325static void
1326_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1327{
1328 jit_int32_t reg;
1329 movr(r0, r1);
1330 if (can_sign_extend_int_p(i0))
1331 isubi(r0, i0);
1332 else {
1333 reg = jit_get_reg(jit_class_gpr);
1334 movi(rn(reg), i0);
1335 isubr(r0, rn(reg));
1336 jit_unget_reg(reg);
1337 }
1338}
1339
1340static void
1341_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1342{
1343 jit_int32_t reg;
1344 if (r0 == r2 && r0 != r1) {
1345 reg = jit_get_reg(jit_class_gpr);
1346 movr(rn(reg), r0);
1347 movr(r0, r1);
1348 isubxr(r0, rn(reg));
1349 jit_unget_reg(reg);
1350 }
1351 else {
1352 movr(r0, r1);
1353 isubxr(r0, r2);
1354 }
1355}
1356
1357static void
1358_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1359{
1360 jit_int32_t reg;
1361 movr(r0, r1);
1362 if (can_sign_extend_int_p(i0))
1363 isubxi(r0, i0);
1364 else {
1365 reg = jit_get_reg(jit_class_gpr);
1366 imovi(rn(reg), i0);
1367 isubxr(r0, rn(reg));
1368 jit_unget_reg(reg);
1369 }
1370}
1371
1372static void
1373_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1374{
1375 subi(r0, r1, i0);
1376 negr(r0, r0);
1377}
1378
1379static void
1380_imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1381{
1382 rex(0, WIDE, r0, _NOREG, r1);
1383 ic(0x0f);
1384 ic(0xaf);
1385 mrm(0x03, r7(r0), r7(r1));
1386}
1387
1388static void
1389_imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1390{
1391 jit_int32_t reg;
1392 if (can_sign_extend_int_p(i0)) {
1393 rex(0, WIDE, r0, _NOREG, r1);
1394 if ((jit_int8_t)i0 == i0) {
1395 ic(0x6b);
1396 mrm(0x03, r7(r0), r7(r1));
1397 ic(i0);
1398 }
1399 else {
1400 ic(0x69);
1401 mrm(0x03, r7(r0), r7(r1));
1402 ii(i0);
1403 }
1404 }
1405 else {
1406 reg = jit_get_reg(jit_class_gpr);
1407 movi(rn(reg), i0);
1408 imulr(r0, rn(reg));
1409 jit_unget_reg(reg);
1410 }
1411}
1412
1413static void
1414_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1415{
1416 if (r0 == r1)
1417 imulr(r0, r2);
1418 else if (r0 == r2)
1419 imulr(r0, r1);
1420 else {
1421 movr(r0, r1);
1422 imulr(r0, r2);
1423 }
1424}
1425
1426static void
1427_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1428{
1429 switch (i0) {
1430 case 0:
1431 ixorr(r0, r0);
1432 break;
1433 case 1:
1434 movr(r0, r1);
1435 break;
1436 case -1:
1437 negr(r0, r1);
1438 break;
1439 case 2:
1440 lea(0, _NOREG, r1, _SCL2, r0);
1441 break;
1442 case 4:
1443 lea(0, _NOREG, r1, _SCL4, r0);
1444 break;
1445 case 8:
1446 lea(0, _NOREG, r1, _SCL8, r0);
1447 break;
1448 default:
1449 if (i0 > 0 && !(i0 & (i0 - 1)))
1450 lshi(r0, r1, ffsl(i0) - 1);
1451 else if (can_sign_extend_int_p(i0))
1452 imuli(r0, r1, i0);
1453 else if (r0 != r1) {
1454 movi(r0, i0);
1455 imulr(r0, r1);
1456 }
1457 else
1458 imuli(r0, r0, i0);
1459 break;
1460 }
1461}
1462
/*
 * Helpers for the multiply/divide emitters, which need specific fixed
 * registers. They operate on the caller's locals: bit masks `sav` and
 * `set`, and the operand registers r0..r3.
 *
 *   savset(regno)   3-operand form: unless regno is the destination r0,
 *                   flag it in sav; also flag it in set when it is neither
 *                   source (r1, r2).
 *   isavset(regno)  immediate form: same, with r1 as the only source.
 *   qsavset(regno)  4-operand form: r0 and r1 are destinations, r2 and r3
 *                   are sources.
 *   allocr(regno, regval)
 *                   for a register flagged in set, reserve it by name with
 *                   jit_get_reg(); for one flagged in sav, either drop the
 *                   flag (already in regsav, or not in reglive) or save()
 *                   it.
 *   clear(regno, regval)
 *                   undo allocr(): release what was reserved and load()
 *                   what was saved.
 *
 * Arguments are parenthesized in the expansions, and the parameter is not
 * called `rn` so it cannot be confused with the rn() macro used in this
 * file.
 */
#define savset(regno)							\
    do {								\
	if (r0 != (regno)) {						\
	    sav |= 1 << (regno);					\
	    if (r1 != (regno) && r2 != (regno))				\
		set |= 1 << (regno);					\
	}								\
    } while (0)
#define isavset(regno)							\
    do {								\
	if (r0 != (regno)) {						\
	    sav |= 1 << (regno);					\
	    if (r1 != (regno))						\
		set |= 1 << (regno);					\
	}								\
    } while (0)
#define qsavset(regno)							\
    do {								\
	if (r0 != (regno) && r1 != (regno)) {				\
	    sav |= 1 << (regno);					\
	    if (r2 != (regno) && r3 != (regno))				\
		set |= 1 << (regno);					\
	}								\
    } while (0)
#define allocr(regno, regval)						\
    do {								\
	if (set & (1 << (regno)))					\
	    (void)jit_get_reg((regval)|jit_class_gpr|jit_class_named);	\
	if (sav & (1 << (regno))) {					\
	    if ( jit_regset_tstbit(&_jitc->regsav, (regval)) ||		\
		!jit_regset_tstbit(&_jitc->reglive, (regval)))		\
		sav &= ~(1 << (regno));					\
	    else							\
		save(regval);						\
	}								\
    } while (0)
#define clear(regno, regval)						\
    do {								\
	if (set & (1 << (regno)))					\
	    jit_unget_reg(regval);					\
	if (sav & (1 << (regno)))					\
	    load(regval);						\
    } while (0)
1506
4a71579b
PC
1507static void
1508_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1509 jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1510{
1511 jit_int32_t mul;
1512 jit_int32_t sav;
1513 jit_int32_t set;
1514
1515 sav = set = 0;
1516 qsavset(_RDX_REGNO);
1517 qsavset(_RAX_REGNO);
1518 allocr(_RDX_REGNO, _RDX);
1519 allocr(_RAX_REGNO, _RAX);
1520
1521 if (r3 == _RAX_REGNO)
1522 mul = r2;
1523 else {
1524 mul = r3;
1525 movr(_RAX_REGNO, r2);
1526 }
1527 if (sign)
1528 umulr(mul);
1529 else
1530 umulr_u(mul);
1531
ba86ff93
PC
1532 if (r0 != JIT_NOREG) {
1533 if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1534 xchgr(_RAX_REGNO, _RDX_REGNO);
1535 else {
1536 if (r0 != _RDX_REGNO)
1537 movr(r0, _RAX_REGNO);
1538 movr(r1, _RDX_REGNO);
1539 if (r0 == _RDX_REGNO)
1540 movr(r0, _RAX_REGNO);
1541 }
1542 }
4a71579b 1543 else {
ba86ff93 1544 assert(r1 != JIT_NOREG);
4a71579b 1545 movr(r1, _RDX_REGNO);
4a71579b
PC
1546 }
1547
1548 clear(_RDX_REGNO, _RDX);
1549 clear(_RAX_REGNO, _RAX);
1550}
1551
1552static void
1553_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1554 jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1555{
1556 jit_int32_t reg;
1557
1558 if (i0 == 0) {
1559 ixorr(r0, r0);
1560 ixorr(r1, r1);
1561 }
1562 else {
1563 reg = jit_get_reg(jit_class_gpr);
1564 movi(rn(reg), i0);
1565 if (sign)
1566 qmulr(r0, r1, r2, rn(reg));
1567 else
1568 qmulr_u(r0, r1, r2, rn(reg));
1569 jit_unget_reg(reg);
1570 }
1571}
1572
1573static void
1574_sign_extend_rdx_rax(jit_state_t *_jit)
1575{
1576 rex(0, WIDE, 0, 0, 0);
1577 ic(0x99);
1578}
1579
1580static void
1581_divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
1582 jit_bool_t sign, jit_bool_t divide)
1583{
1584 jit_int32_t div;
1585 jit_int32_t reg;
1586 jit_int32_t set;
1587 jit_int32_t sav;
1588 jit_int32_t use;
1589
1590 sav = set = use = 0;
1591 savset(_RDX_REGNO);
1592 savset(_RAX_REGNO);
1593 allocr(_RDX_REGNO, _RDX);
1594 allocr(_RAX_REGNO, _RAX);
1595
1596 if (r2 == _RAX_REGNO) {
1597 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1598 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1599 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1600 jit_class_gpr|jit_class_named);
1601 use = 1;
1602 div = rn(reg);
1603 movr(div, _RAX_REGNO);
1604 if (r1 != _RAX_REGNO)
1605 movr(_RAX_REGNO, r1);
1606 }
1607 else {
1608 if (r0 == r1)
1609 xchgr(r0, _RAX_REGNO);
1610 else {
1611 if (r0 != _RAX_REGNO)
1612 movr(r0, _RAX_REGNO);
1613 if (r1 != _RAX_REGNO)
1614 movr(_RAX_REGNO, r1);
1615 }
1616 div = r0;
1617 }
1618 }
1619 else if (r2 == _RDX_REGNO) {
1620 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1621 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1622 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1623 jit_class_gpr|jit_class_named);
1624 use = 1;
1625 div = rn(reg);
1626 movr(div, _RDX_REGNO);
1627 if (r1 != _RAX_REGNO)
1628 movr(_RAX_REGNO, r1);
1629 }
1630 else {
1631 if (r1 != _RAX_REGNO)
1632 movr(_RAX_REGNO, r1);
1633 movr(r0, _RDX_REGNO);
1634 div = r0;
1635 }
1636 }
1637 else {
1638 if (r1 != _RAX_REGNO)
1639 movr(_RAX_REGNO, r1);
1640 div = r2;
1641 }
1642
1643 if (sign) {
1644 sign_extend_rdx_rax();
1645 idivr(div);
1646 }
1647 else {
1648 ixorr(_RDX_REGNO, _RDX_REGNO);
1649 idivr_u(div);
1650 }
1651
1652 if (use)
1653 jit_unget_reg(reg);
1654
1655 if (divide)
1656 movr(r0, _RAX_REGNO);
1657 else
1658 movr(r0, _RDX_REGNO);
1659
1660 clear(_RDX_REGNO, _RDX);
1661 clear(_RAX_REGNO, _RAX);
1662}
1663
1664static void
1665_divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
1666 jit_bool_t sign, jit_bool_t divide)
1667{
1668 jit_int32_t reg;
1669 jit_int32_t div;
1670 jit_int32_t sav;
1671 jit_int32_t set;
1672 jit_int32_t use;
1673
1674 if (divide) {
1675 switch (i0) {
1676 case 1:
1677 movr(r0, r1);
1678 return;
1679 case -1:
1680 if (sign) {
1681 negr(r0, r1);
1682 return;
1683 }
1684 break;
1685 default:
1686 if (i0 > 0 && !(i0 & (i0 - 1))) {
1687 movr(r0, r1);
1688 if (sign)
1689 rshi(r0, r0, ffsl(i0) - 1);
1690 else
1691 rshi_u(r0, r0, ffsl(i0) - 1);
1692 return;
1693 }
1694 break;
1695 }
1696 }
1697 else if (i0 == 1 || (sign && i0 == -1)) {
1698 ixorr(r0, r0);
1699 return;
1700 }
1701 else if (!sign && i0 > 0 && !(i0 & (i0 - 1))) {
1702 if (can_sign_extend_int_p(i0)) {
1703 movr(r0, r1);
1704 iandi(r0, i0 - 1);
1705 }
1706 else if (r0 != r1) {
1707 movi(r0, i0 - 1);
1708 iandr(r0, r1);
1709 }
1710 else {
1711 reg = jit_get_reg(jit_class_gpr);
1712 movi(rn(reg), i0 - 1);
1713 iandr(r0, rn(reg));
1714 jit_unget_reg(reg);
1715 }
1716 return;
1717 }
1718
1719 sav = set = use = 0;
1720 isavset(_RDX_REGNO);
1721 isavset(_RAX_REGNO);
1722 allocr(_RDX_REGNO, _RDX);
1723 allocr(_RAX_REGNO, _RAX);
1724
1725 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) {
1726 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1727 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1728 jit_class_gpr|jit_class_named);
1729 use = 1;
1730 div = rn(reg);
1731 }
1732 else
1733 div = r0;
1734
1735 movi(div, i0);
1736 movr(_RAX_REGNO, r1);
1737
1738 if (sign) {
1739 sign_extend_rdx_rax();
1740 idivr(div);
1741 }
1742 else {
1743 ixorr(_RDX_REGNO, _RDX_REGNO);
1744 idivr_u(div);
1745 }
1746
1747 if (use)
1748 jit_unget_reg(reg);
1749
1750 if (divide)
1751 movr(r0, _RAX_REGNO);
1752 else
1753 movr(r0, _RDX_REGNO);
1754
1755 clear(_RDX_REGNO, _RDX);
1756 clear(_RAX_REGNO, _RAX);
1757}
1758
1759static void
1760_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1761 jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1762{
1763 jit_int32_t div;
1764 jit_int32_t reg;
1765 jit_int32_t sav;
1766 jit_int32_t set;
1767 jit_int32_t use;
1768
1769 sav = set = use = 0;
1770 qsavset(_RDX_REGNO);
1771 qsavset(_RAX_REGNO);
1772 allocr(_RDX_REGNO, _RDX);
1773 allocr(_RAX_REGNO, _RAX);
1774 if (r3 == _RAX_REGNO) {
1775 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1776 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1777 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1778 jit_class_gpr|jit_class_named);
1779 use = 1;
1780 div = rn(reg);
1781 movr(div, _RAX_REGNO);
1782 if (r2 != _RAX_REGNO)
1783 movr(_RAX_REGNO, r2);
1784 }
1785 else {
1786 if (r0 == r2)
1787 xchgr(r0, _RAX_REGNO);
1788 else {
1789 if (r0 != _RAX_REGNO)
1790 movr(r0, _RAX_REGNO);
1791 if (r2 != _RAX_REGNO)
1792 movr(_RAX_REGNO, r2);
1793 }
1794 div = r0;
1795 }
1796 }
1797 else if (r3 == _RDX_REGNO) {
1798 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1799 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1800 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1801 jit_class_gpr|jit_class_named);
1802 use = 1;
1803 div = rn(reg);
1804 movr(div, _RDX_REGNO);
1805 if (r2 != _RAX_REGNO)
1806 movr(_RAX_REGNO, r2);
1807 }
1808 else {
1809 if (r2 != _RAX_REGNO)
1810 movr(_RAX_REGNO, r2);
1811 movr(r0, _RDX_REGNO);
1812 div = r0;
1813 }
1814 }
1815 else {
1816 if (r2 != _RAX_REGNO)
1817 movr(_RAX_REGNO, r2);
1818 div = r3;
1819 }
1820 if (sign) {
1821 sign_extend_rdx_rax();
1822 idivr(div);
1823 }
1824 else {
1825 ixorr(_RDX_REGNO, _RDX_REGNO);
1826 idivr_u(div);
1827 }
1828 if (use)
1829 jit_unget_reg(reg);
1830
1831 if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1832 xchgr(_RAX_REGNO, _RDX_REGNO);
1833 else {
1834 if (r0 != _RDX_REGNO)
1835 movr(r0, _RAX_REGNO);
1836 movr(r1, _RDX_REGNO);
1837 if (r0 == _RDX_REGNO)
1838 movr(r0, _RAX_REGNO);
1839 }
1840
1841 clear(_RDX_REGNO, _RDX);
1842 clear(_RAX_REGNO, _RAX);
1843}
1844
1845static void
1846_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1847 jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1848{
1849 jit_int32_t reg;
1850
1851 reg = jit_get_reg(jit_class_gpr);
1852 movi(rn(reg), i0);
1853 if (sign)
1854 qdivr(r0, r1, r2, rn(reg));
1855 else
1856 qdivr_u(r0, r1, r2, rn(reg));
1857 jit_unget_reg(reg);
1858}
4a71579b
PC
1859
1860static void
1861_andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1862{
1863 if (r1 == r2)
1864 movr(r0, r1);
1865 else if (r0 == r1)
1866 iandr(r0, r2);
1867 else if (r0 == r2)
1868 iandr(r0, r1);
1869 else {
1870 movr(r0, r1);
1871 iandr(r0, r2);
1872 }
1873}
1874
1875static void
1876_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1877{
1878 jit_int32_t reg;
1879
1880 if (i0 == 0)
1881 ixorr(r0, r0);
1882 else if (i0 == -1)
1883 movr(r0, r1);
1884 else if (r0 == r1) {
1885 if (can_sign_extend_int_p(i0))
1886 iandi(r0, i0);
1887 else {
1888 reg = jit_get_reg(jit_class_gpr);
1889 movi(rn(reg), i0);
1890 iandr(r0, rn(reg));
1891 jit_unget_reg(reg);
1892 }
1893 }
1894 else {
1895 movi(r0, i0);
1896 iandr(r0, r1);
1897 }
1898}
1899
1900static void
1901_orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1902{
1903 if (r1 == r2)
1904 movr(r0, r1);
1905 else if (r0 == r1)
1906 iorr(r0, r2);
1907 else if (r0 == r2)
1908 iorr(r0, r1);
1909 else {
1910 movr(r0, r1);
1911 iorr(r0, r2);
1912 }
1913}
1914
1915static void
1916_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1917{
1918 jit_int32_t reg;
1919 if (i0 == 0)
1920 movr(r0, r1);
1921 else if (i0 == -1)
1922 movi(r0, -1);
1923 else if (can_sign_extend_int_p(i0)) {
1924 movr(r0, r1);
1925 iori(r0, i0);
1926 }
1927 else if (r0 != r1) {
1928 movi(r0, i0);
1929 iorr(r0, r1);
1930 }
1931 else {
1932 reg = jit_get_reg(jit_class_gpr);
1933 movi(rn(reg), i0);
1934 iorr(r0, rn(reg));
1935 jit_unget_reg(reg);
1936 }
1937}
1938
1939static void
1940_xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1941{
1942 if (r1 == r2)
1943 ixorr(r0, r0);
1944 else if (r0 == r1)
1945 ixorr(r0, r2);
1946 else if (r0 == r2)
1947 ixorr(r0, r1);
1948 else {
1949 movr(r0, r1);
1950 ixorr(r0, r2);
1951 }
1952}
1953
1954static void
1955_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1956{
1957 jit_int32_t reg;
1958 if (i0 == 0)
1959 movr(r0, r1);
1960 else if (i0 == -1)
1961 comr(r0, r1);
1962 else if (can_sign_extend_int_p(i0)) {
1963 movr(r0, r1);
1964 ixori(r0, i0);
1965 }
1966 else if (r0 != r1) {
1967 movi(r0, i0);
1968 ixorr(r0, r1);
1969 }
1970 else {
1971 reg = jit_get_reg(jit_class_gpr);
1972 movi(rn(reg), i0);
1973 ixorr(r0, rn(reg));
1974 jit_unget_reg(reg);
1975 }
1976}
1977
1978static void
1979_irotshr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1980{
1981 rex(0, WIDE, _RCX_REGNO, _NOREG, r0);
1982 ic(0xd3);
1983 mrm(0x03, code, r7(r0));
1984}
1985
1986static void
1987_rotshr(jit_state_t *_jit, jit_int32_t code,
1988 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1989{
1990 jit_int32_t reg;
1991 jit_int32_t use;
1992
1993 if (r0 == _RCX_REGNO) {
1994 reg = jit_get_reg(jit_class_gpr);
1995 movr(rn(reg), r1);
1996 if (r2 != _RCX_REGNO)
1997 movr(_RCX_REGNO, r2);
1998 irotshr(code, rn(reg));
1999 movr(_RCX_REGNO, rn(reg));
2000 jit_unget_reg(reg);
2001 }
2002 else if (r2 != _RCX_REGNO) {
2003 use = !jit_reg_free_p(_RCX);
2004 if (use) {
2005 reg = jit_get_reg(jit_class_gpr);
2006 movr(rn(reg), _RCX_REGNO);
2007 }
2008 else
2009 reg = 0;
2010 if (r1 == _RCX_REGNO) {
2011 if (r0 == r2)
2012 xchgr(r0, _RCX_REGNO);
2013 else {
2014 movr(r0, r1);
2015 movr(_RCX_REGNO, r2);
2016 }
2017 }
2018 else {
2019 movr(_RCX_REGNO, r2);
2020 movr(r0, r1);
2021 }
2022 irotshr(code, r0);
2023 if (use) {
2024 movr(_RCX_REGNO, rn(reg));
2025 jit_unget_reg(reg);
2026 }
2027 }
2028 else {
2029 movr(r0, r1);
2030 irotshr(code, r0);
2031 }
2032}
2033
2034static void
2035_irotshi(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
2036{
2037 rex(0, WIDE, _NOREG, _NOREG, r0);
2038 if (i0 == 1) {
2039 ic(0xd1);
2040 mrm(0x03, code, r7(r0));
2041 }
2042 else {
2043 ic(0xc1);
2044 mrm(0x03, code, r7(r0));
2045 ic(i0);
2046 }
2047}
2048
2049static void
2050_rotshi(jit_state_t *_jit, jit_int32_t code,
2051 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2052{
2053 movr(r0, r1);
2054 if (i0)
2055 irotshi(code, r0, i0);
2056}
2057
ba86ff93
PC
2058static void
2059_xlshr(jit_state_t *_jit, jit_bool_t sign,
2060 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
2061{
2062 jit_int32_t sav, set;
2063 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
2064 jit_word_t over, zero, over_done, done;
2065 sav = set = 0;
2066 /* %RCX must be used for shift. */
2067 qsavset(_RCX_REGNO);
2068 allocr(_RCX_REGNO, _RCX);
2069 /* Almost certainly not %RCX */
2070 t1 = r1;
2071 if (r0 == _RCX_REGNO) {
2072 s0 = jit_get_reg(jit_class_gpr);
2073 t0 = rn(s0);
2074 }
2075 else {
2076 t0 = r0;
2077 /* r0 == r1 is undefined behavior */
2078 if (r1 == _RCX_REGNO) {
2079 s1 = jit_get_reg(jit_class_gpr);
2080 t1 = rn(s1);
2081 }
2082 }
2083 /* Allocate a temporary if a register is used more than once, or if
2084 * the value to shift is %RCX */
2085 if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
2086 s2 = jit_get_reg(jit_class_gpr);
2087 t2 = rn(s2);
2088 movr(t2, r2);
2089 }
2090 else
2091 t2 = r2;
2092 /* Allocate temporary if shift is also one of the outputs */
2093 if (r0 == r3 || r1 == r3) {
2094 s3 = jit_get_reg(jit_class_gpr);
2095 t3 = rn(s3);
2096 movr(t3, r3);
2097 }
2098 else
2099 t3 = r3;
2100 /* Bits to shift right */
2101 movi(t1, 0);
2102 /* Shift in %RCX */
2103 /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
2104 movr(_RCX_REGNO, t3);
2105 /* Copy value to low register */
2106 movr(t0, t2);
2107 /* SHLD shifts t0 left pulling extra bits in the right from t1.
2108 * It is very handly to shift bignums, but lightning does not support
2109 * these, nor 128 bit integers. The use of q{l,}sh{r,i} is to verify
2110 * if there precision loss in a shift and/or have it as a quick way
2111 * to multiply or divide by powers of two. */
2112 /* SHLD */
2113 rex(0, WIDE, t1, _NOREG, t0);
2114 ic(0xf);
2115 ic(0xa5);
2116 mrm(0x03, r7(t1), r7(t0));
2117 /* Must swap results if shift value is __WORDSIZE */
2118 alui(X86_CMP, t3, __WORDSIZE);
2119 over = jes(_jit->pc.w);
2120 /* Calculate bits to shift right and fill high register */
2121 rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
2122 if (sign)
2123 rshr(t1, t2, _RCX_REGNO);
2124 else
2125 rshr_u(t1, t2, _RCX_REGNO);
2126 /* FIXME t3 == %rcx only happens in 32 bit as %a3 (JIT_A3) is not
2127 * available -- it might be made available at some point, to
2128 * allow optimizing usage or arguments in registers. For now
2129 * keep the code, as one might cheat and use _RCX directly,
2130 * what is not officially supported, but *must* work. */
2131 /* Need to sign extend high register if shift value is zero */
2132 if (t3 == _RCX_REGNO)
2133 alui(X86_CMP, t3, __WORDSIZE);
2134 else
2135 alui(X86_CMP, t3, 0);
2136 /* Finished. */
2137 zero = jes(_jit->pc.w);
2138 done = jmpsi(_jit->pc.w);
2139 /* Swap registers if shift is __WORDSIZE */
2140 patch_at(over, _jit->pc.w);
2141 xchgr(t0, t1);
2142 over_done = jmpsi(_jit->pc.w);
2143 /* If shift value is zero */
2144 patch_at(zero, _jit->pc.w);
2145 if (sign)
2146 rshi(t1, t2, __WORDSIZE - 1);
2147 else
2148 movi(t1, 0);
2149 patch_at(over_done, _jit->pc.w);
2150 patch_at(done, _jit->pc.w);
2151 /* Release %RCX (if spilled) after branches */
2152 clear(_RCX_REGNO, _RCX);
2153 if (t3 != r3)
2154 jit_unget_reg(s3);
2155 if (t2 != r2)
2156 jit_unget_reg(s2);
2157 if (t1 != r1) {
2158 movr(r1, t1);
2159 jit_unget_reg(s1);
2160 }
2161 if (t0 != r0) {
2162 movr(r0, t0);
2163 jit_unget_reg(s0);
2164 }
2165}
2166
4a71579b
PC
2167static void
2168_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2169{
2170 if (i0 == 0)
2171 movr(r0, r1);
2172 else if (i0 <= 3)
2173 lea(0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? _SCL4 : _SCL8, r0);
2174 else
2175 rotshi(X86_SHL, r0, r1, i0);
2176}
2177
ba86ff93
PC
2178static void
2179_xlshi(jit_state_t *_jit, jit_bool_t sign,
2180 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
2181{
2182 if (i0 == 0) {
2183 movr(r0, r2);
2184 if (sign)
2185 rshi(r1, r2, __WORDSIZE - 1);
2186 else
2187 movi(r1, 0);
2188 }
2189 else if (i0 == __WORDSIZE) {
2190 movr(r1, r2);
2191 movi(r0, 0);
2192 }
2193 else {
2194 assert((jit_uword_t)i0 <= __WORDSIZE);
2195 if (sign)
2196 rshi(r1, r2, __WORDSIZE - i0);
2197 else
2198 rshi_u(r1, r2, __WORDSIZE - i0);
2199 lshi(r0, r2, i0);
2200 }
2201}
2202
2203static void
2204_xrshr(jit_state_t *_jit, jit_bool_t sign,
2205 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
2206{
2207 jit_int32_t sav, set;
2208 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
2209 jit_word_t over, zero, done;
2210 sav = set = 0;
2211 /* %RCX must be used for shift. */
2212 qsavset(_RCX_REGNO);
2213 allocr(_RCX_REGNO, _RCX);
2214 /* Almost certainly not %RCX */
2215 t1 = r1;
2216 if (r0 == _RCX_REGNO) {
2217 s0 = jit_get_reg(jit_class_gpr);
2218 t0 = rn(s0);
2219 }
2220 else {
2221 t0 = r0;
2222 /* r0 == r1 is undefined behavior */
2223 if (r1 == _RCX_REGNO) {
2224 s1 = jit_get_reg(jit_class_gpr);
2225 t1 = rn(s1);
2226 }
2227 }
2228 /* Allocate a temporary if a register is used more than once, or if
2229 * the value to shift is %RCX */
2230 if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
2231 s2 = jit_get_reg(jit_class_gpr);
2232 t2 = rn(s2);
2233 movr(t2, r2);
2234 }
2235 else
2236 t2 = r2;
2237 /* Allocate temporary if shift is also one of the outputs */
2238 if (r0 == r3 || r1 == r3) {
2239 s3 = jit_get_reg(jit_class_gpr);
2240 t3 = rn(s3);
2241 movr(t3, r3);
2242 }
2243 else
2244 t3 = r3;
2245 /* Bits to shift left */
d481fb64 2246 if (sign) {
ba86ff93 2247 rshi(t1, t2, __WORDSIZE - 1);
d481fb64
PC
2248 /* Special case for negative value and zero shift */
2249 alui(X86_CMP, t3, 0);
2250 zero = jnes(_jit->pc.w);
2251 movi(t1, 0);
2252 patch_at(zero, _jit->pc.w);
2253 }
ba86ff93
PC
2254 else
2255 movi(t1, 0);
2256 /* Shift in %RCX */
2257 /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
2258 movr(_RCX_REGNO, t3);
2259 /* Copy value to low register */
2260 movr(t0, t2);
2261 /* SHRD shifts t0 right pulling extra bits in the left from t1 */
2262 /* SHRD */
2263 rex(0, WIDE, t1, _NOREG, t0);
2264 ic(0xf);
2265 ic(0xad);
2266 mrm(0x03, r7(t1), r7(t0));
2267 /* Must swap results if shift value is __WORDSIZE */
2268 alui(X86_CMP, t3, __WORDSIZE);
2269 over = jes(_jit->pc.w);
d481fb64 2270 /* Already zero if shift value is zero */
ba86ff93
PC
2271 alui(X86_CMP, t3, 0);
2272 zero = jes(_jit->pc.w);
2273 /* Calculate bits to shift left and fill high register */
2274 rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
2275 lshr(t1, t2, _RCX_REGNO);
2276 done = jmpsi(_jit->pc.w);
2277 /* Swap registers if shift is __WORDSIZE */
2278 patch_at(over, _jit->pc.w);
2279 xchgr(t0, t1);
2280 /* If shift value is zero */
2281 patch_at(zero, _jit->pc.w);
2282 patch_at(done, _jit->pc.w);
2283 /* Release %RCX (if spilled) after branches */
2284 clear(_RCX_REGNO, _RCX);
2285 if (t3 != r3)
2286 jit_unget_reg(s3);
2287 if (t2 != r2)
2288 jit_unget_reg(s2);
2289 if (t1 != r1) {
2290 movr(r1, t1);
2291 jit_unget_reg(s1);
2292 }
2293 if (t0 != r0) {
2294 movr(r0, t0);
2295 jit_unget_reg(s0);
2296 }
2297}
2298
2299static void
2300_xrshi(jit_state_t *_jit, jit_bool_t sign,
2301 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
2302{
2303 if (i0 == 0) {
2304 movr(r0, r2);
d481fb64 2305 movi(r1, 0);
ba86ff93
PC
2306 }
2307 else if (i0 == __WORDSIZE) {
2308 movr(r1, r2);
2309 if (sign)
2310 rshi(r0, r2, __WORDSIZE - 1);
2311 else
2312 movi(r0, 0);
2313 }
2314 else {
2315 assert((jit_uword_t)i0 <= __WORDSIZE);
2316 lshi(r1, r2, __WORDSIZE - i0);
2317 if (sign)
2318 rshi(r0, r2, i0);
2319 else
2320 rshi_u(r0, r2, i0);
2321 }
2322}
2323
4a71579b
PC
2324static void
2325_unr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
2326{
2327 rex(0, WIDE, _NOREG, _NOREG, r0);
2328 ic(0xf7);
2329 mrm(0x03, code, r7(r0));
2330}
2331
2332static void
2333_negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2334{
2335 if (r0 == r1)
2336 inegr(r0);
2337 else {
2338 ixorr(r0, r0);
2339 isubr(r0, r1);
2340 }
2341}
2342
2343static void
2344_comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2345{
2346 movr(r0, r1);
2347 icomr(r0);
2348}
2349
2350#if USE_INC_DEC
2351static void
2352_incr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2353{
2354 movr(r0, r1);
2355# if __X64
2356 rex(0, WIDE, _NOREG, _NOREG, r0);
2357 ic(0xff);
2358 ic(0xc0 | r7(r0));
2359# else
2360 ic(0x40 | r7(r0));
2361# endif
2362}
2363
2364static void
2365_decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2366{
2367 movr(r0, r1);
2368# if __X64
2369 rex(0, WIDE, _NOREG, _NOREG, r0);
2370 ic(0xff);
2371 ic(0xc8 | r7(r0));
2372# else
2373 ic(0x48 | r7(r0));
2374# endif
2375}
2376#endif
2377
79bfeef6
PC
2378static void
2379_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2380{
2381 comr(r0, r1);
2382 clzr(r0, r0);
2383}
2384
2385static void
2386_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2387{
2388 jit_word_t w, x;
2389 /* LZCNT */
2390 if (jit_cpu.abm)
2391 ic(0xf3);
2392 /* else BSR */
2393 rex(0, WIDE, r0, _NOREG, r1);
2394 ic(0x0f);
2395 ic(0xbd);
2396 mrm(0x3, r7(r0), r7(r1));
2397 if (!jit_cpu.abm) {
2398 /* jump if undefined: r1 == 0 */
2399 w = jccs(X86_CC_E, _jit->pc.w);
2400 /* count leading zeros */
2401 rsbi(r0, r0, __WORDSIZE - 1);
2402 /* done */
2403 x = jmpsi(_jit->pc.w);
2404 /* if r1 == 0 */
2405 patch_at(w, _jit->pc.w);
2406 movi(r0, __WORDSIZE);
2407 /* not undefined */
2408 patch_at(x, _jit->pc.w);
2409 }
2410 /* LZCNT has defined behavior for value zero and count leading zeros */
2411}
2412
2413static void
2414_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2415{
2416 comr(r0, r1);
2417 ctzr(r0, r0);
2418}
2419
2420static void
2421_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2422{
2423 jit_word_t w;
2424 jit_int32_t t0;
2425 if (!jit_cpu.abm) {
2426 if (jit_cmov_p())
2427 t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk);
2428 else
2429 t0 = _NOREG;
2430 if (t0 != _NOREG)
2431 movi(rn(t0), __WORDSIZE);
2432 }
2433 /* TZCNT */
2434 if (jit_cpu.abm)
2435 ic(0xf3);
2436 /* else BSF */
2437 rex(0, WIDE, r0, _NOREG, r1);
2438 ic(0x0f);
2439 ic(0xbc);
2440 mrm(0x3, r7(r0), r7(r1));
2441 if (!jit_cpu.abm) {
2442 /* No conditional move or need spill/reload a temporary */
2443 if (t0 == _NOREG) {
2444 w = jccs(X86_CC_E, _jit->pc.w);
2445 movi(r0, __WORDSIZE);
2446 patch_at(w, _jit->pc.w);
2447 }
2448 else {
2449 /* CMOVE */
2450 rex(0, WIDE, r0, _NOREG, rn(t0));
2451 ic(0x0f);
2452 ic(0x44);
2453 mrm(0x3, r7(r0), r7(rn(t0)));
2454 jit_unget_reg(t0);
2455 }
2456 }
2457 /* TZCNT has defined behavior for value zero */
2458}
2459
ba86ff93
PC
2460static void
2461_rbitr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
2462{
2463 jit_word_t loop;
2464 jit_int32_t sav, set;
2465 jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
2466 static const unsigned char swap_tab[256] = {
2467 0, 128, 64, 192, 32, 160, 96, 224,
2468 16, 144, 80, 208, 48, 176, 112, 240,
2469 8, 136, 72, 200, 40, 168, 104, 232,
2470 24, 152, 88, 216 ,56, 184, 120, 248,
2471 4, 132, 68, 196, 36, 164, 100, 228,
2472 20, 148, 84, 212, 52, 180, 116, 244,
2473 12, 140, 76, 204, 44, 172, 108, 236,
2474 28, 156, 92, 220, 60, 188, 124, 252,
2475 2, 130, 66, 194, 34, 162, 98, 226,
2476 18, 146, 82, 210, 50, 178, 114, 242,
2477 10, 138, 74, 202, 42, 170, 106, 234,
2478 26, 154, 90, 218, 58, 186, 122, 250,
2479 6, 134, 70, 198, 38, 166, 102, 230,
2480 22, 150, 86, 214, 54, 182, 118, 246,
2481 14, 142, 78, 206, 46, 174, 110, 238,
2482 30, 158, 94, 222, 62, 190, 126, 254,
2483 1, 129, 65, 193, 33, 161, 97, 225,
2484 17, 145, 81, 209, 49, 177, 113, 241,
2485 9, 137, 73, 201, 41, 169, 105, 233,
2486 25, 153, 89, 217, 57, 185, 121, 249,
2487 5, 133, 69, 197, 37, 165, 101, 229,
2488 21, 149, 85, 213, 53, 181, 117, 245,
2489 13, 141, 77, 205, 45, 173, 109, 237,
2490 29, 157, 93, 221, 61, 189, 125, 253,
2491 3, 131, 67, 195, 35, 163, 99, 227,
2492 19, 147, 83, 211, 51, 179, 115, 243,
2493 11, 139, 75, 203, 43, 171, 107, 235,
2494 27, 155, 91, 219, 59, 187, 123, 251,
2495 7, 135, 71, 199, 39, 167, 103, 231,
2496 23, 151, 87, 215, 55, 183, 119, 247,
2497 15, 143, 79, 207, 47, 175, 111, 239,
2498 31, 159, 95, 223, 63, 191, 127, 255
2499 };
2500 sav = set = 0;
2501 isavset(_RCX_REGNO);
2502 allocr(_RCX_REGNO, _RCX);
2503 if (r0 == _RCX_REGNO) {
2504 t0 = jit_get_reg(jit_class_gpr);
2505 r0_reg = rn(t0);
2506 }
2507 else {
2508 t0 = JIT_NOREG;
2509 r0_reg = r0;
2510 }
2511 if (r1 == _RCX_REGNO || r0 == r1) {
2512 t1 = jit_get_reg(jit_class_gpr);
2513 r1_reg = rn(t1);
2514 movr(r1_reg, r1);
2515 }
2516 else {
2517 t1 = JIT_NOREG;
2518 r1_reg = r1;
2519 }
2520 t2 = jit_get_reg(jit_class_gpr);
2521 t3 = jit_get_reg(jit_class_gpr);
2522#if __WORDSIZE == 32
2523 /* Avoid condition that causes running out of registers */
2524 if (!reg8_p(r1_reg)) {
2525 movi(rn(t2), 0xff);
2526 andr(rn(t2), r1_reg, rn(t2));
2527 }
2528 else
2529#endif
2530 extr_uc(rn(t2), r1_reg);
2531 movi(rn(t3), (jit_word_t)swap_tab);
2532 ldxr_uc(r0_reg, rn(t3), rn(t2));
2533 movi(_RCX_REGNO, 8);
2534 loop = _jit->pc.w;
2535 rshr(rn(t2), r1_reg, _RCX_REGNO);
2536 extr_uc(rn(t2), rn(t2));
2537 lshi(r0_reg, r0_reg, 8);
2538 ldxr_uc(rn(t2), rn(t3), rn(t2));
2539 orr(r0_reg, r0_reg, rn(t2));
2540 addi(_RCX_REGNO, _RCX_REGNO, 8);
2541 alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
2542 jls(loop);
2543 clear(_RCX_REGNO, _RCX);
2544 jit_unget_reg(t3);
2545 jit_unget_reg(t2);
2546 if (t1 != JIT_NOREG)
2547 jit_unget_reg(t1);
2548 if (t0 != JIT_NOREG) {
2549 movr(r0, r0_reg);
2550 jit_unget_reg(t0);
2551 }
2552}
2553
2554static void
2555_popcntr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
2556{
2557 if (jit_cpu.abm) {
2558 ic(0xf3);
2559 rex(0, WIDE, r0, _NOREG, r1);
2560 ic(0x0f);
2561 ic(0xb8);
2562 mrm(0x3, r7(r0), r7(r1));
2563 }
2564 else {
2565 jit_word_t loop;
2566 jit_int32_t sav, set;
2567 jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
2568 static const unsigned char pop_tab[256] = {
2569 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
2570 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2571 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2572 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2573 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2574 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2575 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2576 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
2577 };
2578 sav = set = 0;
2579 isavset(_RCX_REGNO);
2580 allocr(_RCX_REGNO, _RCX);
2581 if (r0 == _RCX_REGNO) {
2582 t0 = jit_get_reg(jit_class_gpr);
2583 r0_reg = rn(t0);
2584 }
2585 else {
2586 t0 = JIT_NOREG;
2587 r0_reg = r0;
2588 }
2589 if (r1 == _RCX_REGNO || r0 == r1) {
2590 t1 = jit_get_reg(jit_class_gpr);
2591 r1_reg = rn(t1);
2592 movr(r1_reg, r1);
2593 }
2594 else {
2595 t1 = JIT_NOREG;
2596 r1_reg = r1;
2597 }
2598 t2 = jit_get_reg(jit_class_gpr);
2599 t3 = jit_get_reg(jit_class_gpr);
2600#if __WORDSIZE == 32
2601 /* Avoid condition that causes running out of registers */
2602 if (!reg8_p(r1_reg)) {
2603 movi(rn(t2), 0xff);
2604 andr(rn(t2), r1_reg, rn(t2));
2605 }
2606 else
2607#endif
2608 extr_uc(rn(t2), r1_reg);
2609 movi(rn(t3), (jit_word_t)pop_tab);
2610 ldxr_uc(r0_reg, rn(t3), rn(t2));
2611 movi(_RCX_REGNO, 8);
2612 loop = _jit->pc.w;
2613 rshr(rn(t2), r1_reg, _RCX_REGNO);
2614 extr_uc(rn(t2), rn(t2));
2615 ldxr_uc(rn(t2), rn(t3), rn(t2));
2616 addr(r0_reg, r0_reg, rn(t2));
2617 addi(_RCX_REGNO, _RCX_REGNO, 8);
2618 alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
2619 jls(loop);
2620 clear(_RCX_REGNO, _RCX);
2621 jit_unget_reg(t3);
2622 jit_unget_reg(t2);
2623 if (t1 != JIT_NOREG)
2624 jit_unget_reg(t1);
2625 if (t0 != JIT_NOREG) {
2626 movr(r0, r0_reg);
2627 jit_unget_reg(t0);
2628 }
2629 }
2630}
2631
4a71579b
PC
2632static void
2633_cr(jit_state_t *_jit,
2634 jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2635{
2636 jit_int32_t reg;
2637 jit_bool_t same;
2638 if (reg8_p(r0)) {
2639 same = r0 == r1 || r0 == r2;
2640 if (!same)
2641 ixorr(r0, r0);
2642 icmpr(r1, r2);
2643 if (same)
2644 imovi(r0, 0);
2645 cc(code, r0);
2646 }
2647 else {
2648 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2649 ixorr(rn(reg), rn(reg));
2650 icmpr(r1, r2);
2651 cc(code, rn(reg));
2652 movr(r0, rn(reg));
2653 jit_unget_reg(reg);
2654 }
2655}
2656
2657static void
2658_ci(jit_state_t *_jit,
2659 jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2660{
2661 jit_int32_t reg;
2662 jit_bool_t same;
2663 if (reg8_p(r0)) {
2664 same = r0 == r1;
2665 if (!same)
2666 ixorr(r0, r0);
2667 icmpi(r1, i0);
2668 if (same)
2669 imovi(r0, 0);
2670 cc(code, r0);
2671 }
2672 else {
2673 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2674 ixorr(rn(reg), rn(reg));
2675 icmpi(r1, i0);
2676 cc(code, rn(reg));
2677 movr(r0, rn(reg));
2678 jit_unget_reg(reg);
2679 }
2680}
2681
2682static void
2683_ci0(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
2684{
2685 jit_int32_t reg;
2686 jit_bool_t same;
2687 if (reg8_p(r0)) {
2688 same = r0 == r1;
2689 if (!same)
2690 ixorr(r0, r0);
2691 testr(r1, r1);
2692 if (same)
2693 imovi(r0, 0);
2694 cc(code, r0);
2695 }
2696 else {
2697 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2698 ixorr(rn(reg), rn(reg));
2699 testr(r1, r1);
2700 cc(code, rn(reg));
2701 movr(r0, rn(reg));
2702 jit_unget_reg(reg);
2703 }
2704}
2705
2706static void
2707_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2708{
2709 if (r1 == r2)
2710 movi(r0, 0);
2711 else
2712 cr(X86_CC_L, r0, r1, r2);
2713}
2714
2715static void
2716_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2717{
2718 if (i0)
2719 ci(X86_CC_L, r0, r1, i0);
2720 else
2721 ci0(X86_CC_S, r0, r1);
2722}
2723
2724static void
2725_ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2726{
2727 if (r1 == r2)
2728 movi(r0, 0);
2729 else
2730 cr(X86_CC_B, r0, r1, r2);
2731}
2732
2733static void
2734_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2735{
2736 if (r1 == r2)
2737 movi(r0, 1);
2738 else
2739 cr(X86_CC_LE, r0, r1, r2);
2740}
2741
2742static void
2743_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2744{
2745 if (r1 == r2)
2746 movi(r0, 1);
2747 else
2748 cr(X86_CC_BE, r0, r1, r2);
2749}
2750
2751static void
2752_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2753{
2754 if (i0)
2755 ci(X86_CC_BE, r0, r1, i0);
2756 else
2757 ci0(X86_CC_E, r0, r1);
2758}
2759
2760static void
2761_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2762{
2763 if (r1 == r2)
2764 movi(r0, 1);
2765 else
2766 cr(X86_CC_E, r0, r1, r2);
2767}
2768
2769static void
2770_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2771{
2772 if (i0)
2773 ci(X86_CC_E, r0, r1, i0);
2774 else
2775 ci0(X86_CC_E, r0, r1);
2776}
2777
2778static void
2779_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2780{
2781 if (r1 == r2)
2782 movi(r0, 1);
2783 else
2784 cr(X86_CC_GE, r0, r1, r2);
2785}
2786
2787static void
2788_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2789{
2790 if (i0)
2791 ci(X86_CC_GE, r0, r1, i0);
2792 else
2793 ci0(X86_CC_NS, r0, r1);
2794}
2795
2796static void
2797_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2798{
2799 if (r1 == r2)
2800 movi(r0, 1);
2801 else
2802 cr(X86_CC_AE, r0, r1, r2);
2803}
2804
2805static void
2806_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2807{
2808 if (i0)
2809 ci(X86_CC_AE, r0, r1, i0);
2810 else
2811 ci0(X86_CC_NB, r0, r1);
2812}
2813
2814static void
2815_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2816{
2817 if (r1 == r2)
2818 movi(r0, 0);
2819 else
2820 cr(X86_CC_G, r0, r1, r2);
2821}
2822
2823static void
2824_gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2825{
2826 if (r1 == r2)
2827 movi(r0, 0);
2828 else
2829 cr(X86_CC_A, r0, r1, r2);
2830}
2831
2832static void
2833_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2834{
2835 if (i0)
2836 ci(X86_CC_A, r0, r1, i0);
2837 else
2838 ci0(X86_CC_NE, r0, r1);
2839}
2840
2841static void
2842_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2843{
2844 if (r1 == r2)
2845 movi(r0, 0);
2846 else
2847 cr(X86_CC_NE, r0, r1, r2);
2848}
2849
2850static void
2851_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2852{
2853 if (i0)
2854 ci(X86_CC_NE, r0, r1, i0);
2855 else
2856 ci0(X86_CC_NE, r0, r1);
2857}
2858
2859static void
2860_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2861{
2862 if (r0 != r1) {
2863 rex(0, 1, r1, _NOREG, r0);
2864 ic(0x89);
2865 ic(0xc0 | (r1 << 3) | r7(r0));
2866 }
2867}
2868
2869static void
2870_imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2871{
2872#if __X64
2873# if !__X64_32
2874 if (fits_uint32_p(i0)) {
2875# endif
2876 rex(0, 0, _NOREG, _NOREG, r0);
2877 ic(0xb8 | r7(r0));
2878 ii(i0);
2879# if !__X64_32
2880 }
79bfeef6
PC
2881 else if (can_sign_extend_int_p(i0)) {
2882 rex(0, 1, _NOREG, _NOREG, r0);
2883 ic(0xc7);
2884 ic(0xc0 | r7(r0));
2885 ii(i0);
2886 }
4a71579b
PC
2887 else {
2888 rex(0, 1, _NOREG, _NOREG, r0);
2889 ic(0xb8 | r7(r0));
2890 il(i0);
2891 }
2892# endif
2893#else
2894 ic(0xb8 | r7(r0));
2895 ii(i0);
2896#endif
2897}
2898
79bfeef6
PC
2899#if CAN_RIP_ADDRESS
2900static jit_word_t
2901#else
4a71579b 2902static void
79bfeef6 2903#endif
4a71579b
PC
2904_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2905{
79bfeef6
PC
2906#if CAN_RIP_ADDRESS
2907 jit_word_t w, rel;
2908 w = _jit->pc.w;
2909 rel = i0 - (w + 8);
2910 rel = rel < 0 ? rel - 8 : rel + 8;
2911 if (can_sign_extend_int_p(rel)) {
2912 /* lea rel(%rip), %r0 */
2913 rex(0, WIDE, r0, _NOREG, _NOREG);
2914 w = _jit->pc.w;
2915 ic(0x8d);
2916 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
2917 }
2918 else
2919#endif
4a71579b
PC
2920 if (i0)
2921 imovi(r0, i0);
2922 else
2923 ixorr(r0, r0);
79bfeef6
PC
2924#if CAN_RIP_ADDRESS
2925 return (w);
2926#endif
4a71579b
PC
2927}
2928
2929static jit_word_t
2930_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2931{
79bfeef6 2932 jit_word_t w;
4a71579b 2933 rex(0, WIDE, _NOREG, _NOREG, r0);
79bfeef6 2934 w = _jit->pc.w;
4a71579b
PC
2935 ic(0xb8 | r7(r0));
2936 il(i0);
79bfeef6 2937 return (w);
4a71579b
PC
2938}
2939
2940static void
2941_movcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2942{
2943 rex(0, WIDE, r0, _NOREG, r1);
2944 ic(0x0f);
2945 ic(0xbe);
2946 mrm(0x03, r7(r0), r7(r1));
2947}
2948
2949static void
2950_movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2951{
2952 rex(0, WIDE, r0, _NOREG, r1);
2953 ic(0x0f);
2954 ic(0xb6);
2955 mrm(0x03, r7(r0), r7(r1));
2956}
2957
2958static void
2959_movsr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2960{
2961 rex(0, WIDE, r0, _NOREG, r1);
2962 ic(0x0f);
2963 ic(0xbf);
2964 mrm(0x03, r7(r0), r7(r1));
2965}
2966
2967static void
2968_movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2969{
2970 rex(0, WIDE, r0, _NOREG, r1);
2971 ic(0x0f);
2972 ic(0xb7);
2973 mrm(0x03, r7(r0), r7(r1));
2974}
2975
ba3814c1
PC
2976static void
2977_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2978 jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
2979{
2980 jit_int32_t save_rax, restore_rax;
2981 jit_int32_t ascasr_reg, ascasr_use;
2982 if (r0 != _RAX_REGNO) { /* result not in %rax */
2983 if (r2 != _RAX_REGNO) { /* old value not in %rax */
2984 save_rax = jit_get_reg(jit_class_gpr);
2985 movr(rn(save_rax), _RAX_REGNO);
2986 restore_rax = 1;
2987 }
2988 else
2989 restore_rax = 0;
2990 }
2991 else
2992 restore_rax = 0;
2993 if (r2 != _RAX_REGNO)
2994 movr(_RAX_REGNO, r2);
2995 if (r1 == _NOREG) { /* using immediate address */
2996 if (!can_sign_extend_int_p(i0)) {
2997 ascasr_reg = jit_get_reg(jit_class_gpr);
2998 if (ascasr_reg == _RAX) {
2999 ascasr_reg = jit_get_reg(jit_class_gpr);
3000 jit_unget_reg(_RAX);
3001 }
3002 ascasr_use = 1;
3003 movi(rn(ascasr_reg), i0);
3004 }
3005 else
3006 ascasr_use = 0;
3007 }
3008 else
3009 ascasr_use = 0;
3010 ic(0xf0); /* lock */
3011 if (ascasr_use)
3012 rex(0, WIDE, r3, _NOREG, rn(ascasr_reg));
3013 else
3014 rex(0, WIDE, r3, _NOREG, r1);
3015 ic(0x0f);
3016 ic(0xb1);
3017 if (r1 != _NOREG) /* casr */
3018 rx(r3, 0, r1, _NOREG, _SCL1);
3019 else { /* casi */
3020 if (ascasr_use)
3021 rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1); /* address in reg */
3022 else
3023 rx(r3, i0, _NOREG, _NOREG, _SCL1); /* address in offset */
3024 }
3025 cc(X86_CC_E, r0);
3026 if (r0 != _RAX_REGNO)
3027 movr(r0, _RAX_REGNO);
3028 if (restore_rax) {
3029 movr(_RAX_REGNO, rn(save_rax));
3030 jit_unget_reg(save_rax);
3031 }
3032 if (ascasr_use)
3033 jit_unget_reg(ascasr_reg);
3034}
3035
1f22b268
PC
3036static void
3037_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3038{
3039 assert(jit_cmov_p());
3040
3041 testr(r2, r2);
3042
3043 rex(0, WIDE, r0, _NOREG, r1);
3044 ic(0x0f);
3045 ic(0x45);
3046 mrm(0x03, r7(r0), r7(r1));
3047}
3048
3049static void
3050_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3051{
3052 assert(jit_cmov_p());
3053
3054 testr(r2, r2);
3055
3056 rex(0, WIDE, r0, _NOREG, r1);
3057 ic(0x0f);
3058 ic(0x44);
3059 mrm(0x03, r7(r0), r7(r1));
3060}
3061
4a71579b
PC
3062#if __X64
3063static void
3064_movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3065{
3066 rex(0, 1, r0, _NOREG, r1);
3067 ic(0x63);
3068 mrm(0x03, r7(r0), r7(r1));
3069}
3070
3071static void
3072_movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3073{
3074 rex(0, 0, r1, _NOREG, r0);
3075 ic(0x89);
3076 ic(0xc0 | (r1 << 3) | r7(r0));
3077}
3078#endif
3079
3080static void
40a44dcb 3081_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b
PC
3082{
3083 extr_us(r0, r1);
3084 ic(0x66);
3085 rex(0, 0, _NOREG, _NOREG, r0);
3086 ic(0xc1);
3087 mrm(0x03, X86_ROR, r7(r0));
3088 ic(8);
3089}
3090
3091static void
40a44dcb 3092_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b
PC
3093{
3094 movr(r0, r1);
3095 rex(0, 0, _NOREG, _NOREG, r0);
3096 ic(0x0f);
3097 ic(0xc8 | r7(r0));
3098}
3099
3100#if __X64 && !__X64_32
3101static void
40a44dcb 3102_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b
PC
3103{
3104 movr(r0, r1);
3105 rex(0, 1, _NOREG, _NOREG, r0);
3106 ic(0x0f);
3107 ic(0xc8 | r7(r0));
3108}
3109#endif
3110
ba86ff93
PC
3111static void
3112_extr(jit_state_t *_jit,
3113 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3114{
3115 jit_word_t mask;
3116 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3117 if (i1 == __WORDSIZE)
3118 movr(r0, r1);
3119 else {
3120 if (__WORDSIZE - (i0 + i1)) {
3121 lshi(r0, r1, __WORDSIZE - (i0 + i1));
3122 rshi(r0, r0, __WORDSIZE - i1);
3123 }
3124 else
3125 rshi(r0, r1, __WORDSIZE - i1);
3126 }
3127}
3128
3129static void
3130_extr_u(jit_state_t *_jit,
3131 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3132{
3133 jit_int32_t t0;
3134 jit_word_t mask;
3135 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3136 if (i1 == __WORDSIZE)
3137 movr(r0, r1);
3138 /* Only cheaper in code size or number of instructions if i0 is not zero */
3139 /* Number of cpu cicles not tested */
3140 else if (i0 && jit_cpu.bmi2) {
3141 mask = ((ONE << i1) - 1) << i0;
3142 t0 = jit_get_reg(jit_class_gpr);
3143 movi(rn(t0), mask);
3144 /* PEXT */
3145 vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 2);
3146 ic(0xf5);
3147 mrm(0x03, r7(r0), r7(rn(t0)));
3148 jit_unget_reg(t0);
3149 }
3150 else {
3151 if (i0)
3152 rshi_u(r0, r1, i0);
3153 andi(r0, r0, (ONE << i1) - 1);
3154 }
3155}
3156
3157static void
3158_depr(jit_state_t *_jit,
3159 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3160{
3161 jit_word_t mask;
3162 jit_int32_t t0, t1;
3163 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3164 if (i1 == __WORDSIZE)
3165 movr(r0, r1);
3166 /* Only cheaper in code size or number of instructions if i0 is not zero */
3167 /* Number of cpu cicles not tested */
3168 else if (i0 && jit_cpu.bmi2) {
3169 mask = ((ONE << i1) - 1) << i0;
3170 t0 = jit_get_reg(jit_class_gpr);
3171 t1 = jit_get_reg(jit_class_gpr);
3172 movi(rn(t0), mask);
3173 movr(rn(t1), r0);
3174 /* PDEP */
3175 vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 3);
3176 ic(0xf5);
3177 mrm(0x03, r7(r0), r7(rn(t0)));
3178 andi(rn(t1), rn(t1), ~mask);
3179 orr(r0, r0, rn(t1));
3180 jit_unget_reg(t1);
3181 jit_unget_reg(t0);
3182 }
3183 else {
3184 mask = (ONE << i1) - 1;
3185 t0 = jit_get_reg(jit_class_gpr);
3186 andi(rn(t0), r1, mask);
3187 if (i0) {
3188 lshi(rn(t0), rn(t0), i0);
3189 mask <<= i0;
3190 }
3191 andi(r0, r0, ~mask);
3192 orr(r0, r0, rn(t0));
3193 jit_unget_reg(t0);
3194 }
3195}
3196
4a71579b
PC
3197static void
3198_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3199{
3200 jit_int32_t reg;
3201 if (reg8_p(r1))
3202 movcr(r0, r1);
3203 else {
3204 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3205 movr(rn(reg), r1);
3206 movcr(r0, rn(reg));
3207 jit_unget_reg(reg);
3208 }
3209}
3210
3211static void
3212_extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3213{
3214 jit_int32_t reg;
3215 if (reg8_p(r1))
3216 movcr_u(r0, r1);
3217 else {
3218 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3219 movr(rn(reg), r1);
3220 movcr_u(r0, rn(reg));
3221 jit_unget_reg(reg);
3222 }
3223}
3224
3225static void
3226_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3227{
3228 rex(0, WIDE, r0, _NOREG, r1);
3229 ic(0x0f);
3230 ic(0xbe);
3231 rx(r0, 0, r1, _NOREG, _SCL1);
3232}
3233
3234static void
3235_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3236{
3237 jit_int32_t reg;
79bfeef6
PC
3238#if CAN_RIP_ADDRESS
3239 jit_word_t rel = i0 - _jit->pc.w;
3240 rel = rel < 0 ? rel - 8 : rel + 8;
3241 if (can_sign_extend_int_p(rel)) {
3242 rex(0, WIDE, r0, _NOREG, _NOREG);
3243 ic(0x0f);
3244 ic(0xbe);
3245 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3246 }
3247 else
3248#endif
3249 if (address_p(i0)) {
4a71579b
PC
3250 rex(0, WIDE, r0, _NOREG, _NOREG);
3251 ic(0x0f);
3252 ic(0xbe);
3253 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3254 }
3255 else {
3256 reg = jit_get_reg(jit_class_gpr);
3257 movi(rn(reg), i0);
3258 ldr_c(r0, rn(reg));
3259 jit_unget_reg(reg);
3260 }
3261}
3262
3263static void
3264_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3265{
3266 rex(0, WIDE, r0, _NOREG, r1);
3267 ic(0x0f);
3268 ic(0xb6);
3269 rx(r0, 0, r1, _NOREG, _SCL1);
3270}
3271
3272static void
3273_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3274{
3275 jit_int32_t reg;
79bfeef6
PC
3276#if CAN_RIP_ADDRESS
3277 jit_word_t rel = i0 - _jit->pc.w;
3278 rel = rel < 0 ? rel - 8 : rel + 8;
3279 if (can_sign_extend_int_p(rel)) {
3280 rex(0, WIDE, r0, _NOREG, _NOREG);
3281 ic(0x0f);
3282 ic(0xb6);
3283 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3284 }
3285 else
3286#endif
3287 if (address_p(i0)) {
4a71579b
PC
3288 rex(0, WIDE, r0, _NOREG, _NOREG);
3289 ic(0x0f);
3290 ic(0xb6);
3291 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3292 }
3293 else {
3294 reg = jit_get_reg(jit_class_gpr);
3295 movi(rn(reg), i0);
3296 ldr_uc(r0, rn(reg));
3297 jit_unget_reg(reg);
3298 }
3299}
3300
3301static void
3302_ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3303{
3304 rex(0, WIDE, r0, _NOREG, r1);
3305 ic(0x0f);
3306 ic(0xbf);
3307 rx(r0, 0, r1, _NOREG, _SCL1);
3308}
3309
3310static void
3311_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3312{
3313 jit_int32_t reg;
79bfeef6
PC
3314#if CAN_RIP_ADDRESS
3315 jit_word_t rel = i0 - _jit->pc.w;
3316 rel = rel < 0 ? rel - 8 : rel + 8;
3317 if (can_sign_extend_int_p(rel)) {
3318 rex(0, WIDE, r0, _NOREG, _NOREG);
3319 ic(0x0f);
3320 ic(0xbf);
3321 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3322 }
3323 else
3324#endif
3325 if (address_p(i0)) {
4a71579b
PC
3326 rex(0, WIDE, r0, _NOREG, _NOREG);
3327 ic(0x0f);
3328 ic(0xbf);
3329 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3330 }
3331 else {
3332 reg = jit_get_reg(jit_class_gpr);
3333 movi(rn(reg), i0);
3334 ldr_s(r0, rn(reg));
3335 jit_unget_reg(reg);
3336 }
3337}
3338
3339static void
3340_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3341{
3342 rex(0, WIDE, r0, _NOREG, r1);
3343 ic(0x0f);
3344 ic(0xb7);
3345 rx(r0, 0, r1, _NOREG, _SCL1);
3346}
3347
3348static void
3349_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3350{
3351 jit_int32_t reg;
79bfeef6
PC
3352#if CAN_RIP_ADDRESS
3353 jit_word_t rel = i0 - _jit->pc.w;
3354 rel = rel < 0 ? rel - 8 : rel + 8;
3355 if (can_sign_extend_int_p(rel)) {
3356 rex(0, WIDE, r0, _NOREG, _NOREG);
3357 ic(0x0f);
3358 ic(0xb7);
3359 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3360 }
3361 else
3362#endif
3363 if (address_p(i0)) {
4a71579b
PC
3364 rex(0, WIDE, r0, _NOREG, _NOREG);
3365 ic(0x0f);
3366 ic(0xb7);
3367 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3368 }
3369 else {
3370 reg = jit_get_reg(jit_class_gpr);
3371 movi(rn(reg), i0);
3372 ldr_us(r0, rn(reg));
3373 jit_unget_reg(reg);
3374 }
3375}
3376
3377#if __X32 || !__X64_32
3378static void
3379_ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3380{
3381#if __X64
3382 rex(0, WIDE, r0, _NOREG, r1);
3383 ic(0x63);
3384#else
3385 ic(0x8b);
3386#endif
3387 rx(r0, 0, r1, _NOREG, _SCL1);
3388}
3389
3390static void
3391_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3392{
3393 jit_int32_t reg;
79bfeef6
PC
3394#if CAN_RIP_ADDRESS
3395 jit_word_t rel = i0 - _jit->pc.w;
3396 rel = rel < 0 ? rel - 8 : rel + 8;
3397 if (can_sign_extend_int_p(rel)) {
3398 rex(0, WIDE, r0, _NOREG, _NOREG);
3399 ic(0x63);
3400 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3401 }
3402 else
3403#endif
3404 if (address_p(i0)) {
4a71579b
PC
3405#if __X64
3406 rex(0, WIDE, r0, _NOREG, _NOREG);
3407 ic(0x63);
3408#else
3409 ic(0x8b);
3410#endif
3411 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3412 }
3413 else {
3414 reg = jit_get_reg(jit_class_gpr);
3415 movi(rn(reg), i0);
3416 ldr_i(r0, rn(reg));
3417 jit_unget_reg(reg);
3418 }
3419}
3420#endif
3421
3422#if __X64
3423static void
3424_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3425{
3426 rex(0, 0, r0, _NOREG, r1);
3427 ic(0x63);
3428 rx(r0, 0, r1, _NOREG, _SCL1);
3429}
3430
3431static void
3432_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3433{
3434 jit_int32_t reg;
79bfeef6
PC
3435# if !__X64_32
3436 jit_word_t rel = i0 - _jit->pc.w;
3437 rel = rel < 0 ? rel - 8 : rel + 8;
3438 if (can_sign_extend_int_p(rel)) {
3439 rex(0, 0, r0, _NOREG, _NOREG);
3440 ic(0x63);
3441 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3442 }
3443 else
3444#endif
3445 if (address_p(i0)) {
4a71579b
PC
3446 rex(0, 0, r0, _NOREG, _NOREG);
3447 ic(0x63);
3448 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3449 }
3450 else {
3451 reg = jit_get_reg(jit_class_gpr);
3452 movi(rn(reg), i0);
79bfeef6
PC
3453# if __X64_32
3454 ldr_i(r0, rn(reg));
3455# else
4a71579b 3456 ldr_ui(r0, rn(reg));
79bfeef6 3457# endif
4a71579b
PC
3458 jit_unget_reg(reg);
3459 }
3460}
3461
3462# if !__X64_32
3463static void
3464_ldr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3465{
3466 rex(0, 1, r0, _NOREG, r1);
3467 ic(0x8b);
3468 rx(r0, 0, r1, _NOREG, _SCL1);
3469}
3470
3471static void
3472_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3473{
3474 jit_int32_t reg;
79bfeef6
PC
3475 jit_word_t rel = i0 - _jit->pc.w;
3476 rel = rel < 0 ? rel - 8 : rel + 8;
3477 if (can_sign_extend_int_p(rel)) {
3478 rex(0, WIDE, r0, _NOREG, _NOREG);
3479 ic(0x8b);
3480 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3481 }
3482 else if (can_sign_extend_int_p(i0)) {
3483 rex(0, WIDE, r0, _NOREG, _NOREG);
4a71579b
PC
3484 ic(0x8b);
3485 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3486 }
3487 else {
3488 reg = jit_get_reg(jit_class_gpr);
3489 movi(rn(reg), i0);
3490 ldr_l(r0, rn(reg));
3491 jit_unget_reg(reg);
3492 }
3493}
3494# endif
3495#endif
3496
3497static void
3498_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3499{
3500#if __X64_32
3501 addr(r0, r1, r2);
3502 ldr_c(r0, r0);
3503#else
3504 rex(0, WIDE, r0, r1, r2);
3505 ic(0x0f);
3506 ic(0xbe);
3507 rx(r0, 0, r2, r1, _SCL1);
3508#endif
3509}
3510
3511static void
3512_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3513{
3514 jit_int32_t reg;
3515 if (can_sign_extend_int_p(i0)) {
3516 rex(0, WIDE, r0, _NOREG, r1);
3517 ic(0x0f);
3518 ic(0xbe);
3519 rx(r0, i0, r1, _NOREG, _SCL1);
3520 }
3521 else {
3522 reg = jit_get_reg(jit_class_gpr);
3523 movi(rn(reg), i0);
3524 ldxr_c(r0, r1, rn(reg));
3525 jit_unget_reg(reg);
3526 }
3527}
3528
3529static void
3530_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3531{
3532#if __X64_32
3533 addr(r0, r1, r2);
3534 ldr_uc(r0, r0);
3535#else
3536 rex(0, WIDE, r0, r1, r2);
3537 ic(0x0f);
3538 ic(0xb6);
3539 rx(r0, 0, r2, r1, _SCL1);
3540#endif
3541}
3542
3543static void
3544_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3545{
3546 jit_int32_t reg;
3547 if (can_sign_extend_int_p(i0)) {
3548 rex(0, WIDE, r0, _NOREG, r1);
3549 ic(0x0f);
3550 ic(0xb6);
3551 rx(r0, i0, r1, _NOREG, _SCL1);
3552 }
3553 else {
3554 reg = jit_get_reg(jit_class_gpr);
3555 movi(rn(reg), i0);
3556 ldxr_uc(r0, r1, rn(reg));
3557 jit_unget_reg(reg);
3558 }
3559}
3560
3561static void
3562_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3563{
3564#if __X64_32
3565 addr(r0, r1, r2);
3566 ldr_s(r0, r0);
3567#else
3568 rex(0, WIDE, r0, r1, r2);
3569 ic(0x0f);
3570 ic(0xbf);
3571 rx(r0, 0, r2, r1, _SCL1);
3572#endif
3573}
3574
3575static void
3576_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3577{
3578 jit_int32_t reg;
3579 if (can_sign_extend_int_p(i0)) {
3580 rex(0, WIDE, r0, _NOREG, r1);
3581 ic(0x0f);
3582 ic(0xbf);
3583 rx(r0, i0, r1, _NOREG, _SCL1);
3584 }
3585 else {
3586 reg = jit_get_reg(jit_class_gpr);
3587 movi(rn(reg), i0);
3588 ldxr_s(r0, r1, rn(reg));
3589 jit_unget_reg(reg);
3590 }
3591}
3592
3593static void
3594_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3595{
3596#if __X64_32
3597 addr(r0, r1, r2);
3598 ldr_us(r0, r0);
3599#else
3600 rex(0, WIDE, r0, r1, r2);
3601 ic(0x0f);
3602 ic(0xb7);
3603 rx(r0, 0, r2, r1, _SCL1);
3604#endif
3605}
3606
3607static void
3608_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3609{
3610 jit_int32_t reg;
3611 if (can_sign_extend_int_p(i0)) {
3612 rex(0, WIDE, r0, _NOREG, r1);
3613 ic(0x0f);
3614 ic(0xb7);
3615 rx(r0, i0, r1, _NOREG, _SCL1);
3616 }
3617 else {
3618 reg = jit_get_reg(jit_class_gpr);
3619 movi(rn(reg), i0);
3620 ldxr_us(r0, r1, rn(reg));
3621 jit_unget_reg(reg);
3622 }
3623}
3624
3625#if __X64 || !__X64_32
3626static void
3627_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3628{
3629#if __X64
3630 rex(0, WIDE, r0, r1, r2);
3631 ic(0x63);
3632#else
3633 ic(0x8b);
3634#endif
3635 rx(r0, 0, r2, r1, _SCL1);
3636}
3637
3638static void
3639_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3640{
3641 jit_int32_t reg;
3642 if (can_sign_extend_int_p(i0)) {
3643#if __X64
3644 rex(0, WIDE, r0, _NOREG, r1);
3645 ic(0x63);
3646#else
3647 ic(0x8b);
3648#endif
3649 rx(r0, i0, r1, _NOREG, _SCL1);
3650 }
3651 else {
3652 reg = jit_get_reg(jit_class_gpr);
3653 movi(rn(reg), i0);
3654 ldxr_i(r0, r1, rn(reg));
3655 jit_unget_reg(reg);
3656 }
3657}
3658#endif
3659
3660#if __X64
3661static void
3662_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3663{
3664#if __X64_32
3665 addr(r0, r1, r2);
3666 /* to avoid confusion with macro renames */
3667 _ldr_ui(_jit, r0, r0);
3668#else
3669 rex(0, 0, r0, r1, r2);
3670 ic(0x8b);
3671 rx(r0, 0, r2, r1, _SCL1);
3672#endif
3673}
3674
3675static void
3676_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3677{
3678 jit_int32_t reg;
3679 if (can_sign_extend_int_p(i0)) {
3680 rex(0, 0, r0, _NOREG, r1);
3681 ic(0x8b);
3682 rx(r0, i0, r1, _NOREG, _SCL1);
3683 }
3684 else {
3685 reg = jit_get_reg(jit_class_gpr);
3686 movi(rn(reg), i0);
79bfeef6
PC
3687# if __X64_32
3688 ldxr_i(r0, r1, rn(reg));
3689# else
4a71579b 3690 ldxr_ui(r0, r1, rn(reg));
79bfeef6 3691# endif
4a71579b
PC
3692 jit_unget_reg(reg);
3693 }
3694}
3695
3696# if !__X64_32
3697static void
3698_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3699{
3700 rex(0, 1, r0, r1, r2);
3701 ic(0x8b);
3702 rx(r0, 0, r2, r1, _SCL1);
3703}
3704
3705static void
3706_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3707{
3708 jit_int32_t reg;
3709 if (can_sign_extend_int_p(i0)) {
3710 rex(0, 1, r0, _NOREG, r1);
3711 ic(0x8b);
3712 rx(r0, i0, r1, _NOREG, _SCL1);
3713 }
3714 else {
3715 reg = jit_get_reg(jit_class_gpr);
3716 movi(rn(reg), i0);
3717 ldxr_l(r0, r1, rn(reg));
3718 jit_unget_reg(reg);
3719 }
3720}
3721# endif
3722#endif
3723
3724static void
3725_str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3726{
3727 jit_int32_t reg;
3728 if (reg8_p(r1)) {
3729 rex(0, 0, r1, _NOREG, r0);
3730 ic(0x88);
3731 rx(r1, 0, r0, _NOREG, _SCL1);
3732 }
3733 else {
3734 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3735 movr(rn(reg), r1);
3736 rex(0, 0, rn(reg), _NOREG, r0);
3737 ic(0x88);
3738 rx(rn(reg), 0, r0, _NOREG, _SCL1);
3739 jit_unget_reg(reg);
3740 }
3741}
3742
3743static void
3744_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3745{
3746 jit_int32_t reg;
79bfeef6
PC
3747#if CAN_RIP_ADDRESS
3748 jit_word_t rel = i0 - _jit->pc.w;
3749 rel = rel < 0 ? rel - 16 : rel + 16;
3750 if (can_sign_extend_int_p(rel)) {
3751 if (reg8_p(r0)) {
3752 rex(0, 0, r0, _NOREG, _NOREG);
3753 ic(0x88);
3754 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3755 }
3756 else {
3757 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3758 movr(rn(reg), r0);
3759 rex(0, 0, rn(reg), _NOREG, _NOREG);
3760 ic(0x88);
3761 rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3762 jit_unget_reg(reg);
3763 }
3764 }
3765 else
3766#endif
3767 if (address_p(i0)) {
4a71579b
PC
3768 if (reg8_p(r0)) {
3769 rex(0, 0, r0, _NOREG, _NOREG);
3770 ic(0x88);
3771 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3772 }
3773 else {
3774 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3775 movr(rn(reg), r0);
3776 rex(0, 0, rn(reg), _NOREG, _NOREG);
3777 ic(0x88);
3778 rx(rn(reg), i0, _NOREG, _NOREG, _SCL1);
3779 jit_unget_reg(reg);
3780 }
3781 }
3782 else {
3783 reg = jit_get_reg(jit_class_gpr);
3784 movi(rn(reg), i0);
3785 str_c(rn(reg), r0);
3786 jit_unget_reg(reg);
3787 }
3788}
3789
3790static void
3791_str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3792{
3793 ic(0x66);
3794 rex(0, 0, r1, _NOREG, r0);
3795 ic(0x89);
3796 rx(r1, 0, r0, _NOREG, _SCL1);
3797}
3798
3799static void
3800_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3801{
3802 jit_int32_t reg;
79bfeef6
PC
3803#if CAN_RIP_ADDRESS
3804 jit_word_t rel = i0 - _jit->pc.w;
3805 rel = rel < 0 ? rel - 8 : rel + 8;
3806 if (can_sign_extend_int_p(rel)) {
3807 ic(0x66);
3808 rex(0, 0, r0, _NOREG, _NOREG);
3809 ic(0x89);
3810 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3811 }
3812 else
3813#endif
3814 if (address_p(i0)) {
4a71579b
PC
3815 ic(0x66);
3816 rex(0, 0, r0, _NOREG, _NOREG);
3817 ic(0x89);
3818 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3819 }
3820 else {
3821 reg = jit_get_reg(jit_class_gpr);
3822 movi(rn(reg), i0);
3823 str_s(rn(reg), r0);
3824 jit_unget_reg(reg);
3825 }
3826}
3827
3828static void
3829_str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3830{
3831 rex(0, 0, r1, _NOREG, r0);
3832 ic(0x89);
3833 rx(r1, 0, r0, _NOREG, _SCL1);
3834}
3835
3836static void
3837_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3838{
3839 jit_int32_t reg;
79bfeef6
PC
3840#if CAN_RIP_ADDRESS
3841 jit_word_t rel = i0 - _jit->pc.w;
3842 rel = rel < 0 ? rel - 8 : rel + 8;
3843 if (can_sign_extend_int_p(rel)) {
3844 rex(0, 0, r0, _NOREG, _NOREG);
3845 ic(0x89);
3846 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3847 }
3848 else
3849#endif
3850 if (address_p(i0)) {
4a71579b
PC
3851 rex(0, 0, r0, _NOREG, _NOREG);
3852 ic(0x89);
3853 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3854 }
3855 else {
3856 reg = jit_get_reg(jit_class_gpr);
3857 movi(rn(reg), i0);
3858 str_i(rn(reg), r0);
3859 jit_unget_reg(reg);
3860 }
3861}
3862
3863#if __X64 && !__X64_32
3864static void
3865_str_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3866{
3867 rex(0, 1, r1, _NOREG, r0);
3868 ic(0x89);
3869 rx(r1, 0, r0, _NOREG, _SCL1);
3870}
3871
3872static void
3873_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3874{
3875 jit_int32_t reg;
79bfeef6
PC
3876#if CAN_RIP_ADDRESS
3877 jit_word_t rel = i0 - _jit->pc.w;
3878 rel = rel < 0 ? rel - 8 : rel + 8;
3879 if (can_sign_extend_int_p(rel)) {
3880 rex(0, WIDE, r0, _NOREG, _NOREG);
3881 ic(0x89);
3882 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3883 }
3884 else
3885#endif
4a71579b 3886 if (can_sign_extend_int_p(i0)) {
79bfeef6 3887 rex(0, WIDE, r0, _NOREG, _NOREG);
4a71579b
PC
3888 ic(0x89);
3889 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3890 }
3891 else {
3892 reg = jit_get_reg(jit_class_gpr);
3893 movi(rn(reg), i0);
3894 str_l(rn(reg), r0);
3895 jit_unget_reg(reg);
3896 }
3897}
3898#endif
3899
3900static void
3901_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3902{
3903 jit_int32_t reg;
3904#if __X64_32
3905 reg = jit_get_reg(jit_class_gpr);
3906 addr(rn(reg), r0, r1);
3907 str_c(rn(reg), r2);
3908 jit_unget_reg(reg);
3909#else
3910 if (reg8_p(r2)) {
3911 rex(0, 0, r2, r1, r0);
3912 ic(0x88);
3913 rx(r2, 0, r0, r1, _SCL1);
3914 }
3915 else {
3916 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3917 movr(rn(reg), r2);
3918 rex(0, 0, rn(reg), r1, r0);
3919 ic(0x88);
3920 rx(rn(reg), 0, r0, r1, _SCL1);
3921 jit_unget_reg(reg);
3922 }
3923#endif
3924}
3925
3926static void
3927_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3928{
3929 jit_int32_t reg;
3930 if (can_sign_extend_int_p(i0)) {
3931 if (reg8_p(r1)) {
3932 rex(0, 0, r1, _NOREG, r0);
3933 ic(0x88);
3934 rx(r1, i0, r0, _NOREG, _SCL1);
3935 }
3936 else {
3937 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3938 movr(rn(reg), r1);
3939 rex(0, 0, rn(reg), _NOREG, r0);
3940 ic(0x88);
3941 rx(rn(reg), i0, r0, _NOREG, _SCL1);
3942 jit_unget_reg(reg);
3943 }
3944 }
3945 else {
3946 reg = jit_get_reg(jit_class_gpr);
3947 movi(rn(reg), i0);
3948 stxr_c(rn(reg), r0, r1);
3949 jit_unget_reg(reg);
3950 }
3951}
3952
3953static void
3954_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3955{
3956#if __X64_32
3957 jit_int32_t reg;
3958 reg = jit_get_reg(jit_class_gpr);
3959 addr(rn(reg), r0, r1);
3960 str_s(rn(reg), r2);
3961 jit_unget_reg(reg);
3962#else
3963 ic(0x66);
3964 rex(0, 0, r2, r1, r0);
3965 ic(0x89);
3966 rx(r2, 0, r0, r1, _SCL1);
3967#endif
3968}
3969
3970static void
3971_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3972{
3973 jit_int32_t reg;
3974 if (can_sign_extend_int_p(i0)) {
3975 ic(0x66);
3976 rex(0, 0, r1, _NOREG, r0);
3977 ic(0x89);
3978 rx(r1, i0, r0, _NOREG, _SCL1);
3979 }
3980 else {
3981 reg = jit_get_reg(jit_class_gpr);
3982 movi(rn(reg), i0);
3983 stxr_s(rn(reg), r0, r1);
3984 jit_unget_reg(reg);
3985 }
3986}
3987
3988static void
3989_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3990{
3991#if __X64_32
3992 jit_int32_t reg;
3993 reg = jit_get_reg(jit_class_gpr);
3994 addr(rn(reg), r0, r1);
3995 str_i(rn(reg), r2);
3996 jit_unget_reg(reg);
3997#else
3998 rex(0, 0, r2, r1, r0);
3999 ic(0x89);
4000 rx(r2, 0, r0, r1, _SCL1);
4001#endif
4002}
4003
4004static void
4005_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4006{
4007 jit_int32_t reg;
4008 if (can_sign_extend_int_p(i0)) {
4009 rex(0, 0, r1, _NOREG, r0);
4010 ic(0x89);
4011 rx(r1, i0, r0, _NOREG, _SCL1);
4012 }
4013 else {
4014 reg = jit_get_reg(jit_class_gpr);
4015 movi(rn(reg), i0);
4016 stxr_i(rn(reg), r0, r1);
4017 jit_unget_reg(reg);
4018 }
4019}
4020
4021#if __X64 && !__X64_32
4022static void
4023_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
4024{
4025 rex(0, 1, r2, r1, r0);
4026 ic(0x89);
4027 rx(r2, 0, r0, r1, _SCL1);
4028}
4029
4030static void
4031_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4032{
4033 jit_int32_t reg;
4034 if (can_sign_extend_int_p(i0)) {
4035 rex(0, 1, r1, _NOREG, r0);
4036 ic(0x89);
4037 rx(r1, i0, r0, _NOREG, _SCL1);
4038 }
4039 else {
4040 reg = jit_get_reg(jit_class_gpr);
4041 movi(rn(reg), i0);
4042 stxr_l(rn(reg), r0, r1);
4043 jit_unget_reg(reg);
4044 }
4045}
4046#endif
4047
79bfeef6 4048static jit_word_t
4a71579b
PC
4049_jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
4050{
79bfeef6 4051 jit_word_t d;
4a71579b 4052 jit_word_t w;
79bfeef6 4053 w = _jit->pc.w;
ba86ff93 4054 d = i0 - (w + 2);
4a71579b 4055 ic(0x70 | code);
79bfeef6
PC
4056 ic(d);
4057 return (w);
4a71579b
PC
4058}
4059
79bfeef6 4060static jit_word_t
4a71579b
PC
4061_jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
4062{
79bfeef6 4063 jit_word_t d;
4a71579b 4064 jit_word_t w;
79bfeef6 4065 w = _jit->pc.w;
4a71579b 4066 ic(0x0f);
79bfeef6 4067 d = i0 - (w + 6);
4a71579b 4068 ic(0x80 | code);
79bfeef6
PC
4069 ii(d);
4070 return (w);
4a71579b
PC
4071}
4072
79bfeef6 4073static jit_word_t
4a71579b
PC
4074_jcr(jit_state_t *_jit,
4075 jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4076{
4077 alur(X86_CMP, r0, r1);
79bfeef6 4078 return (jcc(code, i0));
4a71579b
PC
4079}
4080
79bfeef6 4081static jit_word_t
4a71579b
PC
4082_jci(jit_state_t *_jit,
4083 jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4084{
4085 alui(X86_CMP, r0, i1);
79bfeef6 4086 return (jcc(code, i0));
4a71579b
PC
4087}
4088
79bfeef6 4089static jit_word_t
4a71579b
PC
4090_jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
4091{
4092 testr(r0, r0);
79bfeef6 4093 return (jcc(code, i0));
4a71579b
PC
4094}
4095
4096static jit_word_t
4097_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4098{
79bfeef6 4099 return (jcr(X86_CC_L, i0, r0, r1));
4a71579b
PC
4100}
4101
4102static jit_word_t
4103_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4104{
79bfeef6
PC
4105 jit_word_t w;
4106 if (i1) w = jci (X86_CC_L, i0, r0, i1);
4107 else w = jci0(X86_CC_S, i0, r0);
4108 return (w);
4a71579b
PC
4109}
4110
4111static jit_word_t
4112_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4113{
79bfeef6 4114 return (jcr(X86_CC_B, i0, r0, r1));
4a71579b
PC
4115}
4116
4117static jit_word_t
4118_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4119{
79bfeef6
PC
4120 jit_word_t w;
4121 if (i1) w = jci (X86_CC_B, i0, r0, i1);
4122 else w = jci0(X86_CC_B, i0, r0);
4123 return (w);
4a71579b
PC
4124}
4125
4126static jit_word_t
4127_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4128{
79bfeef6
PC
4129 jit_word_t w;
4130 if (r0 == r1) w = jmpi(i0);
4131 else w = jcr (X86_CC_LE, i0, r0, r1);
4132 return (w);
4a71579b
PC
4133}
4134
4135static jit_word_t
4136_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4137{
79bfeef6
PC
4138 jit_word_t w;
4139 if (i1) w = jci (X86_CC_LE, i0, r0, i1);
4140 else w = jci0(X86_CC_LE, i0, r0);
4141 return (w);
4a71579b
PC
4142}
4143
4144static jit_word_t
4145_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4146{
79bfeef6
PC
4147 jit_word_t w;
4148 if (r0 == r1) w = jmpi(i0);
4149 else w = jcr (X86_CC_BE, i0, r0, r1);
4150 return (w);
4a71579b
PC
4151}
4152
4153static jit_word_t
4154_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4155{
79bfeef6
PC
4156 jit_word_t w;
4157 if (i1) w = jci (X86_CC_BE, i0, r0, i1);
4158 else w = jci0(X86_CC_BE, i0, r0);
4159 return (w);
4a71579b
PC
4160}
4161
4162static jit_word_t
4163_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4164{
79bfeef6
PC
4165 jit_word_t w;
4166 if (r0 == r1) w = jmpi(i0);
4167 else w = jcr (X86_CC_E, i0, r0, r1);
4168 return (w);
4a71579b
PC
4169}
4170
4171static jit_word_t
4172_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4173{
79bfeef6
PC
4174 jit_word_t w;
4175 if (i1) w = jci (X86_CC_E, i0, r0, i1);
4176 else w = jci0(X86_CC_E, i0, r0);
4177 return (w);
4a71579b
PC
4178}
4179
4180static jit_word_t
4181_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4182{
79bfeef6
PC
4183 jit_word_t w;
4184 if (r0 == r1) w = jmpi(i0);
4185 else w = jcr (X86_CC_GE, i0, r0, r1);
4186 return (w);
4a71579b
PC
4187}
4188
4189static jit_word_t
4190_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4191{
79bfeef6
PC
4192 jit_word_t w;
4193 if (i1) w = jci (X86_CC_GE, i0, r0, i1);
4194 else w = jci0(X86_CC_NS, i0, r0);
4195 return (w);
4a71579b
PC
4196}
4197
4198static jit_word_t
4199_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4200{
79bfeef6
PC
4201 jit_word_t w;
4202 if (r0 == r1) w = jmpi(i0);
4203 else w = jcr (X86_CC_AE, i0, r0, r1);
4204 return (w);
4a71579b
PC
4205}
4206
4207static jit_word_t
4208_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4209{
79bfeef6
PC
4210 jit_word_t w;
4211 if (i1) w = jci (X86_CC_AE, i0, r0, i1);
4212 else w = jmpi(i0);
4213 return (w);
4a71579b
PC
4214}
4215
4216static jit_word_t
4217_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4218{
79bfeef6 4219 return (jcr(X86_CC_G, i0, r0, r1));
4a71579b
PC
4220}
4221
4222static jit_word_t
4223_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4224{
79bfeef6 4225 return (jci(X86_CC_G, i0, r0, i1));
4a71579b
PC
4226}
4227
4228static jit_word_t
4229_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4230{
79bfeef6 4231 return (jcr(X86_CC_A, i0, r0, r1));
4a71579b
PC
4232}
4233
4234static jit_word_t
4235_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4236{
79bfeef6
PC
4237 jit_word_t w;
4238 if (i1) w = jci (X86_CC_A, i0, r0, i1);
4239 else w = jci0(X86_CC_NE, i0, r0);
4240 return (w);
4a71579b
PC
4241}
4242
4243static jit_word_t
4244_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4245{
79bfeef6 4246 return (jcr(X86_CC_NE, i0, r0, r1));
4a71579b
PC
4247}
4248
4249static jit_word_t
4250_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4251{
79bfeef6
PC
4252 jit_word_t w;
4253 if (i1) w = jci (X86_CC_NE, i0, r0, i1);
4254 else w = jci0(X86_CC_NE, i0, r0);
4255 return (w);
4a71579b
PC
4256}
4257
4258static jit_word_t
4259_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4260{
4261 testr(r0, r1);
79bfeef6 4262 return (jnz(i0));
4a71579b
PC
4263}
4264
4265static jit_word_t
4266_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4267{
4268 jit_int32_t reg;
4269 if (can_zero_extend_int_p(i1))
4270 testi(r0, i1);
4271 else {
4272 reg = jit_get_reg(jit_class_gpr);
4273 movi(rn(reg), i1);
4274 testr(r0, rn(reg));
4275 jit_unget_reg(reg);
4276 }
79bfeef6 4277 return (jnz(i0));
4a71579b
PC
4278}
4279
4280static jit_word_t
4281_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4282{
4283 testr(r0, r1);
79bfeef6 4284 return (jz(i0));
4a71579b
PC
4285}
4286
4287static jit_word_t
4288_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4289{
4290 jit_int32_t reg;
4291 if (can_zero_extend_int_p(i1))
4292 testi(r0, i1);
4293 else {
4294 reg = jit_get_reg(jit_class_gpr);
4295 movi(rn(reg), i1);
4296 testr(r0, rn(reg));
4297 jit_unget_reg(reg);
4298 }
79bfeef6 4299 return (jz(i0));
4a71579b
PC
4300}
4301
4302static jit_word_t
4303_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4304{
4305 iaddr(r0, r1);
79bfeef6 4306 return (jo(i0));
4a71579b
PC
4307}
4308
4309static jit_word_t
4310_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4311{
4312 jit_int32_t reg;
4313 if (can_sign_extend_int_p(i1)) {
4314 iaddi(r0, i1);
79bfeef6 4315 return (jo(i0));
4a71579b
PC
4316 }
4317 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4318 movi(rn(reg), i1);
4319 jit_unget_reg(reg);
4320 return (boaddr(i0, r0, rn(reg)));
4321}
4322
4323static jit_word_t
4324_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4325{
4326 iaddr(r0, r1);
79bfeef6 4327 return (jc(i0));
4a71579b
PC
4328}
4329
4330static jit_word_t
4331_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4332{
4333 jit_int32_t reg;
4334 if (can_sign_extend_int_p(i1)) {
4335 iaddi(r0, i1);
79bfeef6 4336 return (jc(i0));
4a71579b
PC
4337 }
4338 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4339 movi(rn(reg), i1);
4340 jit_unget_reg(reg);
4341 return (boaddr_u(i0, r0, rn(reg)));
4342}
4343
4344static jit_word_t
4345_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4346{
4347 iaddr(r0, r1);
79bfeef6 4348 return (jno(i0));
4a71579b
PC
4349}
4350
4351static jit_word_t
4352_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4353{
4354 jit_int32_t reg;
4355 if (can_sign_extend_int_p(i1)) {
4356 iaddi(r0, i1);
79bfeef6 4357 return (jno(i0));
4a71579b
PC
4358 }
4359 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4360 movi(rn(reg), i1);
4361 jit_unget_reg(reg);
4362 return (bxaddr(i0, r0, rn(reg)));
4363}
4364
4365static jit_word_t
4366_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4367{
4368 iaddr(r0, r1);
79bfeef6 4369 return (jnc(i0));
4a71579b
PC
4370}
4371
4372static jit_word_t
4373_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4374{
4375 jit_int32_t reg;
4376 if (can_sign_extend_int_p(i1)) {
4377 iaddi(r0, i1);
79bfeef6 4378 return (jnc(i0));
4a71579b
PC
4379 }
4380 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4381 movi(rn(reg), i1);
4382 jit_unget_reg(reg);
4383 return (bxaddr_u(i0, r0, rn(reg)));
4384}
4385
4386static jit_word_t
4387_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4388{
4389 isubr(r0, r1);
79bfeef6 4390 return (jo(i0));
4a71579b
PC
4391}
4392
4393static jit_word_t
4394_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4395{
4396 jit_int32_t reg;
4397 if (can_sign_extend_int_p(i1)) {
4398 isubi(r0, i1);
79bfeef6 4399 return (jo(i0));
4a71579b
PC
4400 }
4401 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4402 movi(rn(reg), i1);
4403 jit_unget_reg(reg);
4404 return (bosubr(i0, r0, rn(reg)));
4405}
4406
4407static jit_word_t
4408_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4409{
4410 isubr(r0, r1);
79bfeef6 4411 return (jc(i0));
4a71579b
PC
4412}
4413
4414static jit_word_t
4415_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4416{
4417 jit_int32_t reg;
4418 if (can_sign_extend_int_p(i1)) {
4419 isubi(r0, i1);
79bfeef6 4420 return (jc(i0));
4a71579b
PC
4421 }
4422 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4423 movi(rn(reg), i1);
4424 jit_unget_reg(reg);
4425 return (bosubr_u(i0, r0, rn(reg)));
4426}
4427
4428static jit_word_t
4429_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4430{
4431 isubr(r0, r1);
79bfeef6 4432 return (jno(i0));
4a71579b
PC
4433}
4434
4435static jit_word_t
4436_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4437{
4438 jit_int32_t reg;
4439 if (can_sign_extend_int_p(i1)) {
4440 isubi(r0, i1);
79bfeef6 4441 return (jno(i0));
4a71579b
PC
4442 }
4443 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4444 movi(rn(reg), i1);
4445 jit_unget_reg(reg);
4446 return (bxsubr(i0, r0, rn(reg)));
4447}
4448
4449static jit_word_t
4450_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4451{
4452 isubr(r0, r1);
79bfeef6 4453 return (jnc(i0));
4a71579b
PC
4454}
4455
4456static jit_word_t
4457_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4458{
4459 jit_int32_t reg;
4460 if (can_sign_extend_int_p(i1)) {
4461 isubi(r0, i1);
79bfeef6 4462 return (jnc(i0));
4a71579b
PC
4463 }
4464 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4465 movi(rn(reg), i1);
4466 jit_unget_reg(reg);
4467 return (bxsubr_u(i0, r0, rn(reg)));
4468}
4469
4470static void
4471_callr(jit_state_t *_jit, jit_int32_t r0)
4472{
4473 rex(0, 0, _NOREG, _NOREG, r0);
4474 ic(0xff);
4475 mrm(0x03, 0x02, r7(r0));
4476}
4477
4478static jit_word_t
4479_calli(jit_state_t *_jit, jit_word_t i0)
4480{
519a9ea1 4481 jit_word_t w;
79bfeef6
PC
4482 jit_word_t d;
4483 jit_word_t l = _jit->pc.w + 5;
4484 d = i0 - l;
4a71579b 4485#if __X64
79bfeef6
PC
4486 if (
4487# if __X64_32
4488 !((d < 0) ^ (l < 0)) &&
4489# endif
4490 (jit_int32_t)d == d) {
519a9ea1 4491#endif
79bfeef6 4492 w = _jit->pc.w;
519a9ea1 4493 ic(0xe8);
79bfeef6 4494 ii(d);
519a9ea1
PC
4495#if __X64
4496 }
4497 else
79bfeef6 4498 w = calli_p(i0);
519a9ea1 4499#endif
79bfeef6 4500 return (w);
519a9ea1 4501}
4a71579b 4502
519a9ea1
PC
#if __X64
/* Emit a call to i0 through a temporary register: i0 is loaded with
 * movi_p() and the register is then called with callr().
 * Returns the value produced by movi_p(); presumably the address of the
 * load so the target can be patched later -- confirm. */
static jit_word_t
_calli_p(jit_state_t *_jit, jit_word_t i0)
{
    jit_word_t load;
    jit_int32_t tmp;

    tmp = jit_get_reg(jit_class_gpr);
    load = movi_p(rn(tmp), i0);
    callr(rn(tmp));
    jit_unget_reg(tmp);
    return (load);
}
#endif
4a71579b
PC
4516
4517static void
4518_jmpr(jit_state_t *_jit, jit_int32_t r0)
4519{
519a9ea1 4520 rex(0, 0, _NOREG, _NOREG, r0);
4a71579b
PC
4521 ic(0xff);
4522 mrm(0x03, 0x04, r7(r0));
4523}
4524
4525static jit_word_t
4526_jmpi(jit_state_t *_jit, jit_word_t i0)
4527{
4528 jit_word_t w;
79bfeef6
PC
4529 jit_word_t d;
4530 jit_word_t l = _jit->pc.w + 5;
4531 d = i0 - l;
519a9ea1 4532#if __X64
79bfeef6
PC
4533 if (
4534# if __X64_32
4535 !((d < 0) ^ (l < 0)) &&
4536# endif
4537 (jit_int32_t)d == d) {
519a9ea1 4538#endif
79bfeef6 4539 w = _jit->pc.w;
519a9ea1 4540 ic(0xe9);
79bfeef6 4541 ii(d);
519a9ea1
PC
4542#if __X64
4543 }
4544 else
79bfeef6 4545 w = jmpi_p(i0);
519a9ea1 4546#endif
79bfeef6 4547 return (w);
4a71579b
PC
4548}
4549
519a9ea1
PC
#if __X64
/* Emit a jump to i0 through a temporary register: i0 is loaded with
 * movi_p() and the register is then jumped through with jmpr().
 * The temporary is requested with jit_class_nospill.
 * Returns the value produced by movi_p(); presumably the address of the
 * load so the target can be patched later -- confirm. */
static jit_word_t
_jmpi_p(jit_state_t *_jit, jit_word_t i0)
{
    jit_word_t load;
    jit_int32_t tmp;

    tmp = jit_get_reg(jit_class_gpr|jit_class_nospill);
    load = movi_p(rn(tmp), i0);
    jmpr(rn(tmp));
    jit_unget_reg(tmp);
    return (load);
}
#endif
4563
79bfeef6 4564static jit_word_t
4a71579b
PC
4565_jmpsi(jit_state_t *_jit, jit_uint8_t i0)
4566{
79bfeef6 4567 jit_word_t w = _jit->pc.w;
4a71579b
PC
4568 ic(0xeb);
4569 ic(i0);
79bfeef6 4570 return (w);
4a71579b 4571}
ba86ff93
PC
4572#undef clear
4573#undef allocr
4574#undef savset
4a71579b
PC
4575
4576static void
4577_prolog(jit_state_t *_jit, jit_node_t *node)
4578{
79bfeef6 4579 jit_int32_t reg, offs;
4a71579b
PC
4580 if (_jitc->function->define_frame || _jitc->function->assume_frame) {
4581 jit_int32_t frame = -_jitc->function->frame;
79bfeef6 4582 jit_check_frame();
4a71579b
PC
4583 assert(_jitc->function->self.aoff >= frame);
4584 if (_jitc->function->assume_frame)
4585 return;
4586 _jitc->function->self.aoff = frame;
4587 }
4588 if (_jitc->function->allocar)
4589 _jitc->function->self.aoff &= -16;
4590#if __X64 && (__CYGWIN__ || _WIN32)
4591 _jitc->function->stack = (((/* first 32 bytes must be allocated */
4592 (_jitc->function->self.alen > 32 ?
4593 _jitc->function->self.alen : 32) -
4594 /* align stack at 16 bytes */
79bfeef6 4595 _jitc->function->self.aoff) + 15) & -16);
4a71579b
PC
4596#else
4597 _jitc->function->stack = (((_jitc->function->self.alen -
79bfeef6 4598 _jitc->function->self.aoff) + 15) & -16);
4a71579b 4599#endif
79bfeef6
PC
4600
4601 if (_jitc->function->stack)
4602 _jitc->function->need_stack = 1;
4603
4604 if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
4605 /* check if any callee save register needs to be saved */
4606 for (reg = 0; reg < _jitc->reglen; ++reg)
4607 if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
4608 (_rvs[reg].spec & jit_class_sav)) {
4609 _jitc->function->need_stack = 1;
4610 break;
4611 }
4612 }
4613
4614 if (_jitc->function->need_frame || _jitc->function->need_stack)
4615 subi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4a71579b 4616 /* callee save registers */
79bfeef6
PC
4617 for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
4618 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4619 stxi(offs, _RSP_REGNO, rn(iregs[reg]));
4620 offs += REAL_WORDSIZE;
4621 }
4622 }
4623#if __X64 && (__CYGWIN__ || _WIN32)
4624 for (reg = 0; reg < jit_size(fregs); reg++) {
4625 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
4626 sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg]));
4627 offs += sizeof(jit_float64_t);
4628 }
4629 }
4a71579b 4630#endif
79bfeef6
PC
4631
4632 if (_jitc->function->need_frame) {
4633 stxi(0, _RSP_REGNO, _RBP_REGNO);
4634 movr(_RBP_REGNO, _RSP_REGNO);
4635 }
4a71579b
PC
4636
4637 /* alloca */
79bfeef6
PC
4638 if (_jitc->function->stack)
4639 subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
4a71579b
PC
4640 if (_jitc->function->allocar) {
4641 reg = jit_get_reg(jit_class_gpr);
4642 movi(rn(reg), _jitc->function->self.aoff);
4643 stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg));
4644 jit_unget_reg(reg);
4645 }
4646
4647#if __X64 && !(__CYGWIN__ || _WIN32)
4648 if (_jitc->function->self.call & jit_call_varargs) {
4649 jit_word_t nofp_code;
4650
4651 /* Save gp registers in the save area, if any is a vararg */
4652 for (reg = first_gp_from_offset(_jitc->function->vagp);
4653 jit_arg_reg_p(reg); ++reg)
4654 stxi(_jitc->function->vaoff + first_gp_offset +
4655 reg * 8, _RBP_REGNO, rn(JIT_RA0 - reg));
4656
4657 reg = first_fp_from_offset(_jitc->function->vafp);
4658 if (jit_arg_f_reg_p(reg)) {
4659 /* Skip over if no float registers were passed as argument */
4660 /* test %al, %al */
4661 ic(0x84);
4662 ic(0xc0);
79bfeef6 4663 nofp_code = jes(0);
4a71579b
PC
4664
4665 /* Save fp registers in the save area, if any is a vararg */
4666 /* Note that the full 16 byte xmm is not saved, because
4667 * lightning only handles float and double, and, while
4668 * attempting to provide a va_list compatible pointer as
4669 * jit_va_start return, does not guarantee it (on all ports). */
4670 for (; jit_arg_f_reg_p(reg); ++reg)
4671 sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
4672 reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
4673
79bfeef6 4674 patch_at(nofp_code, _jit->pc.w);
4a71579b
PC
4675 }
4676 }
4677#endif
4678}
4679
4680static void
4681_epilog(jit_state_t *_jit, jit_node_t *node)
4682{
79bfeef6 4683 jit_int32_t reg, offs;
4a71579b
PC
4684 if (_jitc->function->assume_frame)
4685 return;
79bfeef6
PC
4686 if (_jitc->function->need_frame)
4687 movr(_RSP_REGNO, _RBP_REGNO);
4688
4a71579b 4689 /* callee save registers */
79bfeef6
PC
4690 for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
4691 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4692 ldxi(rn(iregs[reg]), _RSP_REGNO, offs);
4693 offs += REAL_WORDSIZE;
4694 }
4695 }
4696#if __X64 && (__CYGWIN__ || _WIN32)
4697 for (reg = 0; reg < jit_size(fregs); reg++) {
4698 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
4699 sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs);
4700 offs += sizeof(jit_float64_t);
4701 }
4702 }
4a71579b 4703#endif
79bfeef6
PC
4704
4705 if (_jitc->function->need_frame) {
4706 ldxi(_RBP_REGNO, _RSP_REGNO, 0);
4707 addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4708 }
4709 /* This condition does not happen as much as expected because
4710 * it is not safe to not create a frame pointer if any function
4711 * is called, even jit functions, as those might call external
4712 * functions. */
4713 else if (_jitc->function->need_stack)
4714 addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4a71579b
PC
4715
4716 ic(0xc3);
4717}
4718
4719static void
4720_vastart(jit_state_t *_jit, jit_int32_t r0)
4721{
4722#if __X32 || __CYGWIN__ || _WIN32
4723 assert(_jitc->function->self.call & jit_call_varargs);
79bfeef6 4724 addi(r0, _RBP_REGNO, jit_selfsize());
4a71579b
PC
4725#else
4726 jit_int32_t reg;
4727
4728 assert(_jitc->function->self.call & jit_call_varargs);
4729
4730 /* Return jit_va_list_t in the register argument */
4731 addi(r0, _RBP_REGNO, _jitc->function->vaoff);
4732 reg = jit_get_reg(jit_class_gpr);
4733
4734 /* Initialize gp offset in the save area. */
4735 movi(rn(reg), _jitc->function->vagp);
4736 stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
4737
4738 /* Initialize fp offset in the save area. */
4739 movi(rn(reg), _jitc->function->vafp);
4740 stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
4741
4742 /* Initialize overflow pointer to the first stack argument. */
79bfeef6 4743 addi(rn(reg), _RBP_REGNO, jit_selfsize());
4a71579b
PC
4744 stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
4745
4746 /* Initialize register save area pointer. */
4747 addi(rn(reg), r0, first_gp_offset);
4748 stxi(offsetof(jit_va_list_t, save), r0, rn(reg));
4749
4750 jit_unget_reg(reg);
4751#endif
4752}
4753
4754static void
4755_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4756{
4757#if __X32 || __CYGWIN__ || _WIN32
4758 assert(_jitc->function->self.call & jit_call_varargs);
4759 ldr(r0, r1);
4760 addi(r1, r1, va_gp_increment);
4761#else
4762 jit_int32_t rg0;
4763 jit_int32_t rg1;
4764 jit_word_t ge_code;
4765 jit_word_t lt_code;
4766
4767 assert(_jitc->function->self.call & jit_call_varargs);
4768
4769 rg0 = jit_get_reg(jit_class_gpr);
4770 rg1 = jit_get_reg(jit_class_gpr);
4771
4772 /* Load the gp offset in save area in the first temporary. */
4773 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
4774
4775 /* Jump over if there are no remaining arguments in the save area. */
4776 icmpi(rn(rg0), va_gp_max_offset);
79bfeef6 4777 ge_code = jaes(0);
4a71579b
PC
4778
4779 /* Load the save area pointer in the second temporary. */
4780 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
4781
4782 /* Load the vararg argument in the first argument. */
4783 ldxr(r0, rn(rg1), rn(rg0));
4784
4785 /* Update the gp offset. */
4786 addi(rn(rg0), rn(rg0), 8);
4787 stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
4788
4789 /* Will only need one temporary register below. */
4790 jit_unget_reg(rg1);
4791
4792 /* Jump over overflow code. */
79bfeef6 4793 lt_code = jmpsi(0);
4a71579b
PC
4794
4795 /* Where to land if argument is in overflow area. */
79bfeef6 4796 patch_at(ge_code, _jit->pc.w);
4a71579b
PC
4797
4798 /* Load overflow pointer. */
4799 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
4800
4801 /* Load argument. */
4802 ldr(r0, rn(rg0));
4803
4804 /* Update overflow pointer. */
4805 addi(rn(rg0), rn(rg0), va_gp_increment);
4806 stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
4807
4808 /* Where to land if argument is in save area. */
79bfeef6 4809 patch_at(lt_code, _jit->pc.w);
4a71579b
PC
4810
4811 jit_unget_reg(rg0);
4812#endif
4813}
4814
4815/* The x87 boolean argument tells if will put the result in a x87
4816 * register if non false, in a sse register otherwise. */
4817static void
4818_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
4819{
4820#if __X32 || __CYGWIN__ || _WIN32
4821 assert(_jitc->function->self.call & jit_call_varargs);
4822 if (x87)
4823 x87_ldr_d(r0, r1);
4824 else
4825 sse_ldr_d(r0, r1);
4826 addi(r1, r1, 8);
4827#else
4828 jit_int32_t rg0;
4829 jit_int32_t rg1;
4830 jit_word_t ge_code;
4831 jit_word_t lt_code;
4832
4833 assert(_jitc->function->self.call & jit_call_varargs);
4834
4835 rg0 = jit_get_reg(jit_class_gpr);
4836 rg1 = jit_get_reg(jit_class_gpr);
4837
4838 /* Load the fp offset in save area in the first temporary. */
4839 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
4840
4841 /* Jump over if there are no remaining arguments in the save area. */
4842 icmpi(rn(rg0), va_fp_max_offset);
79bfeef6 4843 ge_code = jaes(0);
4a71579b
PC
4844
4845 /* Load the save area pointer in the second temporary. */
4846 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
4847
4848 /* Load the vararg argument in the first argument. */
4849 if (x87)
4850 x87_ldxr_d(r0, rn(rg1), rn(rg0));
4851 else
4852 sse_ldxr_d(r0, rn(rg1), rn(rg0));
4853
4854 /* Update the fp offset. */
4855 addi(rn(rg0), rn(rg0), va_fp_increment);
4856 stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
4857
4858 /* Will only need one temporary register below. */
4859 jit_unget_reg(rg1);
4860
4861 /* Jump over overflow code. */
79bfeef6 4862 lt_code = jmpsi(0);
4a71579b
PC
4863
4864 /* Where to land if argument is in overflow area. */
79bfeef6 4865 patch_at(ge_code, _jit->pc.w);
4a71579b
PC
4866
4867 /* Load overflow pointer. */
4868 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
4869
4870 /* Load argument. */
4871 if (x87)
4872 x87_ldr_d(r0, rn(rg0));
4873 else
4874 sse_ldr_d(r0, rn(rg0));
4875
4876 /* Update overflow pointer. */
4877 addi(rn(rg0), rn(rg0), 8);
4878 stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
4879
4880 /* Where to land if argument is in save area. */
79bfeef6 4881 patch_at(lt_code, _jit->pc.w);
4a71579b
PC
4882
4883 jit_unget_reg(rg0);
4884#endif
4885}
4886
4887static void
79bfeef6 4888_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
4a71579b 4889{
79bfeef6
PC
4890 jit_word_t disp;
4891 jit_uint8_t *code = (jit_uint8_t *)instr;
4892 ++instr;
4893 switch (code[0]) {
4894 /* movi_p */
4895 case 0xb8 ... 0xbf:
4896 *(jit_word_t *)instr = label;
4a71579b 4897 break;
79bfeef6
PC
4898 /* forward pc relative address known to be in range */
4899#if CAN_RIP_ADDRESS
4900 /* movi */
4901 case 0x8d:
4902 ++instr;
4903 goto apply;
4904#endif
4905 /* jcc */
4906 case 0x0f:
4907 ++instr;
4908 if (code[1] < 0x80 || code[1] > 0x8f)
4909 goto fail;
4910 /* calli */
4911 case 0xe8:
4912 /* jmpi */
4913 case 0xe9:
4914#if CAN_RIP_ADDRESS
4915 apply:
4916#endif
4917 disp = label - (instr + 4);
4918 assert((jit_int32_t)disp == disp);
4919 *(jit_int32_t *)instr = disp;
4920 break;
4921 /* jccs */
4922 case 0x70 ... 0x7f:
4923 /* jmpsi */
4924 case 0xeb:
4925 disp = label - (instr + 1);
4926 assert((jit_int8_t)disp == disp);
4927 *(jit_int8_t *)instr = disp;
4a71579b 4928 break;
79bfeef6
PC
4929 default:
4930 fail:
4931 abort();
4a71579b
PC
4932 }
4933}
4934#endif