drc: avoid excessive recursion in hle mode
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86-cpu.c
CommitLineData
4a71579b 1/*
79bfeef6 2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4a71579b
PC
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20/* avoid using the INC/DEC instructions due to partial stalls */
21#define USE_INC_DEC 0
22
23#if PROTO
ba86ff93
PC
24# if __WORDSIZE == 64 && _WIN32
25# define ONE 1LL
26# else
27# define ONE 1L
28# endif
4a71579b
PC
29# if __X32 || __X64_32
30# define WIDE 0
31# define ldi(u, v) ldi_i(u, v)
32# define ldr(u, v) ldr_i(u, v)
33# define ldxr(u, v, w) ldxr_i(u, v, w)
34# define ldxi(u, v, w) ldxi_i(u, v, w)
ba86ff93 35# define str(u, v) str_i(u, v)
4a71579b 36# define sti(u, v) sti_i(u, v)
ba86ff93 37# define stxr(u, v, w) stxr_i(u, v, w)
4a71579b
PC
38# define stxi(u, v, w) stxi_i(u, v, w)
39# define can_sign_extend_int_p(im) 1
40# define can_zero_extend_int_p(im) 1
41# define fits_uint32_p(im) 1
42# else
43# define WIDE 1
44# define ldi(u, v) ldi_l(u, v)
45# define ldr(u, v) ldr_l(u, v)
46# define ldxr(u, v, w) ldxr_l(u, v, w)
47# define ldxi(u, v, w) ldxi_l(u, v, w)
ba86ff93 48# define str(u, v) str_l(u, v)
4a71579b 49# define sti(u, v) sti_l(u, v)
ba86ff93 50# define stxr(u, v, w) stxr_l(u, v, w)
4a71579b
PC
51# define stxi(u, v, w) stxi_l(u, v, w)
/*
 * Immediate range predicates for the WIDE (#else) configuration.
 *
 *   can_sign_extend_int_p(im)  true for -0x7fffffff <= im <= 0x7fffffff.
 *                              The lower comparison is strict, so
 *                              -0x80000000 itself is rejected.
 *                              NOTE(review): looks deliberate (keeps the
 *                              negated value in range) - confirm before
 *                              widening.
 *   can_zero_extend_int_p(im)  true for 0 <= im <= 0x7fffffff.
 *   fits_uint32_p(im)          true when no bit above bit 31 is set.
 *
 * Each macro evaluates its argument more than once (fits_uint32_p once);
 * do not pass expressions with side effects.
 */
52# define can_sign_extend_int_p(im) \
ba86ff93
PC
53 (((long long)(im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \
54 ((long long)(im) < 0 && (long long)(im) > -0x80000000LL))
4a71579b
PC
55# define can_zero_extend_int_p(im) \
56 ((im) >= 0 && (im) < 0x80000000LL)
57# define fits_uint32_p(im) (((im) & 0xffffffff00000000LL) == 0)
58# endif
/*
 * reg8_p(rn): register-number predicate.
 * When any of __X32, __CYGWIN__, __X64_32 or _WIN32 is set, only register
 * numbers in [_RAX_REGNO, _RBX_REGNO] (0..3) pass; otherwise every register
 * passes.  Presumably "rn can be used as an 8-bit operand" - confirm
 * against the callers.  The restricted form evaluates rn twice.
 */
59# if __X32 || __CYGWIN__ || __X64_32 || _WIN32
60# define reg8_p(rn) \
61 ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
62# else
63# define reg8_p(rn) 1
64# endif
/* General purpose register numbers, 0 (RAX) through 15 (R15). */
65# define _RAX_REGNO 0
66# define _RCX_REGNO 1
67# define _RDX_REGNO 2
68# define _RBX_REGNO 3
69# define _RSP_REGNO 4
70# define _RBP_REGNO 5
71# define _RSI_REGNO 6
72# define _RDI_REGNO 7
73# define _R8_REGNO 8
74# define _R9_REGNO 9
75# define _R10_REGNO 10
76# define _R11_REGNO 11
77# define _R12_REGNO 12
78# define _R13_REGNO 13
79# define _R14_REGNO 14
80# define _R15_REGNO 15
/* r7() keeps the low three bits of a register number, r8() the low four. */
81# define r7(reg) ((reg) & 7)
82# define r8(reg) ((reg) & 15)
83# define _SCL1 0x00
84# define _SCL2 0x01
85# define _SCL4 0x02
86# define _SCL8 0x03
/*
 * Two-operand ALU operation selectors, passed as the `code` argument of
 * alur() and alui().  Each selector is the operation number 0..7 shifted
 * left by three bits.
 *
 * Every replacement list is parenthesized so the macro expands to a single
 * value whatever operators surround it: without the parentheses an
 * expression such as `X86_CMP + 1` would parse as `7 << (3 + 1)`.
 */
# define X86_ADD (0 << 3)
# define X86_OR (1 << 3)
# define X86_ADC (2 << 3)
# define X86_SBB (3 << 3)
# define X86_AND (4 << 3)
# define X86_SUB (5 << 3)
# define X86_XOR (6 << 3)
# define X86_CMP (7 << 3)
95# define X86_ROL 0
96# define X86_ROR 1
97# define X86_RCL 2
98# define X86_RCR 3
99# define X86_SHL 4
100# define X86_SHR 5
101# define X86_SAR 7
102# define X86_NOT 2
103# define X86_NEG 3
104# define X86_MUL 4
105# define X86_IMUL 5
106# define X86_DIV 6
107# define X86_IDIV 7
/*
 * Condition codes, 0x0..0xf, used as the `code` argument of the jcc()/jccs()
 * wrappers and of the ci()/cr() style comparison helpers.
 * Several mnemonics name the same condition and therefore share one value
 * (for example NAE, B and C are all 0x2).  Each odd value is the negation
 * of the even value before it (O/NO, E/NE, L/GE, ...).
 * The numbering follows the x86 condition-code encoding.
 */
108# define X86_CC_O 0x0
109# define X86_CC_NO 0x1
110# define X86_CC_NAE 0x2
111# define X86_CC_B 0x2
112# define X86_CC_C 0x2
113# define X86_CC_AE 0x3
114# define X86_CC_NB 0x3
115# define X86_CC_NC 0x3
116# define X86_CC_E 0x4
117# define X86_CC_Z 0x4
118# define X86_CC_NE 0x5
119# define X86_CC_NZ 0x5
120# define X86_CC_BE 0x6
121# define X86_CC_NA 0x6
122# define X86_CC_A 0x7
123# define X86_CC_NBE 0x7
124# define X86_CC_S 0x8
125# define X86_CC_NS 0x9
126# define X86_CC_P 0xa
127# define X86_CC_PE 0xa
128# define X86_CC_NP 0xb
129# define X86_CC_PO 0xb
130# define X86_CC_L 0xc
131# define X86_CC_NGE 0xc
132# define X86_CC_GE 0xd
133# define X86_CC_NL 0xd
134# define X86_CC_LE 0xe
135# define X86_CC_NG 0xe
136# define X86_CC_G 0xf
137# define X86_CC_NLE 0xf
/*
 * Raw code emitters.  Each macro stores one value through the current
 * emission pointer `_jit->pc` and post-increments the pointer view it used
 * (uc, us, ui or ul).  They expand in place, so a variable named `_jit`
 * must be in scope at the call site.
 *
 * Every parameter and every expansion is parenthesized: an argument such
 * as `x & 7` or `c ? a : b` is then shifted/or-ed as a unit instead of
 * being re-associated with the operators inside the macro body.
 */
/* ModRM-layout byte: md in bits 7..6, r in bits 5..3, m in the low bits. */
# define mrm(md, r, m) (*_jit->pc.uc++ = (((md) << 6) | ((r) << 3) | (m)))
/* SIB-layout byte: sc in bits 7..6, i in bits 5..3, b in the low bits. */
# define sib(sc, i, b) (*_jit->pc.uc++ = (((sc) << 6) | ((i) << 3) | (b)))
/* Store c, s or i through the uc, us or ui view respectively. */
# define ic(c) (*_jit->pc.uc++ = (c))
# define is(s) (*_jit->pc.us++ = (s))
# define ii(i) (*_jit->pc.ui++ = (i))
/* il() stores through the ul view when __X64 && !__X64_32, else it is ii(). */
# if __X64 && !__X64_32
# define il(l) (*_jit->pc.ul++ = (l))
# else
# define il(l) ii(l)
# endif
4a71579b
PC
148# define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b)
149static void
150_rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
151# define rx(rd, md, rb, ri, ms) _rx(_jit, rd, md, rb, ri, ms)
152static void
153_rx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
ba86ff93
PC
154/*
155 * prefix 8 bits 0xc4 Three byte VEX
156 * 0xc5 Two byte VEX
157 * 0x8f Three byte XOP
158 * ~R 1 bit Inverted REX.R
159 * ~X 1 bit Inverted REX.X
160 * ~B 1 bit Inverted REX.B
161 * map 5 bits Opcode map to use
162 * W 1 bit REX.W for integer, otherwise opcode extension
163 * ~vvvv 4 bits Inverted XMM or YMM registers
164 * L 1 bit 128 bit vector if 0, 256 otherwise
165 * pp 2 bits Mandatory prefix
166 * 00 none
167 * 01 0x66
168 * 10 0xf3
169 * 11 0xf2
170 *
171 * Three byte VEX:
172 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
173 * | 1 1 0 0 0 1 0 0 | |~R |~X |~B | map | | W | ~vvvv | L | pp |
174 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
175 * Three byte XOP:
176 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
177 * | 1 0 0 0 1 1 1 1 | |~R |~X |~B | map | | W | ~vvvv | L | pp |
178 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
179 * Two byte VEX:
180 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
181 * | 1 1 0 0 0 1 0 1 | |~R | ~vvvv | L | pp |
182 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
183 */
184# define vex(r,x,b,map,w,vvvv,l,pp) _vex(_jit,r,x,b,map,w,vvvv,l,pp)
185static void
186_vex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
187 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
188# define nop(n) _nop(_jit, n)
189static void _nop(jit_state_t*, jit_int32_t);
190# define emms() is(0x770f)
191# define lea(md, rb, ri, ms, rd) _lea(_jit, md, rb, ri, ms, rd)
192static void
193_lea(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
194# define pushr(r0) _pushr(_jit, r0)
195static void _pushr(jit_state_t*, jit_int32_t) maybe_unused;
196# define popr(r0) _popr(_jit, r0)
197static void _popr(jit_state_t*, jit_int32_t) maybe_unused;
198# define xchgr(r0, r1) _xchgr(_jit, r0, r1)
199static void _xchgr(jit_state_t*, jit_int32_t, jit_int32_t);
200# define testr(r0, r1) _testr(_jit, r0, r1)
201static void _testr(jit_state_t*, jit_int32_t, jit_int32_t);
202# define testi(r0, i0) _testi(_jit, r0, i0)
203static void _testi(jit_state_t*, jit_int32_t, jit_word_t);
204# define cc(code, r0) _cc(_jit, code, r0)
205static void _cc(jit_state_t*, jit_int32_t, jit_int32_t);
206# define icmpr(r0, r1) alur(X86_CMP, r0, r1)
207# define alur(code, r0, r1) _alur(_jit, code, r0, r1)
208static void _alur(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
209# define icmpi(r0, i0) alui(X86_CMP, r0, i0)
210# define alui(code, r0, i0) _alui(_jit, code, r0, i0)
211static void _alui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
212# define iaddr(r0, r1) alur(X86_ADD, r0, r1)
213# define save(r0) _save(_jit, r0)
214static void _save(jit_state_t*, jit_int32_t);
215# define load(r0) _load(_jit, r0)
216static void _load(jit_state_t*, jit_int32_t);
217# define addr(r0, r1, r2) _addr(_jit, r0, r1, r2)
218static void _addr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
219# define iaddi(r0, i0) alui(X86_ADD, r0, i0)
220# define addi(r0, r1, i0) _addi(_jit, r0, r1, i0)
221static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
222#define addcr(r0, r1, r2) _addcr(_jit, r0, r1, r2)
223static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
224#define addci(r0, r1, i0) _addci(_jit, r0, r1, i0)
225static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
79bfeef6
PC
226# define iaddxr(r0, r1) _iaddxr(_jit, r0, r1)
227static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t);
4a71579b
PC
228# define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2)
229static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
230# define iaddxi(r0, i0) alui(X86_ADC, r0, i0)
231# define addxi(r0, r1, i0) _addxi(_jit, r0, r1, i0)
232static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
233# define isubr(r0, r1) alur(X86_SUB, r0, r1)
234# define subr(r0, r1, r2) _subr(_jit, r0, r1, r2)
235static void _subr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
236# define isubi(r0, i0) alui(X86_SUB, r0, i0)
237# define subi(r0, r1, i0) _subi(_jit, r0, r1, i0)
238static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
239# define subcr(r0, r1, r2) _subcr(_jit, r0, r1, r2)
240static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
241# define subci(r0, r1, i0) _subci(_jit, r0, r1, i0)
242static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
243# define isubxr(r0, r1) alur(X86_SBB, r0, r1)
244# define subxr(r0, r1, r2) _subxr(_jit, r0, r1, r2)
245static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
246# define isubxi(r0, i0) alui(X86_SBB, r0, i0)
247# define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0)
248static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
249# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0)
250static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
251# define imulr(r0, r1) _imulr(_jit, r0, r1)
252static void _imulr(jit_state_t*, jit_int32_t, jit_int32_t);
253# define imuli(r0, r1, i0) _imuli(_jit, r0, r1, i0)
254static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
255# define mulr(r0, r1, r2) _mulr(_jit, r0, r1, r2)
256static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
257# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
258static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
ba86ff93
PC
259# define hmulr(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 1)
260# define hmulr_u(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 0)
261# define hmuli(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 1)
262# define hmuli_u(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 0)
4a71579b
PC
263# define umulr(r0) unr(X86_IMUL, r0)
264# define umulr_u(r0) unr(X86_MUL, r0)
265# define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1)
266# define qmulr_u(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 0)
267# define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
268static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t,
269 jit_int32_t,jit_int32_t, jit_bool_t);
270# define qmuli(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 1)
271# define qmuli_u(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 0)
272# define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign)
273static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t,
274 jit_int32_t,jit_word_t, jit_bool_t);
275# define sign_extend_rdx_rax() _sign_extend_rdx_rax(_jit)
276static void _sign_extend_rdx_rax(jit_state_t*);
277# define idivr(r0) unr(X86_IDIV, r0)
278# define idivr_u(r0) unr(X86_DIV, r0)
279# define divremr(r0, r1, r2, i0, i1) _divremr(_jit, r0, r1, r2, i0, i1)
280static void
281_divremr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
282 jit_bool_t,jit_bool_t);
283# define divremi(r0, r1, i0, i1, i2) _divremi(_jit, r0, r1, i0, i1, i2)
284static void
285_divremi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_bool_t,jit_bool_t);
286# define divr(r0, r1, r2) divremr(r0, r1, r2, 1, 1)
287# define divi(r0, r1, i0) divremi(r0, r1, i0, 1, 1)
288# define divr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 1)
289# define divi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 1)
290# define qdivr(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 1)
291# define qdivr_u(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 0)
292# define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign)
293static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t,
294 jit_int32_t,jit_int32_t, jit_bool_t);
295# define qdivi(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 1)
296# define qdivi_u(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 0)
297# define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign)
298static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t,
299 jit_int32_t,jit_word_t, jit_bool_t);
300# define remr(r0, r1, r2) divremr(r0, r1, r2, 1, 0)
301# define remi(r0, r1, i0) divremi(r0, r1, i0, 1, 0)
302# define remr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 0)
303# define remi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 0)
304# define iandr(r0, r1) alur(X86_AND, r0, r1)
305# define andr(r0, r1, r2) _andr(_jit, r0, r1, r2)
306static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
307# define iandi(r0, i0) alui(X86_AND, r0, i0)
308# define andi(r0, r1, i0) _andi(_jit, r0, r1, i0)
309static void _andi(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
310# define iorr(r0, r1) alur(X86_OR, r0, r1)
311# define orr(r0, r1, r2) _orr(_jit, r0, r1, r2)
312static void _orr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
313# define iori(r0, i0) alui(X86_OR, r0, i0)
314# define ori(r0, r1, i0) _ori(_jit, r0, r1, i0)
315static void _ori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
316# define ixorr(r0, r1) alur(X86_XOR, r0, r1)
317# define xorr(r0, r1, r2) _xorr(_jit, r0, r1, r2)
318static void _xorr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
319# define ixori(r0, i0) alui(X86_XOR, r0, i0)
320# define xori(r0, r1, i0) _xori(_jit, r0, r1, i0)
321static void _xori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
322# define irotshr(code, r0) _irotshr(_jit, code, r0)
323static void _irotshr(jit_state_t*, jit_int32_t, jit_int32_t);
324# define rotshr(code, r0, r1, r2) _rotshr(_jit, code, r0, r1, r2)
325static void
326_rotshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
327# define irotshi(code, r0, i0) _irotshi(_jit, code, r0, i0)
328static void _irotshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
329# define rotshi(code, r0, r1, i0) _rotshi(_jit, code, r0, r1, i0)
330static void
331_rotshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
332# define lshr(r0, r1, r2) rotshr(X86_SHL, r0, r1, r2)
ba86ff93
PC
333# define qlshr(r0, r1, r2, r3) xlshr(1, r0, r1, r2, r3)
334# define xlshr(s, r0, r1, r2, r3) _xlshr(_jit, s, r0, r1, r2, r3)
335static void
336_xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
337# define lshi(r0, r1, i0) _lshi(_jit, r0, r1, i0)
338static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
ba86ff93
PC
339# define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0)
340# define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0)
341static void
342_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
343# define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3)
344# define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0)
4a71579b
PC
345# define rshr(r0, r1, r2) rotshr(X86_SAR, r0, r1, r2)
346# define rshi(r0, r1, i0) rotshi(X86_SAR, r0, r1, i0)
347# define rshr_u(r0, r1, r2) rotshr(X86_SHR, r0, r1, r2)
348# define rshi_u(r0, r1, i0) rotshi(X86_SHR, r0, r1, i0)
ba86ff93
PC
349# define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3)
350# define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3)
351# define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3)
352static void
353_xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
354# define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0)
355# define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0)
356# define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0)
357static void
358_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
359# define lrotr(r0, r1, r2) rotshr(X86_ROL, r0, r1, r2)
360# define lroti(r0, r1, i0) rotshi(X86_ROL, r0, r1, i0)
361# define rrotr(r0, r1, r2) rotshr(X86_ROR, r0, r1, r2)
362# define rroti(r0, r1, i0) rotshi(X86_ROR, r0, r1, i0)
4a71579b
PC
363# define unr(code, r0) _unr(_jit, code, r0)
364static void _unr(jit_state_t*, jit_int32_t, jit_int32_t);
365# define inegr(r0) unr(X86_NEG, r0)
366# define negr(r0, r1) _negr(_jit, r0, r1)
367static void _negr(jit_state_t*, jit_int32_t, jit_int32_t);
368# define icomr(r0) unr(X86_NOT, r0)
369# define comr(r0, r1) _comr(_jit, r0, r1)
370static void _comr(jit_state_t*, jit_int32_t, jit_int32_t);
371# if USE_INC_DEC
372# define incr(r0, r1) _incr(_jit, r0, r1)
373static void _incr(jit_state_t*, jit_int32_t, jit_int32_t);
374# define decr(r0, r1) _decr(_jit, r0, r1)
375static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
376# endif
79bfeef6
PC
377# define clor(r0, r1) _clor(_jit, r0, r1)
378static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
379# define clzr(r0, r1) _clzr(_jit, r0, r1)
380static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
381# define ctor(r0, r1) _ctor(_jit, r0, r1)
382static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
383# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
384static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
ba86ff93
PC
385# define rbitr(r0, r1) _rbitr(_jit, r0, r1)
386static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t);
387# define popcntr(r0, r1) _popcntr(_jit, r0, r1)
388static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t);
4a71579b
PC
389# define cr(code, r0, r1, r2) _cr(_jit, code, r0, r1, r2)
390static void
391_cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
392# define ci(code, r0, r1, i0) _ci(_jit, code, r0, r1, i0)
393static void
394_ci(jit_state_t *_jit, jit_int32_t, jit_int32_t, jit_int32_t, jit_word_t);
395# define ci0(code, r0, r1) _ci0(_jit, code, r0, r1)
396static void _ci0(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
397# define ltr(r0, r1, r2) _ltr(_jit, r0, r1, r2)
398static void _ltr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
399# define lti(r0, r1, i0) _lti(_jit, r0, r1, i0)
400static void _lti(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
401# define ltr_u(r0, r1, r2) _ltr_u(_jit, r0, r1, r2)
402static void _ltr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
403# define lti_u(r0, r1, i0) ci(X86_CC_B, r0, r1, i0)
404# define ler(r0, r1, r2) _ler(_jit, r0, r1, r2)
405static void _ler(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
406# define lei(r0, r1, i0) ci(X86_CC_LE, r0, r1, i0)
407# define ler_u(r0, r1, r2) _ler_u(_jit, r0, r1, r2)
408static void _ler_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
409# define lei_u(r0, r1, i0) _lei_u(_jit, r0, r1, i0)
410static void _lei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
411# define eqr(r0, r1, r2) _eqr(_jit, r0, r1, r2)
412static void _eqr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
413# define eqi(r0, r1, i0) _eqi(_jit, r0, r1, i0)
414static void _eqi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
415# define ger(r0, r1, r2) _ger(_jit, r0, r1, r2)
416static void _ger(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
417# define gei(r0, r1, i0) _gei(_jit, r0, r1, i0)
418static void _gei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
419# define ger_u(r0, r1, r2) _ger_u(_jit, r0, r1, r2)
420static void _ger_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
421# define gei_u(r0, r1, i0) _gei_u(_jit, r0, r1, i0)
422static void _gei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
423# define gtr(r0, r1, r2) _gtr(_jit, r0, r1, r2)
424static void _gtr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
425# define gti(r0, r1, i0) _ci(_jit, X86_CC_G, r0, r1, i0)
426# define gtr_u(r0, r1, r2) _gtr_u(_jit, r0, r1, r2)
427static void _gtr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
428# define gti_u(r0, r1, i0) _gti_u(_jit, r0, r1, i0)
429static void _gti_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
430# define ner(r0, r1, r2) _ner(_jit, r0, r1, r2)
431static void _ner(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
432# define nei(r0, r1, i0) _nei(_jit, r0, r1, i0)
433static void _nei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
434# define movr(r0, r1) _movr(_jit, r0, r1)
435static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
436# define imovi(r0, i0) _imovi(_jit, r0, i0)
437static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
438# define movi(r0, i0) _movi(_jit, r0, i0)
79bfeef6
PC
439static
440# if CAN_RIP_ADDRESS
441jit_word_t
442# else
443void
444# endif
445_movi(jit_state_t*, jit_int32_t, jit_word_t);
4a71579b
PC
446# define movi_p(r0, i0) _movi_p(_jit, r0, i0)
447static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
448# define movcr(r0, r1) _movcr(_jit, r0, r1)
449static void _movcr(jit_state_t*,jit_int32_t,jit_int32_t);
450# define movcr_u(r0, r1) _movcr_u(_jit, r0, r1)
451static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
452# define movsr(r0, r1) _movsr(_jit, r0, r1)
453static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
454# define movsr_u(r0, r1) _movsr_u(_jit, r0, r1)
455static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
ba3814c1
PC
456# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
457static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
458 jit_int32_t,jit_int32_t,jit_word_t);
459#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
460#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
1f22b268
PC
461#define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2)
462static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
463#define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2)
464static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
4a71579b
PC
465# if __X64 && !__X64_32
466# define movir(r0, r1) _movir(_jit, r0, r1)
467static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
468# define movir_u(r0, r1) _movir_u(_jit, r0, r1)
469static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
470# endif
40a44dcb
PC
471# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
472static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
473# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
474static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
4a71579b 475# if __X64 && !__X64_32
40a44dcb
PC
476#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1)
477static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
4a71579b 478#endif
ba86ff93
PC
479# define extr(r0, r1, i0, i1) _extr(_jit, r0, r1, i0, i1)
480static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
481# define extr_u(r0, r1, i0, i1) _extr_u(_jit, r0, r1, i0, i1)
482static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
483# define depr(r0, r1, i0, i1) _depr(_jit, r0, r1, i0, i1)
484static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
4a71579b
PC
485# define extr_c(r0, r1) _extr_c(_jit, r0, r1)
486static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
487# define extr_uc(r0, r1) _extr_uc(_jit, r0, r1)
488static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
489# define extr_s(r0, r1) movsr(r0, r1)
490# define extr_us(r0, r1) movsr_u(r0, r1)
491# if __X64 && !__X64_32
492# define extr_i(r0, r1) movir(r0, r1)
493# define extr_ui(r0, r1) movir_u(r0, r1)
494# endif
495# define ldr_c(r0, r1) _ldr_c(_jit, r0, r1)
496static void _ldr_c(jit_state_t*, jit_int32_t, jit_int32_t);
497# define ldi_c(r0, i0) _ldi_c(_jit, r0, i0)
498static void _ldi_c(jit_state_t*, jit_int32_t, jit_word_t);
499# define ldr_uc(r0, r1) _ldr_uc(_jit, r0, r1)
500static void _ldr_uc(jit_state_t*, jit_int32_t, jit_int32_t);
501# define ldi_uc(r0, i0) _ldi_uc(_jit, r0, i0)
502static void _ldi_uc(jit_state_t*, jit_int32_t, jit_word_t);
503# define ldr_s(r0, r1) _ldr_s(_jit, r0, r1)
504static void _ldr_s(jit_state_t*, jit_int32_t, jit_int32_t);
505# define ldi_s(r0, i0) _ldi_s(_jit, r0, i0)
506static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t);
507# define ldr_us(r0, r1) _ldr_us(_jit, r0, r1)
508static void _ldr_us(jit_state_t*, jit_int32_t, jit_int32_t);
509# define ldi_us(r0, i0) _ldi_us(_jit, r0, i0)
510static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t);
511# if __X32 || !__X64_32
512# define ldr_i(r0, r1) _ldr_i(_jit, r0, r1)
513static void _ldr_i(jit_state_t*, jit_int32_t, jit_int32_t);
514# define ldi_i(r0, i0) _ldi_i(_jit, r0, i0)
515static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t);
516# endif
517# if __X64
518# if __X64_32
519# define ldr_i(r0, r1) _ldr_ui(_jit, r0, r1)
520# define ldi_i(r0, i0) _ldi_ui(_jit, r0, i0)
521# else
522# define ldr_ui(r0, r1) _ldr_ui(_jit, r0, r1)
523# define ldi_ui(r0, i0) _ldi_ui(_jit, r0, i0)
524# endif
525static void _ldr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
526static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t);
527# if !__X64_32
528# define ldr_l(r0, r1) _ldr_l(_jit, r0, r1)
529static void _ldr_l(jit_state_t*, jit_int32_t, jit_int32_t);
530# define ldi_l(r0, i0) _ldi_l(_jit, r0, i0)
531static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t);
532# endif
533# endif
534# define ldxr_c(r0, r1, r2) _ldxr_c(_jit, r0, r1, r2)
535static void _ldxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
536# define ldxi_c(r0, r1, i0) _ldxi_c(_jit, r0, r1, i0)
537static void _ldxi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
538# define ldxr_uc(r0, r1, r2) _ldxr_uc(_jit, r0, r1, r2)
539static void _ldxr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
540# define ldxi_uc(r0, r1, i0) _ldxi_uc(_jit, r0, r1, i0)
541static void _ldxi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
542# define ldxr_s(r0, r1, r2) _ldxr_s(_jit, r0, r1, r2)
543static void _ldxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
544# define ldxi_s(r0, r1, i0) _ldxi_s(_jit, r0, r1, i0)
545static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
546# define ldxr_us(r0, r1, r2) _ldxr_us(_jit, r0, r1, r2)
547static void _ldxr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
548# define ldxi_us(r0, r1, i0) _ldxi_us(_jit, r0, r1, i0)
549static void _ldxi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
550# if __X32 || !__X64_32
551# define ldxr_i(r0, r1, r2) _ldxr_i(_jit, r0, r1, r2)
552static void _ldxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
553# define ldxi_i(r0, r1, i0) _ldxi_i(_jit, r0, r1, i0)
554static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
555# endif
556# if __X64
557# if __X64_32
558# define ldxr_i(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2)
559# define ldxi_i(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0)
560# else
561# define ldxr_ui(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2)
562# define ldxi_ui(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0)
563# endif
564static void _ldxr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
565static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
566# if !__X64_32
567# define ldxr_l(r0, r1, r2) _ldxr_l(_jit, r0, r1, r2)
568static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
569# define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0)
570static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
571# endif
572# endif
ba86ff93
PC
573# define unldr(r0, r1, i0) generic_unldr(r0, r1, i0)
574# define unldi(r0, i0, i1) generic_unldi(r0, i0, i1)
575# define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0)
576# define unldi_u(r0, i0, i1) generic_unldi_u(r0, i0, i1)
4a71579b
PC
577# define str_c(r0, r1) _str_c(_jit, r0, r1)
578static void _str_c(jit_state_t*, jit_int32_t, jit_int32_t);
579# define sti_c(i0, r0) _sti_c(_jit, i0, r0)
580static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t);
581# define str_s(r0, r1) _str_s(_jit, r0, r1)
582static void _str_s(jit_state_t*, jit_int32_t, jit_int32_t);
583# define sti_s(i0, r0) _sti_s(_jit, i0, r0)
584static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t);
585# define str_i(r0, r1) _str_i(_jit, r0, r1)
586static void _str_i(jit_state_t*, jit_int32_t, jit_int32_t);
587# define sti_i(i0, r0) _sti_i(_jit, i0, r0)
588static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t);
589# if __X64 && !__X64_32
590# define str_l(r0, r1) _str_l(_jit, r0, r1)
591static void _str_l(jit_state_t*, jit_int32_t, jit_int32_t);
592# define sti_l(i0, r0) _sti_l(_jit, i0, r0)
593static void _sti_l(jit_state_t*, jit_word_t, jit_int32_t);
594# endif
595# define stxr_c(r0, r1, r2) _stxr_c(_jit, r0, r1, r2)
596static void _stxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
597# define stxi_c(i0, r0, r1) _stxi_c(_jit, i0, r0, r1)
598static void _stxi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
599# define stxr_s(r0, r1, r2) _stxr_s(_jit, r0, r1, r2)
600static void _stxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
601# define stxi_s(i0, r0, r1) _stxi_s(_jit, i0, r0, r1)
602static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
603# define stxr_i(r0, r1, r2) _stxr_i(_jit, r0, r1, r2)
604static void _stxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
605# define stxi_i(i0, r0, r1) _stxi_i(_jit, i0, r0, r1)
606static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
607# if __X64 && !__X64_32
608# define stxr_l(r0, r1, r2) _stxr_l(_jit, r0, r1, r2)
609static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
610# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1)
611static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
612# endif
ba86ff93
PC
613#define unstr(r0, r1, i0) generic_unstr(r0, r1, i0)
614#define unsti(i0, r0, i1) generic_unsti(i0, r0, i1)
4a71579b
PC
615# define jcc(code, i0) _jcc(_jit, code, i0)
616# define jo(i0) jcc(X86_CC_O, i0)
617# define jno(i0) jcc(X86_CC_NO, i0)
618# define jnae(i0) jcc(X86_CC_NAE, i0)
619# define jb(i0) jcc(X86_CC_B, i0)
620# define jc(i0) jcc(X86_CC_C, i0)
621# define jae(i0) jcc(X86_CC_AE, i0)
622# define jnb(i0) jcc(X86_CC_NB, i0)
623# define jnc(i0) jcc(X86_CC_NC, i0)
624# define je(i0) jcc(X86_CC_E, i0)
625# define jz(i0) jcc(X86_CC_Z, i0)
626# define jne(i0) jcc(X86_CC_NE, i0)
627# define jnz(i0) jcc(X86_CC_NZ, i0)
628# define jbe(i0) jcc(X86_CC_BE, i0)
629# define jna(i0) jcc(X86_CC_NA, i0)
630# define ja(i0) jcc(X86_CC_A, i0)
631# define jnbe(i0) jcc(X86_CC_NBE, i0)
632# define js(i0) jcc(X86_CC_S, i0)
633# define jns(i0) jcc(X86_CC_NS, i0)
634# define jp(i0) jcc(X86_CC_P, i0)
635# define jpe(i0) jcc(X86_CC_PE, i0)
636# define jnp(i0) jcc(X86_CC_NP, i0)
637# define jpo(i0) jcc(X86_CC_PO, i0)
638# define jl(i0) jcc(X86_CC_L, i0)
639# define jnge(i0) jcc(X86_CC_NGE, i0)
640# define jge(i0) jcc(X86_CC_GE, i0)
641# define jnl(i0) jcc(X86_CC_NL, i0)
642# define jle(i0) jcc(X86_CC_LE, i0)
643# define jng(i0) jcc(X86_CC_NG, i0)
644# define jg(i0) jcc(X86_CC_G, i0)
645# define jnle(i0) jcc(X86_CC_NLE, i0)
79bfeef6 646static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t);
4a71579b
PC
647# define jccs(code, i0) _jccs(_jit, code, i0)
648# define jos(i0) jccs(X86_CC_O, i0)
649# define jnos(i0) jccs(X86_CC_NO, i0)
650# define jnaes(i0) jccs(X86_CC_NAE, i0)
651# define jbs(i0) jccs(X86_CC_B, i0)
652# define jcs(i0) jccs(X86_CC_C, i0)
653# define jaes(i0) jccs(X86_CC_AE, i0)
654# define jnbs(i0) jccs(X86_CC_NB, i0)
655# define jncs(i0) jccs(X86_CC_NC, i0)
656# define jes(i0) jccs(X86_CC_E, i0)
657# define jzs(i0) jccs(X86_CC_Z, i0)
658# define jnes(i0) jccs(X86_CC_NE, i0)
659# define jnzs(i0) jccs(X86_CC_NZ, i0)
660# define jbes(i0) jccs(X86_CC_BE, i0)
661# define jnas(i0) jccs(X86_CC_NA, i0)
662# define jas(i0) jccs(X86_CC_A, i0)
663# define jnbes(i0) jccs(X86_CC_NBE, i0)
664# define jss(i0) jccs(X86_CC_S, i0)
665# define jnss(i0) jccs(X86_CC_NS, i0)
666# define jps(i0) jccs(X86_CC_P, i0)
667# define jpes(i0) jccs(X86_CC_PE, i0)
668# define jnps(i0) jccs(X86_CC_NP, i0)
669# define jpos(i0) jccs(X86_CC_PO, i0)
670# define jls(i0) jccs(X86_CC_L, i0)
671# define jnges(i0) jccs(X86_CC_NGE, i0)
672# define jges(i0) jccs(X86_CC_GE, i0)
673# define jnls(i0) jccs(X86_CC_NL, i0)
674# define jles(i0) jccs(X86_CC_LE, i0)
675# define jngs(i0) jccs(X86_CC_NG, i0)
676# define jgs(i0) jccs(X86_CC_G, i0)
677# define jnles(i0) jccs(X86_CC_NLE, i0)
79bfeef6 678static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t);
4a71579b 679# define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1)
79bfeef6
PC
680static jit_word_t _jcr(jit_state_t*,
681 jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
4a71579b 682# define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1)
79bfeef6
PC
683static jit_word_t _jci(jit_state_t*,
684 jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
4a71579b 685# define jci0(code, i0, r0) _jci0(_jit, code, i0, r0)
79bfeef6 686static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
4a71579b
PC
687# define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1)
688static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
689# define blti(i0, r0, i1) _blti(_jit, i0, r0, i1)
690static jit_word_t _blti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
691# define bltr_u(i0, r0, r1) _bltr_u(_jit, i0, r0, r1)
692static jit_word_t _bltr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
693# define blti_u(i0, r0, i1) _blti_u(_jit, i0, r0, i1)
694static jit_word_t _blti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
695# define bler(i0, r0, r1) _bler(_jit, i0, r0, r1)
696static jit_word_t _bler(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
697# define blei(i0, r0, i1) _blei(_jit, i0, r0, i1)
698static jit_word_t _blei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
699# define bler_u(i0, r0, r1) _bler_u(_jit, i0, r0, r1)
700static jit_word_t _bler_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
701# define blei_u(i0, r0, i1) _blei_u(_jit, i0, r0, i1)
702static jit_word_t _blei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
703# define beqr(i0, r0, r1) _beqr(_jit, i0, r0, r1)
704static jit_word_t _beqr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
705# define beqi(i0, r0, i1) _beqi(_jit, i0, r0, i1)
706static jit_word_t _beqi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
707# define bger(i0, r0, r1) _bger(_jit, i0, r0, r1)
708static jit_word_t _bger(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
709# define bgei(i0, r0, i1) _bgei(_jit, i0, r0, i1)
710static jit_word_t _bgei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
711# define bger_u(i0, r0, r1) _bger_u(_jit, i0, r0, r1)
712static jit_word_t _bger_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
713# define bgei_u(i0, r0, i1) _bgei_u(_jit, i0, r0, i1)
714static jit_word_t _bgei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
715# define bgtr(i0, r0, r1) _bgtr(_jit, i0, r0, r1)
716static jit_word_t _bgtr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
717# define bgti(i0, r0, i1) _bgti(_jit, i0, r0, i1)
718static jit_word_t _bgti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
719# define bgtr_u(i0, r0, r1) _bgtr_u(_jit, i0, r0, r1)
720static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
721# define bgti_u(i0, r0, i1) _bgti_u(_jit, i0, r0, i1)
722static jit_word_t _bgti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
723# define bner(i0, r0, r1) _bner(_jit, i0, r0, r1)
724static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
725# define bnei(i0, r0, i1) _bnei(_jit, i0, r0, i1)
726static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
727# define bmsr(i0, r0, r1) _bmsr(_jit, i0, r0, r1)
728static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
729# define bmsi(i0, r0, i1) _bmsi(_jit, i0, r0, i1)
730static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
731# define bmcr(i0, r0, r1) _bmcr(_jit, i0, r0, r1)
732static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
733# define bmci(i0, r0, i1) _bmci(_jit, i0, r0, i1)
734static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
735# define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1)
736static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
737# define boaddi(i0, r0, i1) _boaddi(_jit, i0, r0, i1)
738static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
739# define boaddr_u(i0, r0, r1) _boaddr_u(_jit, i0, r0, r1)
740static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
741# define boaddi_u(i0, r0, i1) _boaddi_u(_jit, i0, r0, i1)
742static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
743# define bxaddr(i0, r0, r1) _bxaddr(_jit, i0, r0, r1)
744static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
745# define bxaddi(i0, r0, i1) _bxaddi(_jit, i0, r0, i1)
746static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
747# define bxaddr_u(i0, r0, r1) _bxaddr_u(_jit, i0, r0, r1)
748static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
749# define bxaddi_u(i0, r0, i1) _bxaddi_u(_jit, i0, r0, i1)
750static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
751# define bosubr(i0, r0, r1) _bosubr(_jit, i0, r0, r1)
752static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
753# define bosubi(i0, r0, i1) _bosubi(_jit, i0, r0, i1)
754static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
755# define bosubr_u(i0, r0, r1) _bosubr_u(_jit, i0, r0, r1)
756static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
757# define bosubi_u(i0, r0, i1) _bosubi_u(_jit, i0, r0, i1)
758static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
759# define bxsubr(i0, r0, r1) _bxsubr(_jit, i0, r0, r1)
760static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
761# define bxsubi(i0, r0, i1) _bxsubi(_jit, i0, r0, i1)
762static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
763# define bxsubr_u(i0, r0, r1) _bxsubr_u(_jit, i0, r0, r1)
764static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
765# define bxsubi_u(i0, r0, i1) _bxsubi_u(_jit, i0, r0, i1)
766static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
767# define callr(r0) _callr(_jit, r0)
768static void _callr(jit_state_t*, jit_int32_t);
769# define calli(i0) _calli(_jit, i0)
770static jit_word_t _calli(jit_state_t*, jit_word_t);
519a9ea1
PC
771# if __X64
772# define calli_p(i0) _calli_p(_jit, i0)
773static jit_word_t _calli_p(jit_state_t*, jit_word_t);
774# else
775# define calli_p(i0) calli(i0)
776# endif
4a71579b
PC
777# define jmpr(r0) _jmpr(_jit, r0)
778static void _jmpr(jit_state_t*, jit_int32_t);
779# define jmpi(i0) _jmpi(_jit, i0)
780static jit_word_t _jmpi(jit_state_t*, jit_word_t);
519a9ea1
PC
781# if __X64
782# define jmpi_p(i0) _jmpi_p(_jit, i0)
783static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
784# else
785# define jmpi_p(i0) jmpi(i0)
786# endif
4a71579b 787# define jmpsi(i0) _jmpsi(_jit, i0)
79bfeef6 788static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t);
4a71579b
PC
789# define prolog(node) _prolog(_jit, node)
790static void _prolog(jit_state_t*, jit_node_t*);
791# define epilog(node) _epilog(_jit, node)
792static void _epilog(jit_state_t*, jit_node_t*);
793# define vastart(r0) _vastart(_jit, r0)
794static void _vastart(jit_state_t*, jit_int32_t);
795# define vaarg(r0, r1) _vaarg(_jit, r0, r1)
796static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
797# define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0)
798static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
79bfeef6
PC
799# define patch_at(instr, label) _patch_at(_jit, instr, label)
800static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
4a71579b
PC
801# if !defined(HAVE_FFSL)
802# if __X32
803# define ffsl(i) __builtin_ffs(i)
804# else
805# define ffsl(l) __builtin_ffsl(l)
806# endif
807# endif
1f22b268 808# define jit_cmov_p() jit_cpu.cmov
4a71579b
PC
809#endif
810
811#if CODE
812static void
813_rex(jit_state_t *_jit, jit_int32_t l, jit_int32_t w,
814 jit_int32_t r, jit_int32_t x, jit_int32_t b)
815{
816#if __X64
817 jit_int32_t v = 0x40 | (w << 3);
818
819 if (r != _NOREG)
820 v |= (r & 8) >> 1;
821 if (x != _NOREG)
822 v |= (x & 8) >> 2;
823 if (b != _NOREG)
824 v |= (b & 8) >> 3;
825 if (l || v != 0x40)
826 ic(v);
827#endif
828}
829
830static void
831_rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
832 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
833{
834 if (ri == _NOREG) {
835 if (rb == _NOREG) {
79bfeef6
PC
836 /* Use ms == _SCL8 to tell it is a %rip relative displacement */
837#if __X64
838 if (ms == _SCL8)
839#endif
840 mrm(0x00, r7(rd), 0x05);
841#if __X64
842 else {
843 mrm(0x00, r7(rd), 0x04);
844 sib(_SCL1, 0x04, 0x05);
845 }
4a71579b
PC
846#endif
847 ii(md);
848 }
849 else if (r7(rb) == _RSP_REGNO) {
850 if (md == 0) {
851 mrm(0x00, r7(rd), 0x04);
852 sib(ms, 0x04, 0x04);
853 }
854 else if ((jit_int8_t)md == md) {
855 mrm(0x01, r7(rd), 0x04);
856 sib(ms, 0x04, 0x04);
857 ic(md);
858 }
859 else {
860 mrm(0x02, r7(rd), 0x04);
861 sib(ms, 0x04, 0x04);
862 ii(md);
863 }
864 }
865 else {
866 if (md == 0 && r7(rb) != _RBP_REGNO)
867 mrm(0x00, r7(rd), r7(rb));
868 else if ((jit_int8_t)md == md) {
869 mrm(0x01, r7(rd), r7(rb));
870 ic(md);
871 }
872 else {
873 mrm(0x02, r7(rd), r7(rb));
874 ii(md);
875 }
876 }
877 }
878 else if (rb == _NOREG) {
879 mrm(0x00, r7(rd), 0x04);
880 sib(ms, r7(ri), 0x05);
881 ii(md);
882 }
883 else if (r8(ri) != _RSP_REGNO) {
884 if (md == 0 && r7(rb) != _RBP_REGNO) {
885 mrm(0x00, r7(rd), 0x04);
886 sib(ms, r7(ri), r7(rb));
887 }
888 else if ((jit_int8_t)md == md) {
889 mrm(0x01, r7(rd), 0x04);
890 sib(ms, r7(ri), r7(rb));
891 ic(md);
892 }
893 else {
894 mrm(0x02, r7(rd), 0x04);
895 sib(ms, r7(ri), r7(rb));
896 ic(md);
897 }
898 }
899 else {
900 fprintf(stderr, "illegal index register");
901 abort();
902 }
903}
904
ba86ff93
PC
905static void
906_vex(jit_state_t *_jit, jit_int32_t r, jit_int32_t x, jit_int32_t b,
907 jit_int32_t map, jit_int32_t w, jit_int32_t vvvv, jit_int32_t l,
908 jit_int32_t pp)
909{
910 jit_int32_t v;
911 if (r == _NOREG) r = 0;
912 if (x == _NOREG) x = 0;
913 if (b == _NOREG) b = 0;
914 if (map == 1 && w == 0 && ((x|b) & 8) == 0) {
915 /* Two byte prefix */
916 ic(0xc5);
917 /* ~R */
918 v = (r & 8) ? 0 : 0x80;
919 }
920 else {
921 /* Three byte prefix */
922 if (map >= 8)
923 ic(0x8f);
924 else
925 ic(0xc4);
926 /* map_select */
927 v = map;
928 /* ~R */
929 if (!(r & 8)) v |= 0x80;
930 /* ~X */
931 if (!(x & 8)) v |= 0x40;
932 /* ~B */
933 if (!(b & 8)) v |= 0x20;
934 ic(v);
935 /* W */
936 v = w ? 0x80 : 0;
937 }
938 /* ~vvvv */
939 v |= (~vvvv & 0x0f) << 3;
940 /* L */
941 if (l) v |= 0x04;
942 /* pp */
943 v |= pp;
944 ic(v);
945}
946
4a71579b
PC
947static void
948_nop(jit_state_t *_jit, jit_int32_t count)
949{
c0c16242
PC
950 jit_int32_t i;
951 while (count) {
952 if (count > 9)
953 i = 9;
954 else
955 i = count;
956 switch (i) {
957 case 0:
958 break;
959 case 1: /* NOP */
960 ic(0x90); break;
961 case 2: /* 66 NOP */
962 ic(0x66); ic(0x90);
963 break;
964 case 3: /* NOP DWORD ptr [EAX] */
965 ic(0x0f); ic(0x1f); ic(0x00);
966 break;
967 case 4: /* NOP DWORD ptr [EAX + 00H] */
968 ic(0x0f); ic(0x1f); ic(0x40); ic(0x00);
969 break;
970 case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
971 ic(0x0f); ic(0x1f); ic(0x44); ic(0x00);
972 ic(0x00);
973 break;
974 case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
975 ic(0x66); ic(0x0f); ic(0x1f); ic(0x44);
976 ic(0x00); ic(0x00);
977 break;
978 case 7: /* NOP DWORD ptr [EAX + 00000000H] */
979 ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000);
980 break;
981 case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
982 ic(0x0f); ic(0x1f); ic(0x84); ic(0x00);
983 ii(0x0000);
984 break;
985 case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
986 ic(0x66); ic(0x0f); ic(0x1f); ic(0x84);
987 ic(0x00); ii(0x0000);
988 break;
989 }
990 count -= i;
4a71579b
PC
991 }
992}
4a71579b
PC
993static void
994_lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
995 jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
996{
997 rex(0, WIDE, rd, ri, rb);
998 ic(0x8d);
999 rx(rd, md, rb, ri, ms);
1000}
1001
1002static void
1003_pushr(jit_state_t *_jit, jit_int32_t r0)
1004{
1005 rex(0, WIDE, 0, 0, r0);
1006 ic(0x50 | r7(r0));
1007}
1008
1009static void
1010_popr(jit_state_t *_jit, jit_int32_t r0)
1011{
1012 rex(0, WIDE, 0, 0, r0);
1013 ic(0x58 | r7(r0));
1014}
1015
1016static void
1017_xchgr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1018{
1019 rex(0, WIDE, r1, _NOREG, r0);
1020 ic(0x87);
1021 mrm(0x03, r7(r1), r7(r0));
1022}
1023
1024static void
1025_testr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1026{
1027 rex(0, WIDE, r1, _NOREG, r0);
1028 ic(0x85);
1029 mrm(0x03, r7(r1), r7(r0));
1030}
1031
1032static void
1033_testi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1034{
1035 rex(0, WIDE, _NOREG, _NOREG, r0);
1036 if (r0 == _RAX_REGNO)
1037 ic(0xa9);
1038 else {
1039 ic(0xf7);
1040 mrm(0x03, 0x00, r7(r0));
1041 }
1042 ii(i0);
1043}
1044
1045static void
1046_cc(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1047{
1048 rex(0, 0, _NOREG, _NOREG, r0);
1049 ic(0x0f);
1050 ic(0x90 | code);
1051 mrm(0x03, 0x00, r7(r0));
1052}
1053
1054static void
1055_alur(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
1056{
1057 rex(0, WIDE, r1, _NOREG, r0);
1058 ic(code | 0x01);
1059 mrm(0x03, r7(r1), r7(r0));
1060}
1061
1062static void
1063_alui(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
1064{
1065 jit_int32_t reg;
1066 if (can_sign_extend_int_p(i0)) {
1067 rex(0, WIDE, _NOREG, _NOREG, r0);
1068 if ((jit_int8_t)i0 == i0) {
1069 ic(0x83);
1070 ic(0xc0 | code | r7(r0));
1071 ic(i0);
1072 }
1073 else {
1074 if (r0 == _RAX_REGNO)
1075 ic(code | 0x05);
1076 else {
1077 ic(0x81);
1078 ic(0xc0 | code | r7(r0));
1079 }
1080 ii(i0);
1081 }
1082 }
1083 else {
1084 reg = jit_get_reg(jit_class_gpr);
1085 movi(rn(reg), i0);
1086 alur(code, r0, rn(reg));
1087 jit_unget_reg(reg);
1088 }
1089}
1090
1091static void
1092_save(jit_state_t *_jit, jit_int32_t r0)
1093{
1094 if (!_jitc->function->regoff[r0]) {
1095 _jitc->function->regoff[r0] = jit_allocai(sizeof(jit_word_t));
1096 _jitc->again = 1;
1097 }
1098 assert(!jit_regset_tstbit(&_jitc->regsav, r0));
1099 jit_regset_setbit(&_jitc->regsav, r0);
1100 stxi(_jitc->function->regoff[r0], _RBP_REGNO, r0);
1101}
1102
1103static void
1104_load(jit_state_t *_jit, jit_int32_t r0)
1105{
1106 assert(_jitc->function->regoff[r0]);
1107 assert(jit_regset_tstbit(&_jitc->regsav, r0));
1108 jit_regset_clrbit(&_jitc->regsav, r0);
1109 ldxi(r0, _RBP_REGNO, _jitc->function->regoff[r0]);
1110}
1111
1112static void
1113_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1114{
1115 if (r0 == r1)
1116 iaddr(r0, r2);
1117 else if (r0 == r2)
1118 iaddr(r0, r1);
1119 else
1120 lea(0, r1, r2, _SCL1, r0);
1121}
1122
1123static void
1124_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1125{
1126 jit_int32_t reg;
1127 if (i0 == 0)
1128 movr(r0, r1);
1129#if USE_INC_DEC
1130 else if (i0 == 1)
1131 incr(r0, r1);
1132 else if (i0 == -1)
1133 decr(r0, r1);
1134#endif
1135 else if (can_sign_extend_int_p(i0)) {
1136 if (r0 == r1)
1137 iaddi(r0, i0);
1138 else
1139 lea(i0, r1, _NOREG, _SCL1, r0);
1140 }
1141 else if (r0 != r1) {
1142 movi(r0, i0);
1143 iaddr(r0, r1);
1144 }
1145 else {
1146 reg = jit_get_reg(jit_class_gpr);
1147 movi(rn(reg), i0);
1148 iaddr(r0, rn(reg));
1149 jit_unget_reg(reg);
1150 }
1151}
1152
1153static void
1154_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1155{
1156 if (r0 == r2)
1157 iaddr(r0, r1);
1158 else {
1159 movr(r0, r1);
1160 iaddr(r0, r2);
1161 }
1162}
1163
1164static void
1165_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1166{
1167 jit_int32_t reg;
1168 if (can_sign_extend_int_p(i0)) {
1169 movr(r0, r1);
1170 iaddi(r0, i0);
1171 }
1172 else if (r0 == r1) {
1173 reg = jit_get_reg(jit_class_gpr);
1174 movi(rn(reg), i0);
1175 iaddr(r0, rn(reg));
1176 jit_unget_reg(reg);
1177 }
1178 else {
1179 movi(r0, i0);
1180 iaddr(r0, r1);
1181 }
1182}
1183
79bfeef6
PC
1184static void
1185_iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1186{
1187 /* FIXME: this is not doing what I did expect for the simple test case:
1188 * mov $0xffffffffffffffff, %rax -- rax = 0xffffffffffffffff (-1)
1189 * mov $0xffffffffffffffff, %r10 -- r10 = 0xffffffffffffffff (-1)
1190 * mov $0x1, %r11d -- r11 = 1
1191 * xor %rbx, %rbx -- rbx = 0
1192 * (gdb) p $eflags
1193 * $1 = [ PF ZF IF ]
1194 * add %r11, %rax -- r11 = 0x10000000000000000 (0)
1195 * does not fit in 64 bit ^
1196 * (gdb) p $eflags
1197 * $2 = [ CF PF AF ZF IF ]
1198 * adcx %r10, %rbx -- r10 = 0xffffffffffffffff (-1)
1199 * (gdb) p $eflags
1200 * $3 = [ CF PF AF ZF IF ]
1201 * (gdb) p/x $r10
1202 * $4 = 0xffffffffffffffff
1203 * but, r10 should be zero, as it is:
1204 * -1 (%r10) + 0 (%rbx) + carry (!!eflags.CF)
1205 * FIXME: maybe should only use ADCX in the third operation onward, that
1206 * is, after the first ADC? In either case, the add -1+0+carry should
1207 * have used and consumed the carry? At least this is what is expected
1208 * in Lightning...
1209 */
1210#if 0
1211 /* Significantly longer instruction, but avoid cpu stalls as only
1212 * the carry flag is used in a sequence. */
1213 if (jit_cpu.adx) {
1214 /* ADCX */
1215 ic(0x66);
1216 rex(0, WIDE, r1, _NOREG, r0);
1217 ic(0x0f);
1218 ic(0x38);
1219 ic(0xf6);
1220 mrm(0x03, r7(r1), r7(r0));
1221 }
1222 else
1223#endif
1224 alur(X86_ADC, r0, r1);
1225}
1226
4a71579b
PC
1227static void
1228_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1229{
1230 if (r0 == r2)
1231 iaddxr(r0, r1);
1232 else {
1233 movr(r0, r1);
1234 iaddxr(r0, r2);
1235 }
1236}
1237
1238static void
1239_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1240{
1241 jit_int32_t reg;
79bfeef6
PC
1242 if (
1243#if 0
1244 /* Do not mix ADC and ADCX */
1245 !jit_cpu.adx &&
1246#endif
1247 can_sign_extend_int_p(i0)) {
4a71579b
PC
1248 movr(r0, r1);
1249 iaddxi(r0, i0);
1250 }
1251 else if (r0 == r1) {
1252 reg = jit_get_reg(jit_class_gpr);
1253 movi(rn(reg), i0);
1254 iaddxr(r0, rn(reg));
1255 jit_unget_reg(reg);
1256 }
1257 else {
1258 movi(r0, i0);
1259 iaddxr(r0, r1);
1260 }
1261}
1262
1263static void
1264_subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1265{
1266 if (r1 == r2)
1267 ixorr(r0, r0);
1268 else if (r0 == r2) {
1269 isubr(r0, r1);
1270 inegr(r0);
1271 }
1272 else {
1273 movr(r0, r1);
1274 isubr(r0, r2);
1275 }
1276}
1277
1278static void
1279_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1280{
1281 jit_int32_t reg;
1282 if (i0 == 0)
1283 movr(r0, r1);
1284#if USE_INC_DEC
1285 else if (i0 == 1)
1286 decr(r0, r1);
1287 else if (i0 == -1)
1288 incr(r0, r1);
1289#endif
1290 else if (can_sign_extend_int_p(i0)) {
1291 if (r0 == r1)
1292 isubi(r0, i0);
1293 else
1294 lea(-i0, r1, _NOREG, _SCL1, r0);
1295 }
1296 else if (r0 != r1) {
1297 movi(r0, -i0);
1298 iaddr(r0, r1);
1299 }
1300 else {
1301 reg = jit_get_reg(jit_class_gpr);
1302 movi(rn(reg), i0);
1303 isubr(r0, rn(reg));
1304 jit_unget_reg(reg);
1305 }
1306}
1307
1308static void
1309_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1310{
1311 jit_int32_t reg;
1312 if (r0 == r2 && r0 != r1) {
1313 reg = jit_get_reg(jit_class_gpr);
1314 movr(rn(reg), r0);
1315 movr(r0, r1);
1316 isubr(r0, rn(reg));
1317 jit_unget_reg(reg);
1318 }
1319 else {
1320 movr(r0, r1);
1321 isubr(r0, r2);
1322 }
1323}
1324
1325static void
1326_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1327{
1328 jit_int32_t reg;
1329 movr(r0, r1);
1330 if (can_sign_extend_int_p(i0))
1331 isubi(r0, i0);
1332 else {
1333 reg = jit_get_reg(jit_class_gpr);
1334 movi(rn(reg), i0);
1335 isubr(r0, rn(reg));
1336 jit_unget_reg(reg);
1337 }
1338}
1339
1340static void
1341_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1342{
1343 jit_int32_t reg;
1344 if (r0 == r2 && r0 != r1) {
1345 reg = jit_get_reg(jit_class_gpr);
1346 movr(rn(reg), r0);
1347 movr(r0, r1);
1348 isubxr(r0, rn(reg));
1349 jit_unget_reg(reg);
1350 }
1351 else {
1352 movr(r0, r1);
1353 isubxr(r0, r2);
1354 }
1355}
1356
1357static void
1358_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1359{
1360 jit_int32_t reg;
1361 movr(r0, r1);
1362 if (can_sign_extend_int_p(i0))
1363 isubxi(r0, i0);
1364 else {
1365 reg = jit_get_reg(jit_class_gpr);
1366 imovi(rn(reg), i0);
1367 isubxr(r0, rn(reg));
1368 jit_unget_reg(reg);
1369 }
1370}
1371
1372static void
1373_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1374{
1375 subi(r0, r1, i0);
1376 negr(r0, r0);
1377}
1378
1379static void
1380_imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1381{
1382 rex(0, WIDE, r0, _NOREG, r1);
1383 ic(0x0f);
1384 ic(0xaf);
1385 mrm(0x03, r7(r0), r7(r1));
1386}
1387
1388static void
1389_imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1390{
1391 jit_int32_t reg;
1392 if (can_sign_extend_int_p(i0)) {
1393 rex(0, WIDE, r0, _NOREG, r1);
1394 if ((jit_int8_t)i0 == i0) {
1395 ic(0x6b);
1396 mrm(0x03, r7(r0), r7(r1));
1397 ic(i0);
1398 }
1399 else {
1400 ic(0x69);
1401 mrm(0x03, r7(r0), r7(r1));
1402 ii(i0);
1403 }
1404 }
1405 else {
1406 reg = jit_get_reg(jit_class_gpr);
1407 movi(rn(reg), i0);
1408 imulr(r0, rn(reg));
1409 jit_unget_reg(reg);
1410 }
1411}
1412
1413static void
1414_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1415{
1416 if (r0 == r1)
1417 imulr(r0, r2);
1418 else if (r0 == r2)
1419 imulr(r0, r1);
1420 else {
1421 movr(r0, r1);
1422 imulr(r0, r2);
1423 }
1424}
1425
1426static void
1427_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1428{
1429 switch (i0) {
1430 case 0:
1431 ixorr(r0, r0);
1432 break;
1433 case 1:
1434 movr(r0, r1);
1435 break;
1436 case -1:
1437 negr(r0, r1);
1438 break;
1439 case 2:
1440 lea(0, _NOREG, r1, _SCL2, r0);
1441 break;
1442 case 4:
1443 lea(0, _NOREG, r1, _SCL4, r0);
1444 break;
1445 case 8:
1446 lea(0, _NOREG, r1, _SCL8, r0);
1447 break;
1448 default:
1449 if (i0 > 0 && !(i0 & (i0 - 1)))
1450 lshi(r0, r1, ffsl(i0) - 1);
1451 else if (can_sign_extend_int_p(i0))
1452 imuli(r0, r1, i0);
1453 else if (r0 != r1) {
1454 movi(r0, i0);
1455 imulr(r0, r1);
1456 }
1457 else
1458 imuli(r0, r0, i0);
1459 break;
1460 }
1461}
1462
/*
 * Helpers for code that must use fixed registers (RAX/RDX).  They
 * expect the caller to have locals `sav` and `set` (bit masks indexed
 * by register number) and operands named r0..r3.
 *
 * savset/isavset/qsavset: record in `sav` that the fixed register must
 * be preserved because it is not a destination, and in `set` that it
 * must also be reserved because it is not a source either.  The three
 * variants differ only in which operands are destinations and sources.
 */
#define savset(regno)                                                   \
    do {                                                                \
        if (r0 != (regno)) {                                            \
            sav |= 1 << (regno);                                        \
            if (r1 != (regno) && r2 != (regno))                         \
                set |= 1 << (regno);                                    \
        }                                                               \
    } while (0)
#define isavset(regno)                                                  \
    do {                                                                \
        if (r0 != (regno)) {                                            \
            sav |= 1 << (regno);                                        \
            if (r1 != (regno))                                          \
                set |= 1 << (regno);                                    \
        }                                                               \
    } while (0)
#define qsavset(regno)                                                  \
    do {                                                                \
        if (r0 != (regno) && r1 != (regno)) {                           \
            sav |= 1 << (regno);                                        \
            if (r2 != (regno) && r3 != (regno))                         \
                set |= 1 << (regno);                                    \
        }                                                               \
    } while (0)
/*
 * allocr: reserve the named register when flagged in `set`; when
 * flagged in `sav`, spill it with save() only if it is live and not
 * already saved, otherwise drop the flag so clear() will not reload it.
 */
#define allocr(regno, regval)                                           \
    do {                                                                \
        if (set & (1 << (regno)))                                       \
            (void)jit_get_reg((regval)|jit_class_gpr|jit_class_named);  \
        if (sav & (1 << (regno))) {                                     \
            if ( jit_regset_tstbit(&_jitc->regsav, regval) ||           \
                !jit_regset_tstbit(&_jitc->reglive, regval))            \
                sav &= ~(1 << (regno));                                 \
            else                                                        \
                save(regval);                                           \
        }                                                               \
    } while (0)
/*
 * clear: undo allocr, releasing the reservation and reloading the
 * spilled value as flagged in `set` and `sav`.
 */
#define clear(regno, regval)                                            \
    do {                                                                \
        if (set & (1 << (regno)))                                       \
            jit_unget_reg(regval);                                      \
        if (sav & (1 << (regno)))                                       \
            load(regval);                                               \
    } while (0)
1506
4a71579b
PC
1507static void
1508_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1509 jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1510{
1511 jit_int32_t mul;
1512 jit_int32_t sav;
1513 jit_int32_t set;
1514
1515 sav = set = 0;
1516 qsavset(_RDX_REGNO);
1517 qsavset(_RAX_REGNO);
1518 allocr(_RDX_REGNO, _RDX);
1519 allocr(_RAX_REGNO, _RAX);
1520
1521 if (r3 == _RAX_REGNO)
1522 mul = r2;
1523 else {
1524 mul = r3;
1525 movr(_RAX_REGNO, r2);
1526 }
1527 if (sign)
1528 umulr(mul);
1529 else
1530 umulr_u(mul);
1531
ba86ff93
PC
1532 if (r0 != JIT_NOREG) {
1533 if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1534 xchgr(_RAX_REGNO, _RDX_REGNO);
1535 else {
1536 if (r0 != _RDX_REGNO)
1537 movr(r0, _RAX_REGNO);
1538 movr(r1, _RDX_REGNO);
1539 if (r0 == _RDX_REGNO)
1540 movr(r0, _RAX_REGNO);
1541 }
1542 }
4a71579b 1543 else {
ba86ff93 1544 assert(r1 != JIT_NOREG);
4a71579b 1545 movr(r1, _RDX_REGNO);
4a71579b
PC
1546 }
1547
1548 clear(_RDX_REGNO, _RDX);
1549 clear(_RAX_REGNO, _RAX);
1550}
1551
1552static void
1553_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1554 jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1555{
1556 jit_int32_t reg;
1557
1558 if (i0 == 0) {
1559 ixorr(r0, r0);
1560 ixorr(r1, r1);
1561 }
1562 else {
1563 reg = jit_get_reg(jit_class_gpr);
1564 movi(rn(reg), i0);
1565 if (sign)
1566 qmulr(r0, r1, r2, rn(reg));
1567 else
1568 qmulr_u(r0, r1, r2, rn(reg));
1569 jit_unget_reg(reg);
1570 }
1571}
1572
1573static void
1574_sign_extend_rdx_rax(jit_state_t *_jit)
1575{
1576 rex(0, WIDE, 0, 0, 0);
1577 ic(0x99);
1578}
1579
/*
 * Divide r1 by r2 and store either the quotient (divide != 0) or the
 * remainder (divide == 0) in r0.  `sign` selects signed or unsigned
 * division.
 *
 * The dividend is placed in RAX and RDX is set up as its upper half,
 * so both are saved/reserved around the operation unless they are the
 * destination.  Most of the function is about getting the divisor out
 * of RAX/RDX before they are overwritten.
 */
1580static void
1581_divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
1582 jit_bool_t sign, jit_bool_t divide)
1583{
1584 jit_int32_t div;
1585 jit_int32_t reg;
1586 jit_int32_t set;
1587 jit_int32_t sav;
1588 jit_int32_t use;
1589
    /* `use` tells whether a temporary register (reg) holds the divisor */
1590 sav = set = use = 0;
1591 savset(_RDX_REGNO);
1592 savset(_RAX_REGNO);
1593 allocr(_RDX_REGNO, _RDX);
1594 allocr(_RAX_REGNO, _RAX);
1595
    /* Divisor lives in RAX: move it away before loading the dividend */
1596 if (r2 == _RAX_REGNO) {
1597 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
    /* r0 cannot hold the divisor; try any free register first, then
     * fall back to a named one that is not r1 */
1598 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1599 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1600 jit_class_gpr|jit_class_named);
1601 use = 1;
1602 div = rn(reg);
1603 movr(div, _RAX_REGNO);
1604 if (r1 != _RAX_REGNO)
1605 movr(_RAX_REGNO, r1);
1606 }
1607 else {
    /* r0 is free to hold the divisor until the result is written */
1608 if (r0 == r1)
1609 xchgr(r0, _RAX_REGNO);
1610 else {
1611 if (r0 != _RAX_REGNO)
1612 movr(r0, _RAX_REGNO);
1613 if (r1 != _RAX_REGNO)
1614 movr(_RAX_REGNO, r1);
1615 }
1616 div = r0;
1617 }
1618 }
    /* Divisor lives in RDX, which is about to be overwritten */
1619 else if (r2 == _RDX_REGNO) {
1620 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1621 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1622 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1623 jit_class_gpr|jit_class_named);
1624 use = 1;
1625 div = rn(reg);
1626 movr(div, _RDX_REGNO);
1627 if (r1 != _RAX_REGNO)
1628 movr(_RAX_REGNO, r1);
1629 }
1630 else {
1631 if (r1 != _RAX_REGNO)
1632 movr(_RAX_REGNO, r1);
1633 movr(r0, _RDX_REGNO);
1634 div = r0;
1635 }
1636 }
    /* Divisor is elsewhere and can be used where it is */
1637 else {
1638 if (r1 != _RAX_REGNO)
1639 movr(_RAX_REGNO, r1);
1640 div = r2;
1641 }
1642
    /* Upper half of the dividend: sign extension or zero */
1643 if (sign) {
1644 sign_extend_rdx_rax();
1645 idivr(div);
1646 }
1647 else {
1648 ixorr(_RDX_REGNO, _RDX_REGNO);
1649 idivr_u(div);
1650 }
1651
1652 if (use)
1653 jit_unget_reg(reg);
1654
    /* Quotient is taken from RAX, remainder from RDX */
1655 if (divide)
1656 movr(r0, _RAX_REGNO);
1657 else
1658 movr(r0, _RDX_REGNO);
1659
1660 clear(_RDX_REGNO, _RDX);
1661 clear(_RAX_REGNO, _RAX);
1662}
1663
1664static void
1665_divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
1666 jit_bool_t sign, jit_bool_t divide)
1667{
1668 jit_int32_t reg;
1669 jit_int32_t div;
1670 jit_int32_t sav;
1671 jit_int32_t set;
1672 jit_int32_t use;
1673
1674 if (divide) {
1675 switch (i0) {
1676 case 1:
1677 movr(r0, r1);
1678 return;
1679 case -1:
1680 if (sign) {
1681 negr(r0, r1);
1682 return;
1683 }
1684 break;
1685 default:
1686 if (i0 > 0 && !(i0 & (i0 - 1))) {
1687 movr(r0, r1);
1688 if (sign)
1689 rshi(r0, r0, ffsl(i0) - 1);
1690 else
1691 rshi_u(r0, r0, ffsl(i0) - 1);
1692 return;
1693 }
1694 break;
1695 }
1696 }
1697 else if (i0 == 1 || (sign && i0 == -1)) {
1698 ixorr(r0, r0);
1699 return;
1700 }
1701 else if (!sign && i0 > 0 && !(i0 & (i0 - 1))) {
1702 if (can_sign_extend_int_p(i0)) {
1703 movr(r0, r1);
1704 iandi(r0, i0 - 1);
1705 }
1706 else if (r0 != r1) {
1707 movi(r0, i0 - 1);
1708 iandr(r0, r1);
1709 }
1710 else {
1711 reg = jit_get_reg(jit_class_gpr);
1712 movi(rn(reg), i0 - 1);
1713 iandr(r0, rn(reg));
1714 jit_unget_reg(reg);
1715 }
1716 return;
1717 }
1718
1719 sav = set = use = 0;
1720 isavset(_RDX_REGNO);
1721 isavset(_RAX_REGNO);
1722 allocr(_RDX_REGNO, _RDX);
1723 allocr(_RAX_REGNO, _RAX);
1724
1725 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) {
1726 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1727 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1728 jit_class_gpr|jit_class_named);
1729 use = 1;
1730 div = rn(reg);
1731 }
1732 else
1733 div = r0;
1734
1735 movi(div, i0);
1736 movr(_RAX_REGNO, r1);
1737
1738 if (sign) {
1739 sign_extend_rdx_rax();
1740 idivr(div);
1741 }
1742 else {
1743 ixorr(_RDX_REGNO, _RDX_REGNO);
1744 idivr_u(div);
1745 }
1746
1747 if (use)
1748 jit_unget_reg(reg);
1749
1750 if (divide)
1751 movr(r0, _RAX_REGNO);
1752 else
1753 movr(r0, _RDX_REGNO);
1754
1755 clear(_RDX_REGNO, _RDX);
1756 clear(_RAX_REGNO, _RAX);
1757}
1758
/*
 * Divide r2 by r3, storing the quotient in r0 and the remainder in r1.
 * `sign` selects signed or unsigned division.
 *
 * The dividend is placed in RAX and RDX is set up as its upper half,
 * so both are saved/reserved around the operation unless they are
 * destinations.  Most of the function is about getting the divisor out
 * of RAX/RDX before they are overwritten.
 */
1759static void
1760_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1761 jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1762{
1763 jit_int32_t div;
1764 jit_int32_t reg;
1765 jit_int32_t sav;
1766 jit_int32_t set;
1767 jit_int32_t use;
1768
    /* `use` tells whether a temporary register (reg) holds the divisor */
1769 sav = set = use = 0;
1770 qsavset(_RDX_REGNO);
1771 qsavset(_RAX_REGNO);
1772 allocr(_RDX_REGNO, _RDX);
1773 allocr(_RAX_REGNO, _RAX);
    /* Divisor lives in RAX: move it away before loading the dividend */
1774 if (r3 == _RAX_REGNO) {
1775 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
    /* r0 cannot hold the divisor; try any free register first, then
     * fall back to a named one.
     * NOTE(review): the fallback only avoids r1; whether it can collide
     * with the dividend r2 should be confirmed. */
1776 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1777 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1778 jit_class_gpr|jit_class_named);
1779 use = 1;
1780 div = rn(reg);
1781 movr(div, _RAX_REGNO);
1782 if (r2 != _RAX_REGNO)
1783 movr(_RAX_REGNO, r2);
1784 }
1785 else {
    /* r0 is free to hold the divisor until the results are written */
1786 if (r0 == r2)
1787 xchgr(r0, _RAX_REGNO);
1788 else {
1789 if (r0 != _RAX_REGNO)
1790 movr(r0, _RAX_REGNO);
1791 if (r2 != _RAX_REGNO)
1792 movr(_RAX_REGNO, r2);
1793 }
1794 div = r0;
1795 }
1796 }
    /* Divisor lives in RDX, which is about to be overwritten */
1797 else if (r3 == _RDX_REGNO) {
1798 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1799 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1800 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1801 jit_class_gpr|jit_class_named);
1802 use = 1;
1803 div = rn(reg);
1804 movr(div, _RDX_REGNO);
1805 if (r2 != _RAX_REGNO)
1806 movr(_RAX_REGNO, r2);
1807 }
1808 else {
1809 if (r2 != _RAX_REGNO)
1810 movr(_RAX_REGNO, r2);
1811 movr(r0, _RDX_REGNO);
1812 div = r0;
1813 }
1814 }
    /* Divisor is elsewhere and can be used where it is */
1815 else {
1816 if (r2 != _RAX_REGNO)
1817 movr(_RAX_REGNO, r2);
1818 div = r3;
1819 }
    /* Upper half of the dividend: sign extension or zero */
1820 if (sign) {
1821 sign_extend_rdx_rax();
1822 idivr(div);
1823 }
1824 else {
1825 ixorr(_RDX_REGNO, _RDX_REGNO);
1826 idivr_u(div);
1827 }
1828 if (use)
1829 jit_unget_reg(reg);
1830
    /* Deliver RAX to r0 and RDX to r1 without clobbering one result
     * with the other */
1831 if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1832 xchgr(_RAX_REGNO, _RDX_REGNO);
1833 else {
1834 if (r0 != _RDX_REGNO)
1835 movr(r0, _RAX_REGNO);
1836 movr(r1, _RDX_REGNO);
1837 if (r0 == _RDX_REGNO)
1838 movr(r0, _RAX_REGNO);
1839 }
1840
1841 clear(_RDX_REGNO, _RDX);
1842 clear(_RAX_REGNO, _RAX);
1843}
1844
1845static void
1846_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1847 jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1848{
1849 jit_int32_t reg;
1850
1851 reg = jit_get_reg(jit_class_gpr);
1852 movi(rn(reg), i0);
1853 if (sign)
1854 qdivr(r0, r1, r2, rn(reg));
1855 else
1856 qdivr_u(r0, r1, r2, rn(reg));
1857 jit_unget_reg(reg);
1858}
4a71579b
PC
1859
1860static void
1861_andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1862{
1863 if (r1 == r2)
1864 movr(r0, r1);
1865 else if (r0 == r1)
1866 iandr(r0, r2);
1867 else if (r0 == r2)
1868 iandr(r0, r1);
1869 else {
1870 movr(r0, r1);
1871 iandr(r0, r2);
1872 }
1873}
1874
1875static void
1876_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1877{
1878 jit_int32_t reg;
1879
1880 if (i0 == 0)
1881 ixorr(r0, r0);
1882 else if (i0 == -1)
1883 movr(r0, r1);
1884 else if (r0 == r1) {
1885 if (can_sign_extend_int_p(i0))
1886 iandi(r0, i0);
1887 else {
1888 reg = jit_get_reg(jit_class_gpr);
1889 movi(rn(reg), i0);
1890 iandr(r0, rn(reg));
1891 jit_unget_reg(reg);
1892 }
1893 }
1894 else {
1895 movi(r0, i0);
1896 iandr(r0, r1);
1897 }
1898}
1899
1900static void
1901_orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1902{
1903 if (r1 == r2)
1904 movr(r0, r1);
1905 else if (r0 == r1)
1906 iorr(r0, r2);
1907 else if (r0 == r2)
1908 iorr(r0, r1);
1909 else {
1910 movr(r0, r1);
1911 iorr(r0, r2);
1912 }
1913}
1914
1915static void
1916_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1917{
1918 jit_int32_t reg;
1919 if (i0 == 0)
1920 movr(r0, r1);
1921 else if (i0 == -1)
1922 movi(r0, -1);
1923 else if (can_sign_extend_int_p(i0)) {
1924 movr(r0, r1);
1925 iori(r0, i0);
1926 }
1927 else if (r0 != r1) {
1928 movi(r0, i0);
1929 iorr(r0, r1);
1930 }
1931 else {
1932 reg = jit_get_reg(jit_class_gpr);
1933 movi(rn(reg), i0);
1934 iorr(r0, rn(reg));
1935 jit_unget_reg(reg);
1936 }
1937}
1938
1939static void
1940_xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1941{
1942 if (r1 == r2)
1943 ixorr(r0, r0);
1944 else if (r0 == r1)
1945 ixorr(r0, r2);
1946 else if (r0 == r2)
1947 ixorr(r0, r1);
1948 else {
1949 movr(r0, r1);
1950 ixorr(r0, r2);
1951 }
1952}
1953
1954static void
1955_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1956{
1957 jit_int32_t reg;
1958 if (i0 == 0)
1959 movr(r0, r1);
1960 else if (i0 == -1)
1961 comr(r0, r1);
1962 else if (can_sign_extend_int_p(i0)) {
1963 movr(r0, r1);
1964 ixori(r0, i0);
1965 }
1966 else if (r0 != r1) {
1967 movi(r0, i0);
1968 ixorr(r0, r1);
1969 }
1970 else {
1971 reg = jit_get_reg(jit_class_gpr);
1972 movi(rn(reg), i0);
1973 ixorr(r0, rn(reg));
1974 jit_unget_reg(reg);
1975 }
1976}
1977
1978static void
1979_irotshr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1980{
1981 rex(0, WIDE, _RCX_REGNO, _NOREG, r0);
1982 ic(0xd3);
1983 mrm(0x03, code, r7(r0));
1984}
1985
1986static void
1987_rotshr(jit_state_t *_jit, jit_int32_t code,
1988 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1989{
1990 jit_int32_t reg;
1991 jit_int32_t use;
1992
1993 if (r0 == _RCX_REGNO) {
1994 reg = jit_get_reg(jit_class_gpr);
1995 movr(rn(reg), r1);
1996 if (r2 != _RCX_REGNO)
1997 movr(_RCX_REGNO, r2);
1998 irotshr(code, rn(reg));
1999 movr(_RCX_REGNO, rn(reg));
2000 jit_unget_reg(reg);
2001 }
2002 else if (r2 != _RCX_REGNO) {
2003 use = !jit_reg_free_p(_RCX);
2004 if (use) {
2005 reg = jit_get_reg(jit_class_gpr);
2006 movr(rn(reg), _RCX_REGNO);
2007 }
2008 else
2009 reg = 0;
2010 if (r1 == _RCX_REGNO) {
2011 if (r0 == r2)
2012 xchgr(r0, _RCX_REGNO);
2013 else {
2014 movr(r0, r1);
2015 movr(_RCX_REGNO, r2);
2016 }
2017 }
2018 else {
2019 movr(_RCX_REGNO, r2);
2020 movr(r0, r1);
2021 }
2022 irotshr(code, r0);
2023 if (use) {
2024 movr(_RCX_REGNO, rn(reg));
2025 jit_unget_reg(reg);
2026 }
2027 }
2028 else {
2029 movr(r0, r1);
2030 irotshr(code, r0);
2031 }
2032}
2033
2034static void
2035_irotshi(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
2036{
2037 rex(0, WIDE, _NOREG, _NOREG, r0);
2038 if (i0 == 1) {
2039 ic(0xd1);
2040 mrm(0x03, code, r7(r0));
2041 }
2042 else {
2043 ic(0xc1);
2044 mrm(0x03, code, r7(r0));
2045 ic(i0);
2046 }
2047}
2048
2049static void
2050_rotshi(jit_state_t *_jit, jit_int32_t code,
2051 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2052{
2053 movr(r0, r1);
2054 if (i0)
2055 irotshi(code, r0, i0);
2056}
2057
ba86ff93
PC
2058static void
2059_xlshr(jit_state_t *_jit, jit_bool_t sign,
2060 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
2061{
2062 jit_int32_t sav, set;
2063 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
2064 jit_word_t over, zero, over_done, done;
2065 sav = set = 0;
2066 /* %RCX must be used for shift. */
2067 qsavset(_RCX_REGNO);
2068 allocr(_RCX_REGNO, _RCX);
2069 /* Almost certainly not %RCX */
2070 t1 = r1;
2071 if (r0 == _RCX_REGNO) {
2072 s0 = jit_get_reg(jit_class_gpr);
2073 t0 = rn(s0);
2074 }
2075 else {
2076 t0 = r0;
2077 /* r0 == r1 is undefined behavior */
2078 if (r1 == _RCX_REGNO) {
2079 s1 = jit_get_reg(jit_class_gpr);
2080 t1 = rn(s1);
2081 }
2082 }
2083 /* Allocate a temporary if a register is used more than once, or if
2084 * the value to shift is %RCX */
2085 if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
2086 s2 = jit_get_reg(jit_class_gpr);
2087 t2 = rn(s2);
2088 movr(t2, r2);
2089 }
2090 else
2091 t2 = r2;
2092 /* Allocate temporary if shift is also one of the outputs */
2093 if (r0 == r3 || r1 == r3) {
2094 s3 = jit_get_reg(jit_class_gpr);
2095 t3 = rn(s3);
2096 movr(t3, r3);
2097 }
2098 else
2099 t3 = r3;
2100 /* Bits to shift right */
2101 movi(t1, 0);
2102 /* Shift in %RCX */
2103 /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
2104 movr(_RCX_REGNO, t3);
2105 /* Copy value to low register */
2106 movr(t0, t2);
2107 /* SHLD shifts t0 left pulling extra bits in the right from t1.
2108 * It is very handly to shift bignums, but lightning does not support
2109 * these, nor 128 bit integers. The use of q{l,}sh{r,i} is to verify
2110 * if there precision loss in a shift and/or have it as a quick way
2111 * to multiply or divide by powers of two. */
2112 /* SHLD */
2113 rex(0, WIDE, t1, _NOREG, t0);
2114 ic(0xf);
2115 ic(0xa5);
2116 mrm(0x03, r7(t1), r7(t0));
2117 /* Must swap results if shift value is __WORDSIZE */
2118 alui(X86_CMP, t3, __WORDSIZE);
2119 over = jes(_jit->pc.w);
2120 /* Calculate bits to shift right and fill high register */
2121 rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
2122 if (sign)
2123 rshr(t1, t2, _RCX_REGNO);
2124 else
2125 rshr_u(t1, t2, _RCX_REGNO);
2126 /* FIXME t3 == %rcx only happens in 32 bit as %a3 (JIT_A3) is not
2127 * available -- it might be made available at some point, to
2128 * allow optimizing usage or arguments in registers. For now
2129 * keep the code, as one might cheat and use _RCX directly,
2130 * what is not officially supported, but *must* work. */
2131 /* Need to sign extend high register if shift value is zero */
2132 if (t3 == _RCX_REGNO)
2133 alui(X86_CMP, t3, __WORDSIZE);
2134 else
2135 alui(X86_CMP, t3, 0);
2136 /* Finished. */
2137 zero = jes(_jit->pc.w);
2138 done = jmpsi(_jit->pc.w);
2139 /* Swap registers if shift is __WORDSIZE */
2140 patch_at(over, _jit->pc.w);
2141 xchgr(t0, t1);
2142 over_done = jmpsi(_jit->pc.w);
2143 /* If shift value is zero */
2144 patch_at(zero, _jit->pc.w);
2145 if (sign)
2146 rshi(t1, t2, __WORDSIZE - 1);
2147 else
2148 movi(t1, 0);
2149 patch_at(over_done, _jit->pc.w);
2150 patch_at(done, _jit->pc.w);
2151 /* Release %RCX (if spilled) after branches */
2152 clear(_RCX_REGNO, _RCX);
2153 if (t3 != r3)
2154 jit_unget_reg(s3);
2155 if (t2 != r2)
2156 jit_unget_reg(s2);
2157 if (t1 != r1) {
2158 movr(r1, t1);
2159 jit_unget_reg(s1);
2160 }
2161 if (t0 != r0) {
2162 movr(r0, t0);
2163 jit_unget_reg(s0);
2164 }
2165}
2166
4a71579b
PC
2167static void
2168_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2169{
2170 if (i0 == 0)
2171 movr(r0, r1);
2172 else if (i0 <= 3)
2173 lea(0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? _SCL4 : _SCL8, r0);
2174 else
2175 rotshi(X86_SHL, r0, r1, i0);
2176}
2177
ba86ff93
PC
2178static void
2179_xlshi(jit_state_t *_jit, jit_bool_t sign,
2180 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
2181{
2182 if (i0 == 0) {
2183 movr(r0, r2);
2184 if (sign)
2185 rshi(r1, r2, __WORDSIZE - 1);
2186 else
2187 movi(r1, 0);
2188 }
2189 else if (i0 == __WORDSIZE) {
2190 movr(r1, r2);
2191 movi(r0, 0);
2192 }
2193 else {
2194 assert((jit_uword_t)i0 <= __WORDSIZE);
2195 if (sign)
2196 rshi(r1, r2, __WORDSIZE - i0);
2197 else
2198 rshi_u(r1, r2, __WORDSIZE - i0);
2199 lshi(r0, r2, i0);
2200 }
2201}
2202
2203static void
2204_xrshr(jit_state_t *_jit, jit_bool_t sign,
2205 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
2206{
2207 jit_int32_t sav, set;
2208 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
2209 jit_word_t over, zero, done;
2210 sav = set = 0;
2211 /* %RCX must be used for shift. */
2212 qsavset(_RCX_REGNO);
2213 allocr(_RCX_REGNO, _RCX);
2214 /* Almost certainly not %RCX */
2215 t1 = r1;
2216 if (r0 == _RCX_REGNO) {
2217 s0 = jit_get_reg(jit_class_gpr);
2218 t0 = rn(s0);
2219 }
2220 else {
2221 t0 = r0;
2222 /* r0 == r1 is undefined behavior */
2223 if (r1 == _RCX_REGNO) {
2224 s1 = jit_get_reg(jit_class_gpr);
2225 t1 = rn(s1);
2226 }
2227 }
2228 /* Allocate a temporary if a register is used more than once, or if
2229 * the value to shift is %RCX */
2230 if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
2231 s2 = jit_get_reg(jit_class_gpr);
2232 t2 = rn(s2);
2233 movr(t2, r2);
2234 }
2235 else
2236 t2 = r2;
2237 /* Allocate temporary if shift is also one of the outputs */
2238 if (r0 == r3 || r1 == r3) {
2239 s3 = jit_get_reg(jit_class_gpr);
2240 t3 = rn(s3);
2241 movr(t3, r3);
2242 }
2243 else
2244 t3 = r3;
2245 /* Bits to shift left */
2246 if (sign)
2247 rshi(t1, t2, __WORDSIZE - 1);
2248 else
2249 movi(t1, 0);
2250 /* Shift in %RCX */
2251 /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
2252 movr(_RCX_REGNO, t3);
2253 /* Copy value to low register */
2254 movr(t0, t2);
2255 /* SHRD shifts t0 right pulling extra bits in the left from t1 */
2256 /* SHRD */
2257 rex(0, WIDE, t1, _NOREG, t0);
2258 ic(0xf);
2259 ic(0xad);
2260 mrm(0x03, r7(t1), r7(t0));
2261 /* Must swap results if shift value is __WORDSIZE */
2262 alui(X86_CMP, t3, __WORDSIZE);
2263 over = jes(_jit->pc.w);
2264 /* Already zero or sign extended if shift value is zero */
2265 alui(X86_CMP, t3, 0);
2266 zero = jes(_jit->pc.w);
2267 /* Calculate bits to shift left and fill high register */
2268 rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
2269 lshr(t1, t2, _RCX_REGNO);
2270 done = jmpsi(_jit->pc.w);
2271 /* Swap registers if shift is __WORDSIZE */
2272 patch_at(over, _jit->pc.w);
2273 xchgr(t0, t1);
2274 /* If shift value is zero */
2275 patch_at(zero, _jit->pc.w);
2276 patch_at(done, _jit->pc.w);
2277 /* Release %RCX (if spilled) after branches */
2278 clear(_RCX_REGNO, _RCX);
2279 if (t3 != r3)
2280 jit_unget_reg(s3);
2281 if (t2 != r2)
2282 jit_unget_reg(s2);
2283 if (t1 != r1) {
2284 movr(r1, t1);
2285 jit_unget_reg(s1);
2286 }
2287 if (t0 != r0) {
2288 movr(r0, t0);
2289 jit_unget_reg(s0);
2290 }
2291}
2292
2293static void
2294_xrshi(jit_state_t *_jit, jit_bool_t sign,
2295 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
2296{
2297 if (i0 == 0) {
2298 movr(r0, r2);
2299 if (sign)
2300 rshi(r1, r2, __WORDSIZE - 1);
2301 else
2302 movi(r1, 0);
2303 }
2304 else if (i0 == __WORDSIZE) {
2305 movr(r1, r2);
2306 if (sign)
2307 rshi(r0, r2, __WORDSIZE - 1);
2308 else
2309 movi(r0, 0);
2310 }
2311 else {
2312 assert((jit_uword_t)i0 <= __WORDSIZE);
2313 lshi(r1, r2, __WORDSIZE - i0);
2314 if (sign)
2315 rshi(r0, r2, i0);
2316 else
2317 rshi_u(r0, r2, i0);
2318 }
2319}
2320
4a71579b
PC
2321static void
2322_unr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
2323{
2324 rex(0, WIDE, _NOREG, _NOREG, r0);
2325 ic(0xf7);
2326 mrm(0x03, code, r7(r0));
2327}
2328
2329static void
2330_negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2331{
2332 if (r0 == r1)
2333 inegr(r0);
2334 else {
2335 ixorr(r0, r0);
2336 isubr(r0, r1);
2337 }
2338}
2339
2340static void
2341_comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2342{
2343 movr(r0, r1);
2344 icomr(r0);
2345}
2346
#if USE_INC_DEC
/* r0 = r1 + 1.  64 bit code uses the 0xff /0 form; 32 bit code uses
 * the single byte 0x40+reg form. */
static void
_incr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    movr(r0, r1);
#  if __X64
    rex(0, WIDE, _NOREG, _NOREG, r0);
    ic(0xff);
    mrm(0x03, 0x00, r7(r0));
#  else
    ic(0x40 | r7(r0));
#  endif
}

/* r0 = r1 - 1.  64 bit code uses the 0xff /1 form; 32 bit code uses
 * the single byte 0x48+reg form. */
static void
_decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    movr(r0, r1);
#  if __X64
    rex(0, WIDE, _NOREG, _NOREG, r0);
    ic(0xff);
    mrm(0x03, 0x01, r7(r0));
#  else
    ic(0x48 | r7(r0));
#  endif
}
#endif
2374
79bfeef6
PC
2375static void
2376_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2377{
2378 comr(r0, r1);
2379 clzr(r0, r0);
2380}
2381
2382static void
2383_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2384{
2385 jit_word_t w, x;
2386 /* LZCNT */
2387 if (jit_cpu.abm)
2388 ic(0xf3);
2389 /* else BSR */
2390 rex(0, WIDE, r0, _NOREG, r1);
2391 ic(0x0f);
2392 ic(0xbd);
2393 mrm(0x3, r7(r0), r7(r1));
2394 if (!jit_cpu.abm) {
2395 /* jump if undefined: r1 == 0 */
2396 w = jccs(X86_CC_E, _jit->pc.w);
2397 /* count leading zeros */
2398 rsbi(r0, r0, __WORDSIZE - 1);
2399 /* done */
2400 x = jmpsi(_jit->pc.w);
2401 /* if r1 == 0 */
2402 patch_at(w, _jit->pc.w);
2403 movi(r0, __WORDSIZE);
2404 /* not undefined */
2405 patch_at(x, _jit->pc.w);
2406 }
2407 /* LZCNT has defined behavior for value zero and count leading zeros */
2408}
2409
2410static void
2411_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2412{
2413 comr(r0, r1);
2414 ctzr(r0, r0);
2415}
2416
2417static void
2418_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2419{
2420 jit_word_t w;
2421 jit_int32_t t0;
2422 if (!jit_cpu.abm) {
2423 if (jit_cmov_p())
2424 t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk);
2425 else
2426 t0 = _NOREG;
2427 if (t0 != _NOREG)
2428 movi(rn(t0), __WORDSIZE);
2429 }
2430 /* TZCNT */
2431 if (jit_cpu.abm)
2432 ic(0xf3);
2433 /* else BSF */
2434 rex(0, WIDE, r0, _NOREG, r1);
2435 ic(0x0f);
2436 ic(0xbc);
2437 mrm(0x3, r7(r0), r7(r1));
2438 if (!jit_cpu.abm) {
2439 /* No conditional move or need spill/reload a temporary */
2440 if (t0 == _NOREG) {
2441 w = jccs(X86_CC_E, _jit->pc.w);
2442 movi(r0, __WORDSIZE);
2443 patch_at(w, _jit->pc.w);
2444 }
2445 else {
2446 /* CMOVE */
2447 rex(0, WIDE, r0, _NOREG, rn(t0));
2448 ic(0x0f);
2449 ic(0x44);
2450 mrm(0x3, r7(r0), r7(rn(t0)));
2451 jit_unget_reg(t0);
2452 }
2453 }
2454 /* TZCNT has defined behavior for value zero */
2455}
2456
ba86ff93
PC
2457static void
2458_rbitr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
2459{
2460 jit_word_t loop;
2461 jit_int32_t sav, set;
2462 jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
2463 static const unsigned char swap_tab[256] = {
2464 0, 128, 64, 192, 32, 160, 96, 224,
2465 16, 144, 80, 208, 48, 176, 112, 240,
2466 8, 136, 72, 200, 40, 168, 104, 232,
2467 24, 152, 88, 216 ,56, 184, 120, 248,
2468 4, 132, 68, 196, 36, 164, 100, 228,
2469 20, 148, 84, 212, 52, 180, 116, 244,
2470 12, 140, 76, 204, 44, 172, 108, 236,
2471 28, 156, 92, 220, 60, 188, 124, 252,
2472 2, 130, 66, 194, 34, 162, 98, 226,
2473 18, 146, 82, 210, 50, 178, 114, 242,
2474 10, 138, 74, 202, 42, 170, 106, 234,
2475 26, 154, 90, 218, 58, 186, 122, 250,
2476 6, 134, 70, 198, 38, 166, 102, 230,
2477 22, 150, 86, 214, 54, 182, 118, 246,
2478 14, 142, 78, 206, 46, 174, 110, 238,
2479 30, 158, 94, 222, 62, 190, 126, 254,
2480 1, 129, 65, 193, 33, 161, 97, 225,
2481 17, 145, 81, 209, 49, 177, 113, 241,
2482 9, 137, 73, 201, 41, 169, 105, 233,
2483 25, 153, 89, 217, 57, 185, 121, 249,
2484 5, 133, 69, 197, 37, 165, 101, 229,
2485 21, 149, 85, 213, 53, 181, 117, 245,
2486 13, 141, 77, 205, 45, 173, 109, 237,
2487 29, 157, 93, 221, 61, 189, 125, 253,
2488 3, 131, 67, 195, 35, 163, 99, 227,
2489 19, 147, 83, 211, 51, 179, 115, 243,
2490 11, 139, 75, 203, 43, 171, 107, 235,
2491 27, 155, 91, 219, 59, 187, 123, 251,
2492 7, 135, 71, 199, 39, 167, 103, 231,
2493 23, 151, 87, 215, 55, 183, 119, 247,
2494 15, 143, 79, 207, 47, 175, 111, 239,
2495 31, 159, 95, 223, 63, 191, 127, 255
2496 };
2497 sav = set = 0;
2498 isavset(_RCX_REGNO);
2499 allocr(_RCX_REGNO, _RCX);
2500 if (r0 == _RCX_REGNO) {
2501 t0 = jit_get_reg(jit_class_gpr);
2502 r0_reg = rn(t0);
2503 }
2504 else {
2505 t0 = JIT_NOREG;
2506 r0_reg = r0;
2507 }
2508 if (r1 == _RCX_REGNO || r0 == r1) {
2509 t1 = jit_get_reg(jit_class_gpr);
2510 r1_reg = rn(t1);
2511 movr(r1_reg, r1);
2512 }
2513 else {
2514 t1 = JIT_NOREG;
2515 r1_reg = r1;
2516 }
2517 t2 = jit_get_reg(jit_class_gpr);
2518 t3 = jit_get_reg(jit_class_gpr);
2519#if __WORDSIZE == 32
2520 /* Avoid condition that causes running out of registers */
2521 if (!reg8_p(r1_reg)) {
2522 movi(rn(t2), 0xff);
2523 andr(rn(t2), r1_reg, rn(t2));
2524 }
2525 else
2526#endif
2527 extr_uc(rn(t2), r1_reg);
2528 movi(rn(t3), (jit_word_t)swap_tab);
2529 ldxr_uc(r0_reg, rn(t3), rn(t2));
2530 movi(_RCX_REGNO, 8);
2531 loop = _jit->pc.w;
2532 rshr(rn(t2), r1_reg, _RCX_REGNO);
2533 extr_uc(rn(t2), rn(t2));
2534 lshi(r0_reg, r0_reg, 8);
2535 ldxr_uc(rn(t2), rn(t3), rn(t2));
2536 orr(r0_reg, r0_reg, rn(t2));
2537 addi(_RCX_REGNO, _RCX_REGNO, 8);
2538 alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
2539 jls(loop);
2540 clear(_RCX_REGNO, _RCX);
2541 jit_unget_reg(t3);
2542 jit_unget_reg(t2);
2543 if (t1 != JIT_NOREG)
2544 jit_unget_reg(t1);
2545 if (t0 != JIT_NOREG) {
2546 movr(r0, r0_reg);
2547 jit_unget_reg(t0);
2548 }
2549}
2550
2551static void
2552_popcntr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
2553{
2554 if (jit_cpu.abm) {
2555 ic(0xf3);
2556 rex(0, WIDE, r0, _NOREG, r1);
2557 ic(0x0f);
2558 ic(0xb8);
2559 mrm(0x3, r7(r0), r7(r1));
2560 }
2561 else {
2562 jit_word_t loop;
2563 jit_int32_t sav, set;
2564 jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
2565 static const unsigned char pop_tab[256] = {
2566 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
2567 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2568 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2569 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2570 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2571 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2572 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2573 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
2574 };
2575 sav = set = 0;
2576 isavset(_RCX_REGNO);
2577 allocr(_RCX_REGNO, _RCX);
2578 if (r0 == _RCX_REGNO) {
2579 t0 = jit_get_reg(jit_class_gpr);
2580 r0_reg = rn(t0);
2581 }
2582 else {
2583 t0 = JIT_NOREG;
2584 r0_reg = r0;
2585 }
2586 if (r1 == _RCX_REGNO || r0 == r1) {
2587 t1 = jit_get_reg(jit_class_gpr);
2588 r1_reg = rn(t1);
2589 movr(r1_reg, r1);
2590 }
2591 else {
2592 t1 = JIT_NOREG;
2593 r1_reg = r1;
2594 }
2595 t2 = jit_get_reg(jit_class_gpr);
2596 t3 = jit_get_reg(jit_class_gpr);
2597#if __WORDSIZE == 32
2598 /* Avoid condition that causes running out of registers */
2599 if (!reg8_p(r1_reg)) {
2600 movi(rn(t2), 0xff);
2601 andr(rn(t2), r1_reg, rn(t2));
2602 }
2603 else
2604#endif
2605 extr_uc(rn(t2), r1_reg);
2606 movi(rn(t3), (jit_word_t)pop_tab);
2607 ldxr_uc(r0_reg, rn(t3), rn(t2));
2608 movi(_RCX_REGNO, 8);
2609 loop = _jit->pc.w;
2610 rshr(rn(t2), r1_reg, _RCX_REGNO);
2611 extr_uc(rn(t2), rn(t2));
2612 ldxr_uc(rn(t2), rn(t3), rn(t2));
2613 addr(r0_reg, r0_reg, rn(t2));
2614 addi(_RCX_REGNO, _RCX_REGNO, 8);
2615 alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
2616 jls(loop);
2617 clear(_RCX_REGNO, _RCX);
2618 jit_unget_reg(t3);
2619 jit_unget_reg(t2);
2620 if (t1 != JIT_NOREG)
2621 jit_unget_reg(t1);
2622 if (t0 != JIT_NOREG) {
2623 movr(r0, r0_reg);
2624 jit_unget_reg(t0);
2625 }
2626 }
2627}
2628
4a71579b
PC
2629static void
2630_cr(jit_state_t *_jit,
2631 jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2632{
2633 jit_int32_t reg;
2634 jit_bool_t same;
2635 if (reg8_p(r0)) {
2636 same = r0 == r1 || r0 == r2;
2637 if (!same)
2638 ixorr(r0, r0);
2639 icmpr(r1, r2);
2640 if (same)
2641 imovi(r0, 0);
2642 cc(code, r0);
2643 }
2644 else {
2645 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2646 ixorr(rn(reg), rn(reg));
2647 icmpr(r1, r2);
2648 cc(code, rn(reg));
2649 movr(r0, rn(reg));
2650 jit_unget_reg(reg);
2651 }
2652}
2653
2654static void
2655_ci(jit_state_t *_jit,
2656 jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2657{
2658 jit_int32_t reg;
2659 jit_bool_t same;
2660 if (reg8_p(r0)) {
2661 same = r0 == r1;
2662 if (!same)
2663 ixorr(r0, r0);
2664 icmpi(r1, i0);
2665 if (same)
2666 imovi(r0, 0);
2667 cc(code, r0);
2668 }
2669 else {
2670 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2671 ixorr(rn(reg), rn(reg));
2672 icmpi(r1, i0);
2673 cc(code, rn(reg));
2674 movr(r0, rn(reg));
2675 jit_unget_reg(reg);
2676 }
2677}
2678
2679static void
2680_ci0(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
2681{
2682 jit_int32_t reg;
2683 jit_bool_t same;
2684 if (reg8_p(r0)) {
2685 same = r0 == r1;
2686 if (!same)
2687 ixorr(r0, r0);
2688 testr(r1, r1);
2689 if (same)
2690 imovi(r0, 0);
2691 cc(code, r0);
2692 }
2693 else {
2694 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2695 ixorr(rn(reg), rn(reg));
2696 testr(r1, r1);
2697 cc(code, rn(reg));
2698 movr(r0, rn(reg));
2699 jit_unget_reg(reg);
2700 }
2701}
2702
2703static void
2704_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2705{
2706 if (r1 == r2)
2707 movi(r0, 0);
2708 else
2709 cr(X86_CC_L, r0, r1, r2);
2710}
2711
2712static void
2713_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2714{
2715 if (i0)
2716 ci(X86_CC_L, r0, r1, i0);
2717 else
2718 ci0(X86_CC_S, r0, r1);
2719}
2720
2721static void
2722_ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2723{
2724 if (r1 == r2)
2725 movi(r0, 0);
2726 else
2727 cr(X86_CC_B, r0, r1, r2);
2728}
2729
2730static void
2731_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2732{
2733 if (r1 == r2)
2734 movi(r0, 1);
2735 else
2736 cr(X86_CC_LE, r0, r1, r2);
2737}
2738
2739static void
2740_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2741{
2742 if (r1 == r2)
2743 movi(r0, 1);
2744 else
2745 cr(X86_CC_BE, r0, r1, r2);
2746}
2747
2748static void
2749_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2750{
2751 if (i0)
2752 ci(X86_CC_BE, r0, r1, i0);
2753 else
2754 ci0(X86_CC_E, r0, r1);
2755}
2756
2757static void
2758_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2759{
2760 if (r1 == r2)
2761 movi(r0, 1);
2762 else
2763 cr(X86_CC_E, r0, r1, r2);
2764}
2765
2766static void
2767_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2768{
2769 if (i0)
2770 ci(X86_CC_E, r0, r1, i0);
2771 else
2772 ci0(X86_CC_E, r0, r1);
2773}
2774
2775static void
2776_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2777{
2778 if (r1 == r2)
2779 movi(r0, 1);
2780 else
2781 cr(X86_CC_GE, r0, r1, r2);
2782}
2783
2784static void
2785_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2786{
2787 if (i0)
2788 ci(X86_CC_GE, r0, r1, i0);
2789 else
2790 ci0(X86_CC_NS, r0, r1);
2791}
2792
2793static void
2794_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2795{
2796 if (r1 == r2)
2797 movi(r0, 1);
2798 else
2799 cr(X86_CC_AE, r0, r1, r2);
2800}
2801
2802static void
2803_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2804{
2805 if (i0)
2806 ci(X86_CC_AE, r0, r1, i0);
2807 else
2808 ci0(X86_CC_NB, r0, r1);
2809}
2810
2811static void
2812_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2813{
2814 if (r1 == r2)
2815 movi(r0, 0);
2816 else
2817 cr(X86_CC_G, r0, r1, r2);
2818}
2819
2820static void
2821_gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2822{
2823 if (r1 == r2)
2824 movi(r0, 0);
2825 else
2826 cr(X86_CC_A, r0, r1, r2);
2827}
2828
2829static void
2830_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2831{
2832 if (i0)
2833 ci(X86_CC_A, r0, r1, i0);
2834 else
2835 ci0(X86_CC_NE, r0, r1);
2836}
2837
2838static void
2839_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2840{
2841 if (r1 == r2)
2842 movi(r0, 0);
2843 else
2844 cr(X86_CC_NE, r0, r1, r2);
2845}
2846
2847static void
2848_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2849{
2850 if (i0)
2851 ci(X86_CC_NE, r0, r1, i0);
2852 else
2853 ci0(X86_CC_NE, r0, r1);
2854}
2855
2856static void
2857_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2858{
2859 if (r0 != r1) {
2860 rex(0, 1, r1, _NOREG, r0);
2861 ic(0x89);
2862 ic(0xc0 | (r1 << 3) | r7(r0));
2863 }
2864}
2865
2866static void
2867_imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2868{
2869#if __X64
2870# if !__X64_32
2871 if (fits_uint32_p(i0)) {
2872# endif
2873 rex(0, 0, _NOREG, _NOREG, r0);
2874 ic(0xb8 | r7(r0));
2875 ii(i0);
2876# if !__X64_32
2877 }
79bfeef6
PC
2878 else if (can_sign_extend_int_p(i0)) {
2879 rex(0, 1, _NOREG, _NOREG, r0);
2880 ic(0xc7);
2881 ic(0xc0 | r7(r0));
2882 ii(i0);
2883 }
4a71579b
PC
2884 else {
2885 rex(0, 1, _NOREG, _NOREG, r0);
2886 ic(0xb8 | r7(r0));
2887 il(i0);
2888 }
2889# endif
2890#else
2891 ic(0xb8 | r7(r0));
2892 ii(i0);
2893#endif
2894}
2895
79bfeef6
PC
2896#if CAN_RIP_ADDRESS
2897static jit_word_t
2898#else
4a71579b 2899static void
79bfeef6 2900#endif
4a71579b
PC
2901_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2902{
79bfeef6
PC
2903#if CAN_RIP_ADDRESS
2904 jit_word_t w, rel;
2905 w = _jit->pc.w;
2906 rel = i0 - (w + 8);
2907 rel = rel < 0 ? rel - 8 : rel + 8;
2908 if (can_sign_extend_int_p(rel)) {
2909 /* lea rel(%rip), %r0 */
2910 rex(0, WIDE, r0, _NOREG, _NOREG);
2911 w = _jit->pc.w;
2912 ic(0x8d);
2913 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
2914 }
2915 else
2916#endif
4a71579b
PC
2917 if (i0)
2918 imovi(r0, i0);
2919 else
2920 ixorr(r0, r0);
79bfeef6
PC
2921#if CAN_RIP_ADDRESS
2922 return (w);
2923#endif
4a71579b
PC
2924}
2925
2926static jit_word_t
2927_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2928{
79bfeef6 2929 jit_word_t w;
4a71579b 2930 rex(0, WIDE, _NOREG, _NOREG, r0);
79bfeef6 2931 w = _jit->pc.w;
4a71579b
PC
2932 ic(0xb8 | r7(r0));
2933 il(i0);
79bfeef6 2934 return (w);
4a71579b
PC
2935}
2936
2937static void
2938_movcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2939{
2940 rex(0, WIDE, r0, _NOREG, r1);
2941 ic(0x0f);
2942 ic(0xbe);
2943 mrm(0x03, r7(r0), r7(r1));
2944}
2945
2946static void
2947_movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2948{
2949 rex(0, WIDE, r0, _NOREG, r1);
2950 ic(0x0f);
2951 ic(0xb6);
2952 mrm(0x03, r7(r0), r7(r1));
2953}
2954
2955static void
2956_movsr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2957{
2958 rex(0, WIDE, r0, _NOREG, r1);
2959 ic(0x0f);
2960 ic(0xbf);
2961 mrm(0x03, r7(r0), r7(r1));
2962}
2963
2964static void
2965_movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2966{
2967 rex(0, WIDE, r0, _NOREG, r1);
2968 ic(0x0f);
2969 ic(0xb7);
2970 mrm(0x03, r7(r0), r7(r1));
2971}
2972
ba3814c1
PC
2973static void
2974_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2975 jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
2976{
2977 jit_int32_t save_rax, restore_rax;
2978 jit_int32_t ascasr_reg, ascasr_use;
2979 if (r0 != _RAX_REGNO) { /* result not in %rax */
2980 if (r2 != _RAX_REGNO) { /* old value not in %rax */
2981 save_rax = jit_get_reg(jit_class_gpr);
2982 movr(rn(save_rax), _RAX_REGNO);
2983 restore_rax = 1;
2984 }
2985 else
2986 restore_rax = 0;
2987 }
2988 else
2989 restore_rax = 0;
2990 if (r2 != _RAX_REGNO)
2991 movr(_RAX_REGNO, r2);
2992 if (r1 == _NOREG) { /* using immediate address */
2993 if (!can_sign_extend_int_p(i0)) {
2994 ascasr_reg = jit_get_reg(jit_class_gpr);
2995 if (ascasr_reg == _RAX) {
2996 ascasr_reg = jit_get_reg(jit_class_gpr);
2997 jit_unget_reg(_RAX);
2998 }
2999 ascasr_use = 1;
3000 movi(rn(ascasr_reg), i0);
3001 }
3002 else
3003 ascasr_use = 0;
3004 }
3005 else
3006 ascasr_use = 0;
3007 ic(0xf0); /* lock */
3008 if (ascasr_use)
3009 rex(0, WIDE, r3, _NOREG, rn(ascasr_reg));
3010 else
3011 rex(0, WIDE, r3, _NOREG, r1);
3012 ic(0x0f);
3013 ic(0xb1);
3014 if (r1 != _NOREG) /* casr */
3015 rx(r3, 0, r1, _NOREG, _SCL1);
3016 else { /* casi */
3017 if (ascasr_use)
3018 rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1); /* address in reg */
3019 else
3020 rx(r3, i0, _NOREG, _NOREG, _SCL1); /* address in offset */
3021 }
3022 cc(X86_CC_E, r0);
3023 if (r0 != _RAX_REGNO)
3024 movr(r0, _RAX_REGNO);
3025 if (restore_rax) {
3026 movr(_RAX_REGNO, rn(save_rax));
3027 jit_unget_reg(save_rax);
3028 }
3029 if (ascasr_use)
3030 jit_unget_reg(ascasr_reg);
3031}
3032
1f22b268
PC
3033static void
3034_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3035{
3036 assert(jit_cmov_p());
3037
3038 testr(r2, r2);
3039
3040 rex(0, WIDE, r0, _NOREG, r1);
3041 ic(0x0f);
3042 ic(0x45);
3043 mrm(0x03, r7(r0), r7(r1));
3044}
3045
3046static void
3047_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3048{
3049 assert(jit_cmov_p());
3050
3051 testr(r2, r2);
3052
3053 rex(0, WIDE, r0, _NOREG, r1);
3054 ic(0x0f);
3055 ic(0x44);
3056 mrm(0x03, r7(r0), r7(r1));
3057}
3058
4a71579b
PC
#if __X64
/* r0 = low 32 bits of r1, sign extended (opcode 0x63 under REX.W). */
static void
_movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    rex(0, 1, r0, _NOREG, r1);
    ic(0x63);
    mrm(0x03, r7(r0), r7(r1));
}

/* r0 = low 32 bits of r1, using a 32 bit register move (no REX.W).
 * Unlike movr() the move is emitted even if r0 == r1. */
static void
_movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    rex(0, 0, r1, _NOREG, r0);
    ic(0x89);
    /* Source in the reg field, destination in the r/m field */
    mrm(0x03, r7(r1), r7(r0));
}
#endif
3076
3077static void
40a44dcb 3078_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b
PC
3079{
3080 extr_us(r0, r1);
3081 ic(0x66);
3082 rex(0, 0, _NOREG, _NOREG, r0);
3083 ic(0xc1);
3084 mrm(0x03, X86_ROR, r7(r0));
3085 ic(8);
3086}
3087
3088static void
40a44dcb 3089_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b
PC
3090{
3091 movr(r0, r1);
3092 rex(0, 0, _NOREG, _NOREG, r0);
3093 ic(0x0f);
3094 ic(0xc8 | r7(r0));
3095}
3096
#if __X64 && !__X64_32
/* r0 = r1 with its 8 bytes in reverse order, using the REX.W form of
 * opcode 0x0f 0xc8+reg. */
static void
_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    movr(r0, r1);
    rex(0, 1, _NOREG, _NOREG, r0);
    ic(0x0f);
    ic(0xc8 | r7(r0));
}
#endif
3107
ba86ff93
PC
3108static void
3109_extr(jit_state_t *_jit,
3110 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3111{
3112 jit_word_t mask;
3113 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3114 if (i1 == __WORDSIZE)
3115 movr(r0, r1);
3116 else {
3117 if (__WORDSIZE - (i0 + i1)) {
3118 lshi(r0, r1, __WORDSIZE - (i0 + i1));
3119 rshi(r0, r0, __WORDSIZE - i1);
3120 }
3121 else
3122 rshi(r0, r1, __WORDSIZE - i1);
3123 }
3124}
3125
3126static void
3127_extr_u(jit_state_t *_jit,
3128 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3129{
3130 jit_int32_t t0;
3131 jit_word_t mask;
3132 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3133 if (i1 == __WORDSIZE)
3134 movr(r0, r1);
3135 /* Only cheaper in code size or number of instructions if i0 is not zero */
3136 /* Number of cpu cicles not tested */
3137 else if (i0 && jit_cpu.bmi2) {
3138 mask = ((ONE << i1) - 1) << i0;
3139 t0 = jit_get_reg(jit_class_gpr);
3140 movi(rn(t0), mask);
3141 /* PEXT */
3142 vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 2);
3143 ic(0xf5);
3144 mrm(0x03, r7(r0), r7(rn(t0)));
3145 jit_unget_reg(t0);
3146 }
3147 else {
3148 if (i0)
3149 rshi_u(r0, r1, i0);
3150 andi(r0, r0, (ONE << i1) - 1);
3151 }
3152}
3153
3154static void
3155_depr(jit_state_t *_jit,
3156 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3157{
3158 jit_word_t mask;
3159 jit_int32_t t0, t1;
3160 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3161 if (i1 == __WORDSIZE)
3162 movr(r0, r1);
3163 /* Only cheaper in code size or number of instructions if i0 is not zero */
3164 /* Number of cpu cicles not tested */
3165 else if (i0 && jit_cpu.bmi2) {
3166 mask = ((ONE << i1) - 1) << i0;
3167 t0 = jit_get_reg(jit_class_gpr);
3168 t1 = jit_get_reg(jit_class_gpr);
3169 movi(rn(t0), mask);
3170 movr(rn(t1), r0);
3171 /* PDEP */
3172 vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 3);
3173 ic(0xf5);
3174 mrm(0x03, r7(r0), r7(rn(t0)));
3175 andi(rn(t1), rn(t1), ~mask);
3176 orr(r0, r0, rn(t1));
3177 jit_unget_reg(t1);
3178 jit_unget_reg(t0);
3179 }
3180 else {
3181 mask = (ONE << i1) - 1;
3182 t0 = jit_get_reg(jit_class_gpr);
3183 andi(rn(t0), r1, mask);
3184 if (i0) {
3185 lshi(rn(t0), rn(t0), i0);
3186 mask <<= i0;
3187 }
3188 andi(r0, r0, ~mask);
3189 orr(r0, r0, rn(t0));
3190 jit_unget_reg(t0);
3191 }
3192}
3193
4a71579b
PC
3194static void
3195_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3196{
3197 jit_int32_t reg;
3198 if (reg8_p(r1))
3199 movcr(r0, r1);
3200 else {
3201 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3202 movr(rn(reg), r1);
3203 movcr(r0, rn(reg));
3204 jit_unget_reg(reg);
3205 }
3206}
3207
3208static void
3209_extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3210{
3211 jit_int32_t reg;
3212 if (reg8_p(r1))
3213 movcr_u(r0, r1);
3214 else {
3215 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3216 movr(rn(reg), r1);
3217 movcr_u(r0, rn(reg));
3218 jit_unget_reg(reg);
3219 }
3220}
3221
3222static void
3223_ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3224{
3225 rex(0, WIDE, r0, _NOREG, r1);
3226 ic(0x0f);
3227 ic(0xbe);
3228 rx(r0, 0, r1, _NOREG, _SCL1);
3229}
3230
3231static void
3232_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3233{
3234 jit_int32_t reg;
79bfeef6
PC
3235#if CAN_RIP_ADDRESS
3236 jit_word_t rel = i0 - _jit->pc.w;
3237 rel = rel < 0 ? rel - 8 : rel + 8;
3238 if (can_sign_extend_int_p(rel)) {
3239 rex(0, WIDE, r0, _NOREG, _NOREG);
3240 ic(0x0f);
3241 ic(0xbe);
3242 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3243 }
3244 else
3245#endif
3246 if (address_p(i0)) {
4a71579b
PC
3247 rex(0, WIDE, r0, _NOREG, _NOREG);
3248 ic(0x0f);
3249 ic(0xbe);
3250 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3251 }
3252 else {
3253 reg = jit_get_reg(jit_class_gpr);
3254 movi(rn(reg), i0);
3255 ldr_c(r0, rn(reg));
3256 jit_unget_reg(reg);
3257 }
3258}
3259
3260static void
3261_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3262{
3263 rex(0, WIDE, r0, _NOREG, r1);
3264 ic(0x0f);
3265 ic(0xb6);
3266 rx(r0, 0, r1, _NOREG, _SCL1);
3267}
3268
3269static void
3270_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3271{
3272 jit_int32_t reg;
79bfeef6
PC
3273#if CAN_RIP_ADDRESS
3274 jit_word_t rel = i0 - _jit->pc.w;
3275 rel = rel < 0 ? rel - 8 : rel + 8;
3276 if (can_sign_extend_int_p(rel)) {
3277 rex(0, WIDE, r0, _NOREG, _NOREG);
3278 ic(0x0f);
3279 ic(0xb6);
3280 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3281 }
3282 else
3283#endif
3284 if (address_p(i0)) {
4a71579b
PC
3285 rex(0, WIDE, r0, _NOREG, _NOREG);
3286 ic(0x0f);
3287 ic(0xb6);
3288 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3289 }
3290 else {
3291 reg = jit_get_reg(jit_class_gpr);
3292 movi(rn(reg), i0);
3293 ldr_uc(r0, rn(reg));
3294 jit_unget_reg(reg);
3295 }
3296}
3297
3298static void
3299_ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3300{
3301 rex(0, WIDE, r0, _NOREG, r1);
3302 ic(0x0f);
3303 ic(0xbf);
3304 rx(r0, 0, r1, _NOREG, _SCL1);
3305}
3306
3307static void
3308_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3309{
3310 jit_int32_t reg;
79bfeef6
PC
3311#if CAN_RIP_ADDRESS
3312 jit_word_t rel = i0 - _jit->pc.w;
3313 rel = rel < 0 ? rel - 8 : rel + 8;
3314 if (can_sign_extend_int_p(rel)) {
3315 rex(0, WIDE, r0, _NOREG, _NOREG);
3316 ic(0x0f);
3317 ic(0xbf);
3318 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3319 }
3320 else
3321#endif
3322 if (address_p(i0)) {
4a71579b
PC
3323 rex(0, WIDE, r0, _NOREG, _NOREG);
3324 ic(0x0f);
3325 ic(0xbf);
3326 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3327 }
3328 else {
3329 reg = jit_get_reg(jit_class_gpr);
3330 movi(rn(reg), i0);
3331 ldr_s(r0, rn(reg));
3332 jit_unget_reg(reg);
3333 }
3334}
3335
3336static void
3337_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3338{
3339 rex(0, WIDE, r0, _NOREG, r1);
3340 ic(0x0f);
3341 ic(0xb7);
3342 rx(r0, 0, r1, _NOREG, _SCL1);
3343}
3344
3345static void
3346_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3347{
3348 jit_int32_t reg;
79bfeef6
PC
3349#if CAN_RIP_ADDRESS
3350 jit_word_t rel = i0 - _jit->pc.w;
3351 rel = rel < 0 ? rel - 8 : rel + 8;
3352 if (can_sign_extend_int_p(rel)) {
3353 rex(0, WIDE, r0, _NOREG, _NOREG);
3354 ic(0x0f);
3355 ic(0xb7);
3356 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3357 }
3358 else
3359#endif
3360 if (address_p(i0)) {
4a71579b
PC
3361 rex(0, WIDE, r0, _NOREG, _NOREG);
3362 ic(0x0f);
3363 ic(0xb7);
3364 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3365 }
3366 else {
3367 reg = jit_get_reg(jit_class_gpr);
3368 movi(rn(reg), i0);
3369 ldr_us(r0, rn(reg));
3370 jit_unget_reg(reg);
3371 }
3372}
3373
3374#if __X32 || !__X64_32
3375static void
3376_ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3377{
3378#if __X64
3379 rex(0, WIDE, r0, _NOREG, r1);
3380 ic(0x63);
3381#else
3382 ic(0x8b);
3383#endif
3384 rx(r0, 0, r1, _NOREG, _SCL1);
3385}
3386
3387static void
3388_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3389{
3390 jit_int32_t reg;
79bfeef6
PC
3391#if CAN_RIP_ADDRESS
3392 jit_word_t rel = i0 - _jit->pc.w;
3393 rel = rel < 0 ? rel - 8 : rel + 8;
3394 if (can_sign_extend_int_p(rel)) {
3395 rex(0, WIDE, r0, _NOREG, _NOREG);
3396 ic(0x63);
3397 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3398 }
3399 else
3400#endif
3401 if (address_p(i0)) {
4a71579b
PC
3402#if __X64
3403 rex(0, WIDE, r0, _NOREG, _NOREG);
3404 ic(0x63);
3405#else
3406 ic(0x8b);
3407#endif
3408 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3409 }
3410 else {
3411 reg = jit_get_reg(jit_class_gpr);
3412 movi(rn(reg), i0);
3413 ldr_i(r0, rn(reg));
3414 jit_unget_reg(reg);
3415 }
3416}
3417#endif
3418
3419#if __X64
3420static void
3421_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3422{
3423 rex(0, 0, r0, _NOREG, r1);
3424 ic(0x63);
3425 rx(r0, 0, r1, _NOREG, _SCL1);
3426}
3427
3428static void
3429_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3430{
3431 jit_int32_t reg;
79bfeef6
PC
3432# if !__X64_32
3433 jit_word_t rel = i0 - _jit->pc.w;
3434 rel = rel < 0 ? rel - 8 : rel + 8;
3435 if (can_sign_extend_int_p(rel)) {
3436 rex(0, 0, r0, _NOREG, _NOREG);
3437 ic(0x63);
3438 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3439 }
3440 else
3441#endif
3442 if (address_p(i0)) {
4a71579b
PC
3443 rex(0, 0, r0, _NOREG, _NOREG);
3444 ic(0x63);
3445 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3446 }
3447 else {
3448 reg = jit_get_reg(jit_class_gpr);
3449 movi(rn(reg), i0);
79bfeef6
PC
3450# if __X64_32
3451 ldr_i(r0, rn(reg));
3452# else
4a71579b 3453 ldr_ui(r0, rn(reg));
79bfeef6 3454# endif
4a71579b
PC
3455 jit_unget_reg(reg);
3456 }
3457}
3458
3459# if !__X64_32
3460static void
3461_ldr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3462{
3463 rex(0, 1, r0, _NOREG, r1);
3464 ic(0x8b);
3465 rx(r0, 0, r1, _NOREG, _SCL1);
3466}
3467
3468static void
3469_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3470{
3471 jit_int32_t reg;
79bfeef6
PC
3472 jit_word_t rel = i0 - _jit->pc.w;
3473 rel = rel < 0 ? rel - 8 : rel + 8;
3474 if (can_sign_extend_int_p(rel)) {
3475 rex(0, WIDE, r0, _NOREG, _NOREG);
3476 ic(0x8b);
3477 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3478 }
3479 else if (can_sign_extend_int_p(i0)) {
3480 rex(0, WIDE, r0, _NOREG, _NOREG);
4a71579b
PC
3481 ic(0x8b);
3482 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3483 }
3484 else {
3485 reg = jit_get_reg(jit_class_gpr);
3486 movi(rn(reg), i0);
3487 ldr_l(r0, rn(reg));
3488 jit_unget_reg(reg);
3489 }
3490}
3491# endif
3492#endif
3493
3494static void
3495_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3496{
3497#if __X64_32
3498 addr(r0, r1, r2);
3499 ldr_c(r0, r0);
3500#else
3501 rex(0, WIDE, r0, r1, r2);
3502 ic(0x0f);
3503 ic(0xbe);
3504 rx(r0, 0, r2, r1, _SCL1);
3505#endif
3506}
3507
3508static void
3509_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3510{
3511 jit_int32_t reg;
3512 if (can_sign_extend_int_p(i0)) {
3513 rex(0, WIDE, r0, _NOREG, r1);
3514 ic(0x0f);
3515 ic(0xbe);
3516 rx(r0, i0, r1, _NOREG, _SCL1);
3517 }
3518 else {
3519 reg = jit_get_reg(jit_class_gpr);
3520 movi(rn(reg), i0);
3521 ldxr_c(r0, r1, rn(reg));
3522 jit_unget_reg(reg);
3523 }
3524}
3525
3526static void
3527_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3528{
3529#if __X64_32
3530 addr(r0, r1, r2);
3531 ldr_uc(r0, r0);
3532#else
3533 rex(0, WIDE, r0, r1, r2);
3534 ic(0x0f);
3535 ic(0xb6);
3536 rx(r0, 0, r2, r1, _SCL1);
3537#endif
3538}
3539
3540static void
3541_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3542{
3543 jit_int32_t reg;
3544 if (can_sign_extend_int_p(i0)) {
3545 rex(0, WIDE, r0, _NOREG, r1);
3546 ic(0x0f);
3547 ic(0xb6);
3548 rx(r0, i0, r1, _NOREG, _SCL1);
3549 }
3550 else {
3551 reg = jit_get_reg(jit_class_gpr);
3552 movi(rn(reg), i0);
3553 ldxr_uc(r0, r1, rn(reg));
3554 jit_unget_reg(reg);
3555 }
3556}
3557
3558static void
3559_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3560{
3561#if __X64_32
3562 addr(r0, r1, r2);
3563 ldr_s(r0, r0);
3564#else
3565 rex(0, WIDE, r0, r1, r2);
3566 ic(0x0f);
3567 ic(0xbf);
3568 rx(r0, 0, r2, r1, _SCL1);
3569#endif
3570}
3571
3572static void
3573_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3574{
3575 jit_int32_t reg;
3576 if (can_sign_extend_int_p(i0)) {
3577 rex(0, WIDE, r0, _NOREG, r1);
3578 ic(0x0f);
3579 ic(0xbf);
3580 rx(r0, i0, r1, _NOREG, _SCL1);
3581 }
3582 else {
3583 reg = jit_get_reg(jit_class_gpr);
3584 movi(rn(reg), i0);
3585 ldxr_s(r0, r1, rn(reg));
3586 jit_unget_reg(reg);
3587 }
3588}
3589
3590static void
3591_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3592{
3593#if __X64_32
3594 addr(r0, r1, r2);
3595 ldr_us(r0, r0);
3596#else
3597 rex(0, WIDE, r0, r1, r2);
3598 ic(0x0f);
3599 ic(0xb7);
3600 rx(r0, 0, r2, r1, _SCL1);
3601#endif
3602}
3603
3604static void
3605_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3606{
3607 jit_int32_t reg;
3608 if (can_sign_extend_int_p(i0)) {
3609 rex(0, WIDE, r0, _NOREG, r1);
3610 ic(0x0f);
3611 ic(0xb7);
3612 rx(r0, i0, r1, _NOREG, _SCL1);
3613 }
3614 else {
3615 reg = jit_get_reg(jit_class_gpr);
3616 movi(rn(reg), i0);
3617 ldxr_us(r0, r1, rn(reg));
3618 jit_unget_reg(reg);
3619 }
3620}
3621
3622#if __X64 || !__X64_32
3623static void
3624_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3625{
3626#if __X64
3627 rex(0, WIDE, r0, r1, r2);
3628 ic(0x63);
3629#else
3630 ic(0x8b);
3631#endif
3632 rx(r0, 0, r2, r1, _SCL1);
3633}
3634
3635static void
3636_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3637{
3638 jit_int32_t reg;
3639 if (can_sign_extend_int_p(i0)) {
3640#if __X64
3641 rex(0, WIDE, r0, _NOREG, r1);
3642 ic(0x63);
3643#else
3644 ic(0x8b);
3645#endif
3646 rx(r0, i0, r1, _NOREG, _SCL1);
3647 }
3648 else {
3649 reg = jit_get_reg(jit_class_gpr);
3650 movi(rn(reg), i0);
3651 ldxr_i(r0, r1, rn(reg));
3652 jit_unget_reg(reg);
3653 }
3654}
3655#endif
3656
3657#if __X64
3658static void
3659_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3660{
3661#if __X64_32
3662 addr(r0, r1, r2);
3663 /* to avoid confusion with macro renames */
3664 _ldr_ui(_jit, r0, r0);
3665#else
3666 rex(0, 0, r0, r1, r2);
3667 ic(0x8b);
3668 rx(r0, 0, r2, r1, _SCL1);
3669#endif
3670}
3671
3672static void
3673_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3674{
3675 jit_int32_t reg;
3676 if (can_sign_extend_int_p(i0)) {
3677 rex(0, 0, r0, _NOREG, r1);
3678 ic(0x8b);
3679 rx(r0, i0, r1, _NOREG, _SCL1);
3680 }
3681 else {
3682 reg = jit_get_reg(jit_class_gpr);
3683 movi(rn(reg), i0);
79bfeef6
PC
3684# if __X64_32
3685 ldxr_i(r0, r1, rn(reg));
3686# else
4a71579b 3687 ldxr_ui(r0, r1, rn(reg));
79bfeef6 3688# endif
4a71579b
PC
3689 jit_unget_reg(reg);
3690 }
3691}
3692
3693# if !__X64_32
3694static void
3695_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3696{
3697 rex(0, 1, r0, r1, r2);
3698 ic(0x8b);
3699 rx(r0, 0, r2, r1, _SCL1);
3700}
3701
3702static void
3703_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3704{
3705 jit_int32_t reg;
3706 if (can_sign_extend_int_p(i0)) {
3707 rex(0, 1, r0, _NOREG, r1);
3708 ic(0x8b);
3709 rx(r0, i0, r1, _NOREG, _SCL1);
3710 }
3711 else {
3712 reg = jit_get_reg(jit_class_gpr);
3713 movi(rn(reg), i0);
3714 ldxr_l(r0, r1, rn(reg));
3715 jit_unget_reg(reg);
3716 }
3717}
3718# endif
3719#endif
3720
3721static void
3722_str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3723{
3724 jit_int32_t reg;
3725 if (reg8_p(r1)) {
3726 rex(0, 0, r1, _NOREG, r0);
3727 ic(0x88);
3728 rx(r1, 0, r0, _NOREG, _SCL1);
3729 }
3730 else {
3731 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3732 movr(rn(reg), r1);
3733 rex(0, 0, rn(reg), _NOREG, r0);
3734 ic(0x88);
3735 rx(rn(reg), 0, r0, _NOREG, _SCL1);
3736 jit_unget_reg(reg);
3737 }
3738}
3739
3740static void
3741_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3742{
3743 jit_int32_t reg;
79bfeef6
PC
3744#if CAN_RIP_ADDRESS
3745 jit_word_t rel = i0 - _jit->pc.w;
3746 rel = rel < 0 ? rel - 16 : rel + 16;
3747 if (can_sign_extend_int_p(rel)) {
3748 if (reg8_p(r0)) {
3749 rex(0, 0, r0, _NOREG, _NOREG);
3750 ic(0x88);
3751 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3752 }
3753 else {
3754 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3755 movr(rn(reg), r0);
3756 rex(0, 0, rn(reg), _NOREG, _NOREG);
3757 ic(0x88);
3758 rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3759 jit_unget_reg(reg);
3760 }
3761 }
3762 else
3763#endif
3764 if (address_p(i0)) {
4a71579b
PC
3765 if (reg8_p(r0)) {
3766 rex(0, 0, r0, _NOREG, _NOREG);
3767 ic(0x88);
3768 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3769 }
3770 else {
3771 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3772 movr(rn(reg), r0);
3773 rex(0, 0, rn(reg), _NOREG, _NOREG);
3774 ic(0x88);
3775 rx(rn(reg), i0, _NOREG, _NOREG, _SCL1);
3776 jit_unget_reg(reg);
3777 }
3778 }
3779 else {
3780 reg = jit_get_reg(jit_class_gpr);
3781 movi(rn(reg), i0);
3782 str_c(rn(reg), r0);
3783 jit_unget_reg(reg);
3784 }
3785}
3786
3787static void
3788_str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3789{
3790 ic(0x66);
3791 rex(0, 0, r1, _NOREG, r0);
3792 ic(0x89);
3793 rx(r1, 0, r0, _NOREG, _SCL1);
3794}
3795
3796static void
3797_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3798{
3799 jit_int32_t reg;
79bfeef6
PC
3800#if CAN_RIP_ADDRESS
3801 jit_word_t rel = i0 - _jit->pc.w;
3802 rel = rel < 0 ? rel - 8 : rel + 8;
3803 if (can_sign_extend_int_p(rel)) {
3804 ic(0x66);
3805 rex(0, 0, r0, _NOREG, _NOREG);
3806 ic(0x89);
3807 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3808 }
3809 else
3810#endif
3811 if (address_p(i0)) {
4a71579b
PC
3812 ic(0x66);
3813 rex(0, 0, r0, _NOREG, _NOREG);
3814 ic(0x89);
3815 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3816 }
3817 else {
3818 reg = jit_get_reg(jit_class_gpr);
3819 movi(rn(reg), i0);
3820 str_s(rn(reg), r0);
3821 jit_unget_reg(reg);
3822 }
3823}
3824
3825static void
3826_str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3827{
3828 rex(0, 0, r1, _NOREG, r0);
3829 ic(0x89);
3830 rx(r1, 0, r0, _NOREG, _SCL1);
3831}
3832
3833static void
3834_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3835{
3836 jit_int32_t reg;
79bfeef6
PC
3837#if CAN_RIP_ADDRESS
3838 jit_word_t rel = i0 - _jit->pc.w;
3839 rel = rel < 0 ? rel - 8 : rel + 8;
3840 if (can_sign_extend_int_p(rel)) {
3841 rex(0, 0, r0, _NOREG, _NOREG);
3842 ic(0x89);
3843 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3844 }
3845 else
3846#endif
3847 if (address_p(i0)) {
4a71579b
PC
3848 rex(0, 0, r0, _NOREG, _NOREG);
3849 ic(0x89);
3850 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3851 }
3852 else {
3853 reg = jit_get_reg(jit_class_gpr);
3854 movi(rn(reg), i0);
3855 str_i(rn(reg), r0);
3856 jit_unget_reg(reg);
3857 }
3858}
3859
3860#if __X64 && !__X64_32
3861static void
3862_str_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3863{
3864 rex(0, 1, r1, _NOREG, r0);
3865 ic(0x89);
3866 rx(r1, 0, r0, _NOREG, _SCL1);
3867}
3868
3869static void
3870_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3871{
3872 jit_int32_t reg;
79bfeef6
PC
3873#if CAN_RIP_ADDRESS
3874 jit_word_t rel = i0 - _jit->pc.w;
3875 rel = rel < 0 ? rel - 8 : rel + 8;
3876 if (can_sign_extend_int_p(rel)) {
3877 rex(0, WIDE, r0, _NOREG, _NOREG);
3878 ic(0x89);
3879 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3880 }
3881 else
3882#endif
4a71579b 3883 if (can_sign_extend_int_p(i0)) {
79bfeef6 3884 rex(0, WIDE, r0, _NOREG, _NOREG);
4a71579b
PC
3885 ic(0x89);
3886 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3887 }
3888 else {
3889 reg = jit_get_reg(jit_class_gpr);
3890 movi(rn(reg), i0);
3891 str_l(rn(reg), r0);
3892 jit_unget_reg(reg);
3893 }
3894}
3895#endif
3896
3897static void
3898_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3899{
3900 jit_int32_t reg;
3901#if __X64_32
3902 reg = jit_get_reg(jit_class_gpr);
3903 addr(rn(reg), r0, r1);
3904 str_c(rn(reg), r2);
3905 jit_unget_reg(reg);
3906#else
3907 if (reg8_p(r2)) {
3908 rex(0, 0, r2, r1, r0);
3909 ic(0x88);
3910 rx(r2, 0, r0, r1, _SCL1);
3911 }
3912 else {
3913 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3914 movr(rn(reg), r2);
3915 rex(0, 0, rn(reg), r1, r0);
3916 ic(0x88);
3917 rx(rn(reg), 0, r0, r1, _SCL1);
3918 jit_unget_reg(reg);
3919 }
3920#endif
3921}
3922
3923static void
3924_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3925{
3926 jit_int32_t reg;
3927 if (can_sign_extend_int_p(i0)) {
3928 if (reg8_p(r1)) {
3929 rex(0, 0, r1, _NOREG, r0);
3930 ic(0x88);
3931 rx(r1, i0, r0, _NOREG, _SCL1);
3932 }
3933 else {
3934 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3935 movr(rn(reg), r1);
3936 rex(0, 0, rn(reg), _NOREG, r0);
3937 ic(0x88);
3938 rx(rn(reg), i0, r0, _NOREG, _SCL1);
3939 jit_unget_reg(reg);
3940 }
3941 }
3942 else {
3943 reg = jit_get_reg(jit_class_gpr);
3944 movi(rn(reg), i0);
3945 stxr_c(rn(reg), r0, r1);
3946 jit_unget_reg(reg);
3947 }
3948}
3949
3950static void
3951_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3952{
3953#if __X64_32
3954 jit_int32_t reg;
3955 reg = jit_get_reg(jit_class_gpr);
3956 addr(rn(reg), r0, r1);
3957 str_s(rn(reg), r2);
3958 jit_unget_reg(reg);
3959#else
3960 ic(0x66);
3961 rex(0, 0, r2, r1, r0);
3962 ic(0x89);
3963 rx(r2, 0, r0, r1, _SCL1);
3964#endif
3965}
3966
3967static void
3968_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3969{
3970 jit_int32_t reg;
3971 if (can_sign_extend_int_p(i0)) {
3972 ic(0x66);
3973 rex(0, 0, r1, _NOREG, r0);
3974 ic(0x89);
3975 rx(r1, i0, r0, _NOREG, _SCL1);
3976 }
3977 else {
3978 reg = jit_get_reg(jit_class_gpr);
3979 movi(rn(reg), i0);
3980 stxr_s(rn(reg), r0, r1);
3981 jit_unget_reg(reg);
3982 }
3983}
3984
3985static void
3986_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3987{
3988#if __X64_32
3989 jit_int32_t reg;
3990 reg = jit_get_reg(jit_class_gpr);
3991 addr(rn(reg), r0, r1);
3992 str_i(rn(reg), r2);
3993 jit_unget_reg(reg);
3994#else
3995 rex(0, 0, r2, r1, r0);
3996 ic(0x89);
3997 rx(r2, 0, r0, r1, _SCL1);
3998#endif
3999}
4000
4001static void
4002_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4003{
4004 jit_int32_t reg;
4005 if (can_sign_extend_int_p(i0)) {
4006 rex(0, 0, r1, _NOREG, r0);
4007 ic(0x89);
4008 rx(r1, i0, r0, _NOREG, _SCL1);
4009 }
4010 else {
4011 reg = jit_get_reg(jit_class_gpr);
4012 movi(rn(reg), i0);
4013 stxr_i(rn(reg), r0, r1);
4014 jit_unget_reg(reg);
4015 }
4016}
4017
4018#if __X64 && !__X64_32
4019static void
4020_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
4021{
4022 rex(0, 1, r2, r1, r0);
4023 ic(0x89);
4024 rx(r2, 0, r0, r1, _SCL1);
4025}
4026
4027static void
4028_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4029{
4030 jit_int32_t reg;
4031 if (can_sign_extend_int_p(i0)) {
4032 rex(0, 1, r1, _NOREG, r0);
4033 ic(0x89);
4034 rx(r1, i0, r0, _NOREG, _SCL1);
4035 }
4036 else {
4037 reg = jit_get_reg(jit_class_gpr);
4038 movi(rn(reg), i0);
4039 stxr_l(rn(reg), r0, r1);
4040 jit_unget_reg(reg);
4041 }
4042}
4043#endif
4044
79bfeef6 4045static jit_word_t
4a71579b
PC
4046_jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
4047{
79bfeef6 4048 jit_word_t d;
4a71579b 4049 jit_word_t w;
79bfeef6 4050 w = _jit->pc.w;
ba86ff93 4051 d = i0 - (w + 2);
4a71579b 4052 ic(0x70 | code);
79bfeef6
PC
4053 ic(d);
4054 return (w);
4a71579b
PC
4055}
4056
79bfeef6 4057static jit_word_t
4a71579b
PC
4058_jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
4059{
79bfeef6 4060 jit_word_t d;
4a71579b 4061 jit_word_t w;
79bfeef6 4062 w = _jit->pc.w;
4a71579b 4063 ic(0x0f);
79bfeef6 4064 d = i0 - (w + 6);
4a71579b 4065 ic(0x80 | code);
79bfeef6
PC
4066 ii(d);
4067 return (w);
4a71579b
PC
4068}
4069
79bfeef6 4070static jit_word_t
4a71579b
PC
4071_jcr(jit_state_t *_jit,
4072 jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4073{
4074 alur(X86_CMP, r0, r1);
79bfeef6 4075 return (jcc(code, i0));
4a71579b
PC
4076}
4077
79bfeef6 4078static jit_word_t
4a71579b
PC
4079_jci(jit_state_t *_jit,
4080 jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4081{
4082 alui(X86_CMP, r0, i1);
79bfeef6 4083 return (jcc(code, i0));
4a71579b
PC
4084}
4085
79bfeef6 4086static jit_word_t
4a71579b
PC
4087_jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
4088{
4089 testr(r0, r0);
79bfeef6 4090 return (jcc(code, i0));
4a71579b
PC
4091}
4092
4093static jit_word_t
4094_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4095{
79bfeef6 4096 return (jcr(X86_CC_L, i0, r0, r1));
4a71579b
PC
4097}
4098
4099static jit_word_t
4100_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4101{
79bfeef6
PC
4102 jit_word_t w;
4103 if (i1) w = jci (X86_CC_L, i0, r0, i1);
4104 else w = jci0(X86_CC_S, i0, r0);
4105 return (w);
4a71579b
PC
4106}
4107
4108static jit_word_t
4109_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4110{
79bfeef6 4111 return (jcr(X86_CC_B, i0, r0, r1));
4a71579b
PC
4112}
4113
4114static jit_word_t
4115_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4116{
79bfeef6
PC
4117 jit_word_t w;
4118 if (i1) w = jci (X86_CC_B, i0, r0, i1);
4119 else w = jci0(X86_CC_B, i0, r0);
4120 return (w);
4a71579b
PC
4121}
4122
4123static jit_word_t
4124_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4125{
79bfeef6
PC
4126 jit_word_t w;
4127 if (r0 == r1) w = jmpi(i0);
4128 else w = jcr (X86_CC_LE, i0, r0, r1);
4129 return (w);
4a71579b
PC
4130}
4131
4132static jit_word_t
4133_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4134{
79bfeef6
PC
4135 jit_word_t w;
4136 if (i1) w = jci (X86_CC_LE, i0, r0, i1);
4137 else w = jci0(X86_CC_LE, i0, r0);
4138 return (w);
4a71579b
PC
4139}
4140
4141static jit_word_t
4142_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4143{
79bfeef6
PC
4144 jit_word_t w;
4145 if (r0 == r1) w = jmpi(i0);
4146 else w = jcr (X86_CC_BE, i0, r0, r1);
4147 return (w);
4a71579b
PC
4148}
4149
4150static jit_word_t
4151_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4152{
79bfeef6
PC
4153 jit_word_t w;
4154 if (i1) w = jci (X86_CC_BE, i0, r0, i1);
4155 else w = jci0(X86_CC_BE, i0, r0);
4156 return (w);
4a71579b
PC
4157}
4158
4159static jit_word_t
4160_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4161{
79bfeef6
PC
4162 jit_word_t w;
4163 if (r0 == r1) w = jmpi(i0);
4164 else w = jcr (X86_CC_E, i0, r0, r1);
4165 return (w);
4a71579b
PC
4166}
4167
4168static jit_word_t
4169_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4170{
79bfeef6
PC
4171 jit_word_t w;
4172 if (i1) w = jci (X86_CC_E, i0, r0, i1);
4173 else w = jci0(X86_CC_E, i0, r0);
4174 return (w);
4a71579b
PC
4175}
4176
4177static jit_word_t
4178_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4179{
79bfeef6
PC
4180 jit_word_t w;
4181 if (r0 == r1) w = jmpi(i0);
4182 else w = jcr (X86_CC_GE, i0, r0, r1);
4183 return (w);
4a71579b
PC
4184}
4185
4186static jit_word_t
4187_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4188{
79bfeef6
PC
4189 jit_word_t w;
4190 if (i1) w = jci (X86_CC_GE, i0, r0, i1);
4191 else w = jci0(X86_CC_NS, i0, r0);
4192 return (w);
4a71579b
PC
4193}
4194
4195static jit_word_t
4196_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4197{
79bfeef6
PC
4198 jit_word_t w;
4199 if (r0 == r1) w = jmpi(i0);
4200 else w = jcr (X86_CC_AE, i0, r0, r1);
4201 return (w);
4a71579b
PC
4202}
4203
4204static jit_word_t
4205_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4206{
79bfeef6
PC
4207 jit_word_t w;
4208 if (i1) w = jci (X86_CC_AE, i0, r0, i1);
4209 else w = jmpi(i0);
4210 return (w);
4a71579b
PC
4211}
4212
4213static jit_word_t
4214_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4215{
79bfeef6 4216 return (jcr(X86_CC_G, i0, r0, r1));
4a71579b
PC
4217}
4218
4219static jit_word_t
4220_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4221{
79bfeef6 4222 return (jci(X86_CC_G, i0, r0, i1));
4a71579b
PC
4223}
4224
4225static jit_word_t
4226_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4227{
79bfeef6 4228 return (jcr(X86_CC_A, i0, r0, r1));
4a71579b
PC
4229}
4230
4231static jit_word_t
4232_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4233{
79bfeef6
PC
4234 jit_word_t w;
4235 if (i1) w = jci (X86_CC_A, i0, r0, i1);
4236 else w = jci0(X86_CC_NE, i0, r0);
4237 return (w);
4a71579b
PC
4238}
4239
4240static jit_word_t
4241_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4242{
79bfeef6 4243 return (jcr(X86_CC_NE, i0, r0, r1));
4a71579b
PC
4244}
4245
4246static jit_word_t
4247_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4248{
79bfeef6
PC
4249 jit_word_t w;
4250 if (i1) w = jci (X86_CC_NE, i0, r0, i1);
4251 else w = jci0(X86_CC_NE, i0, r0);
4252 return (w);
4a71579b
PC
4253}
4254
4255static jit_word_t
4256_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4257{
4258 testr(r0, r1);
79bfeef6 4259 return (jnz(i0));
4a71579b
PC
4260}
4261
4262static jit_word_t
4263_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4264{
4265 jit_int32_t reg;
4266 if (can_zero_extend_int_p(i1))
4267 testi(r0, i1);
4268 else {
4269 reg = jit_get_reg(jit_class_gpr);
4270 movi(rn(reg), i1);
4271 testr(r0, rn(reg));
4272 jit_unget_reg(reg);
4273 }
79bfeef6 4274 return (jnz(i0));
4a71579b
PC
4275}
4276
4277static jit_word_t
4278_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4279{
4280 testr(r0, r1);
79bfeef6 4281 return (jz(i0));
4a71579b
PC
4282}
4283
4284static jit_word_t
4285_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4286{
4287 jit_int32_t reg;
4288 if (can_zero_extend_int_p(i1))
4289 testi(r0, i1);
4290 else {
4291 reg = jit_get_reg(jit_class_gpr);
4292 movi(rn(reg), i1);
4293 testr(r0, rn(reg));
4294 jit_unget_reg(reg);
4295 }
79bfeef6 4296 return (jz(i0));
4a71579b
PC
4297}
4298
4299static jit_word_t
4300_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4301{
4302 iaddr(r0, r1);
79bfeef6 4303 return (jo(i0));
4a71579b
PC
4304}
4305
4306static jit_word_t
4307_boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4308{
4309 jit_int32_t reg;
4310 if (can_sign_extend_int_p(i1)) {
4311 iaddi(r0, i1);
79bfeef6 4312 return (jo(i0));
4a71579b
PC
4313 }
4314 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4315 movi(rn(reg), i1);
4316 jit_unget_reg(reg);
4317 return (boaddr(i0, r0, rn(reg)));
4318}
4319
4320static jit_word_t
4321_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4322{
4323 iaddr(r0, r1);
79bfeef6 4324 return (jc(i0));
4a71579b
PC
4325}
4326
4327static jit_word_t
4328_boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4329{
4330 jit_int32_t reg;
4331 if (can_sign_extend_int_p(i1)) {
4332 iaddi(r0, i1);
79bfeef6 4333 return (jc(i0));
4a71579b
PC
4334 }
4335 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4336 movi(rn(reg), i1);
4337 jit_unget_reg(reg);
4338 return (boaddr_u(i0, r0, rn(reg)));
4339}
4340
4341static jit_word_t
4342_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4343{
4344 iaddr(r0, r1);
79bfeef6 4345 return (jno(i0));
4a71579b
PC
4346}
4347
4348static jit_word_t
4349_bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4350{
4351 jit_int32_t reg;
4352 if (can_sign_extend_int_p(i1)) {
4353 iaddi(r0, i1);
79bfeef6 4354 return (jno(i0));
4a71579b
PC
4355 }
4356 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4357 movi(rn(reg), i1);
4358 jit_unget_reg(reg);
4359 return (bxaddr(i0, r0, rn(reg)));
4360}
4361
4362static jit_word_t
4363_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4364{
4365 iaddr(r0, r1);
79bfeef6 4366 return (jnc(i0));
4a71579b
PC
4367}
4368
4369static jit_word_t
4370_bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4371{
4372 jit_int32_t reg;
4373 if (can_sign_extend_int_p(i1)) {
4374 iaddi(r0, i1);
79bfeef6 4375 return (jnc(i0));
4a71579b
PC
4376 }
4377 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4378 movi(rn(reg), i1);
4379 jit_unget_reg(reg);
4380 return (bxaddr_u(i0, r0, rn(reg)));
4381}
4382
4383static jit_word_t
4384_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4385{
4386 isubr(r0, r1);
79bfeef6 4387 return (jo(i0));
4a71579b
PC
4388}
4389
4390static jit_word_t
4391_bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4392{
4393 jit_int32_t reg;
4394 if (can_sign_extend_int_p(i1)) {
4395 isubi(r0, i1);
79bfeef6 4396 return (jo(i0));
4a71579b
PC
4397 }
4398 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4399 movi(rn(reg), i1);
4400 jit_unget_reg(reg);
4401 return (bosubr(i0, r0, rn(reg)));
4402}
4403
4404static jit_word_t
4405_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4406{
4407 isubr(r0, r1);
79bfeef6 4408 return (jc(i0));
4a71579b
PC
4409}
4410
4411static jit_word_t
4412_bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4413{
4414 jit_int32_t reg;
4415 if (can_sign_extend_int_p(i1)) {
4416 isubi(r0, i1);
79bfeef6 4417 return (jc(i0));
4a71579b
PC
4418 }
4419 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4420 movi(rn(reg), i1);
4421 jit_unget_reg(reg);
4422 return (bosubr_u(i0, r0, rn(reg)));
4423}
4424
4425static jit_word_t
4426_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4427{
4428 isubr(r0, r1);
79bfeef6 4429 return (jno(i0));
4a71579b
PC
4430}
4431
4432static jit_word_t
4433_bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4434{
4435 jit_int32_t reg;
4436 if (can_sign_extend_int_p(i1)) {
4437 isubi(r0, i1);
79bfeef6 4438 return (jno(i0));
4a71579b
PC
4439 }
4440 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4441 movi(rn(reg), i1);
4442 jit_unget_reg(reg);
4443 return (bxsubr(i0, r0, rn(reg)));
4444}
4445
4446static jit_word_t
4447_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4448{
4449 isubr(r0, r1);
79bfeef6 4450 return (jnc(i0));
4a71579b
PC
4451}
4452
4453static jit_word_t
4454_bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4455{
4456 jit_int32_t reg;
4457 if (can_sign_extend_int_p(i1)) {
4458 isubi(r0, i1);
79bfeef6 4459 return (jnc(i0));
4a71579b
PC
4460 }
4461 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4462 movi(rn(reg), i1);
4463 jit_unget_reg(reg);
4464 return (bxsubr_u(i0, r0, rn(reg)));
4465}
4466
4467static void
4468_callr(jit_state_t *_jit, jit_int32_t r0)
4469{
4470 rex(0, 0, _NOREG, _NOREG, r0);
4471 ic(0xff);
4472 mrm(0x03, 0x02, r7(r0));
4473}
4474
4475static jit_word_t
4476_calli(jit_state_t *_jit, jit_word_t i0)
4477{
519a9ea1 4478 jit_word_t w;
79bfeef6
PC
4479 jit_word_t d;
4480 jit_word_t l = _jit->pc.w + 5;
4481 d = i0 - l;
4a71579b 4482#if __X64
79bfeef6
PC
4483 if (
4484# if __X64_32
4485 !((d < 0) ^ (l < 0)) &&
4486# endif
4487 (jit_int32_t)d == d) {
519a9ea1 4488#endif
79bfeef6 4489 w = _jit->pc.w;
519a9ea1 4490 ic(0xe8);
79bfeef6 4491 ii(d);
519a9ea1
PC
4492#if __X64
4493 }
4494 else
79bfeef6 4495 w = calli_p(i0);
519a9ea1 4496#endif
79bfeef6 4497 return (w);
519a9ea1 4498}
4a71579b 4499
519a9ea1
PC
#if __X64
/* Emit a call to i0 through a scratch register: movi_p() loads the
 * address in the register and callr() calls through it.
 * Returns the value returned by movi_p(), which callers hand to
 * patch_at() to rewrite the loaded address. */
static jit_word_t
_calli_p(jit_state_t *_jit, jit_word_t i0)
{
    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
    jit_word_t          patch = movi_p(rn(scratch), i0);

    callr(rn(scratch));
    jit_unget_reg(scratch);
    return (patch);
}
#endif
4a71579b
PC
4513
4514static void
4515_jmpr(jit_state_t *_jit, jit_int32_t r0)
4516{
519a9ea1 4517 rex(0, 0, _NOREG, _NOREG, r0);
4a71579b
PC
4518 ic(0xff);
4519 mrm(0x03, 0x04, r7(r0));
4520}
4521
4522static jit_word_t
4523_jmpi(jit_state_t *_jit, jit_word_t i0)
4524{
4525 jit_word_t w;
79bfeef6
PC
4526 jit_word_t d;
4527 jit_word_t l = _jit->pc.w + 5;
4528 d = i0 - l;
519a9ea1 4529#if __X64
79bfeef6
PC
4530 if (
4531# if __X64_32
4532 !((d < 0) ^ (l < 0)) &&
4533# endif
4534 (jit_int32_t)d == d) {
519a9ea1 4535#endif
79bfeef6 4536 w = _jit->pc.w;
519a9ea1 4537 ic(0xe9);
79bfeef6 4538 ii(d);
519a9ea1
PC
4539#if __X64
4540 }
4541 else
79bfeef6 4542 w = jmpi_p(i0);
519a9ea1 4543#endif
79bfeef6 4544 return (w);
4a71579b
PC
4545}
4546
519a9ea1
PC
#if __X64
/* Emit a jump to i0 through a scratch register: movi_p() loads the
 * address in the register and jmpr() jumps through it.
 * Returns the value returned by movi_p(), which callers hand to
 * patch_at() to rewrite the loaded address. */
static jit_word_t
_jmpi_p(jit_state_t *_jit, jit_word_t i0)
{
    jit_int32_t         scratch;
    jit_word_t          patch;

    scratch = jit_get_reg(jit_class_gpr|jit_class_nospill);
    patch = movi_p(rn(scratch), i0);
    jmpr(rn(scratch));
    jit_unget_reg(scratch);
    return (patch);
}
#endif
4560
79bfeef6 4561static jit_word_t
4a71579b
PC
4562_jmpsi(jit_state_t *_jit, jit_uint8_t i0)
4563{
79bfeef6 4564 jit_word_t w = _jit->pc.w;
4a71579b
PC
4565 ic(0xeb);
4566 ic(i0);
79bfeef6 4567 return (w);
4a71579b 4568}
ba86ff93
PC
4569#undef clear
4570#undef allocr
4571#undef savset
4a71579b
PC
4572
4573static void
4574_prolog(jit_state_t *_jit, jit_node_t *node)
4575{
79bfeef6 4576 jit_int32_t reg, offs;
4a71579b
PC
4577 if (_jitc->function->define_frame || _jitc->function->assume_frame) {
4578 jit_int32_t frame = -_jitc->function->frame;
79bfeef6 4579 jit_check_frame();
4a71579b
PC
4580 assert(_jitc->function->self.aoff >= frame);
4581 if (_jitc->function->assume_frame)
4582 return;
4583 _jitc->function->self.aoff = frame;
4584 }
4585 if (_jitc->function->allocar)
4586 _jitc->function->self.aoff &= -16;
4587#if __X64 && (__CYGWIN__ || _WIN32)
4588 _jitc->function->stack = (((/* first 32 bytes must be allocated */
4589 (_jitc->function->self.alen > 32 ?
4590 _jitc->function->self.alen : 32) -
4591 /* align stack at 16 bytes */
79bfeef6 4592 _jitc->function->self.aoff) + 15) & -16);
4a71579b
PC
4593#else
4594 _jitc->function->stack = (((_jitc->function->self.alen -
79bfeef6 4595 _jitc->function->self.aoff) + 15) & -16);
4a71579b 4596#endif
79bfeef6
PC
4597
4598 if (_jitc->function->stack)
4599 _jitc->function->need_stack = 1;
4600
4601 if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
4602 /* check if any callee save register needs to be saved */
4603 for (reg = 0; reg < _jitc->reglen; ++reg)
4604 if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
4605 (_rvs[reg].spec & jit_class_sav)) {
4606 _jitc->function->need_stack = 1;
4607 break;
4608 }
4609 }
4610
4611 if (_jitc->function->need_frame || _jitc->function->need_stack)
4612 subi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4a71579b 4613 /* callee save registers */
79bfeef6
PC
4614 for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
4615 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4616 stxi(offs, _RSP_REGNO, rn(iregs[reg]));
4617 offs += REAL_WORDSIZE;
4618 }
4619 }
4620#if __X64 && (__CYGWIN__ || _WIN32)
4621 for (reg = 0; reg < jit_size(fregs); reg++) {
4622 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
4623 sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg]));
4624 offs += sizeof(jit_float64_t);
4625 }
4626 }
4a71579b 4627#endif
79bfeef6
PC
4628
4629 if (_jitc->function->need_frame) {
4630 stxi(0, _RSP_REGNO, _RBP_REGNO);
4631 movr(_RBP_REGNO, _RSP_REGNO);
4632 }
4a71579b
PC
4633
4634 /* alloca */
79bfeef6
PC
4635 if (_jitc->function->stack)
4636 subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
4a71579b
PC
4637 if (_jitc->function->allocar) {
4638 reg = jit_get_reg(jit_class_gpr);
4639 movi(rn(reg), _jitc->function->self.aoff);
4640 stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg));
4641 jit_unget_reg(reg);
4642 }
4643
4644#if __X64 && !(__CYGWIN__ || _WIN32)
4645 if (_jitc->function->self.call & jit_call_varargs) {
4646 jit_word_t nofp_code;
4647
4648 /* Save gp registers in the save area, if any is a vararg */
4649 for (reg = first_gp_from_offset(_jitc->function->vagp);
4650 jit_arg_reg_p(reg); ++reg)
4651 stxi(_jitc->function->vaoff + first_gp_offset +
4652 reg * 8, _RBP_REGNO, rn(JIT_RA0 - reg));
4653
4654 reg = first_fp_from_offset(_jitc->function->vafp);
4655 if (jit_arg_f_reg_p(reg)) {
4656 /* Skip over if no float registers were passed as argument */
4657 /* test %al, %al */
4658 ic(0x84);
4659 ic(0xc0);
79bfeef6 4660 nofp_code = jes(0);
4a71579b
PC
4661
4662 /* Save fp registers in the save area, if any is a vararg */
4663 /* Note that the full 16 byte xmm is not saved, because
4664 * lightning only handles float and double, and, while
4665 * attempting to provide a va_list compatible pointer as
4666 * jit_va_start return, does not guarantee it (on all ports). */
4667 for (; jit_arg_f_reg_p(reg); ++reg)
4668 sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
4669 reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
4670
79bfeef6 4671 patch_at(nofp_code, _jit->pc.w);
4a71579b
PC
4672 }
4673 }
4674#endif
4675}
4676
4677static void
4678_epilog(jit_state_t *_jit, jit_node_t *node)
4679{
79bfeef6 4680 jit_int32_t reg, offs;
4a71579b
PC
4681 if (_jitc->function->assume_frame)
4682 return;
79bfeef6
PC
4683 if (_jitc->function->need_frame)
4684 movr(_RSP_REGNO, _RBP_REGNO);
4685
4a71579b 4686 /* callee save registers */
79bfeef6
PC
4687 for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
4688 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4689 ldxi(rn(iregs[reg]), _RSP_REGNO, offs);
4690 offs += REAL_WORDSIZE;
4691 }
4692 }
4693#if __X64 && (__CYGWIN__ || _WIN32)
4694 for (reg = 0; reg < jit_size(fregs); reg++) {
4695 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
4696 sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs);
4697 offs += sizeof(jit_float64_t);
4698 }
4699 }
4a71579b 4700#endif
79bfeef6
PC
4701
4702 if (_jitc->function->need_frame) {
4703 ldxi(_RBP_REGNO, _RSP_REGNO, 0);
4704 addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4705 }
4706 /* This condition does not happen as much as expected because
4707 * it is not safe to not create a frame pointer if any function
4708 * is called, even jit functions, as those might call external
4709 * functions. */
4710 else if (_jitc->function->need_stack)
4711 addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4a71579b
PC
4712
4713 ic(0xc3);
4714}
4715
4716static void
4717_vastart(jit_state_t *_jit, jit_int32_t r0)
4718{
4719#if __X32 || __CYGWIN__ || _WIN32
4720 assert(_jitc->function->self.call & jit_call_varargs);
79bfeef6 4721 addi(r0, _RBP_REGNO, jit_selfsize());
4a71579b
PC
4722#else
4723 jit_int32_t reg;
4724
4725 assert(_jitc->function->self.call & jit_call_varargs);
4726
4727 /* Return jit_va_list_t in the register argument */
4728 addi(r0, _RBP_REGNO, _jitc->function->vaoff);
4729 reg = jit_get_reg(jit_class_gpr);
4730
4731 /* Initialize gp offset in the save area. */
4732 movi(rn(reg), _jitc->function->vagp);
4733 stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
4734
4735 /* Initialize fp offset in the save area. */
4736 movi(rn(reg), _jitc->function->vafp);
4737 stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
4738
4739 /* Initialize overflow pointer to the first stack argument. */
79bfeef6 4740 addi(rn(reg), _RBP_REGNO, jit_selfsize());
4a71579b
PC
4741 stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
4742
4743 /* Initialize register save area pointer. */
4744 addi(rn(reg), r0, first_gp_offset);
4745 stxi(offsetof(jit_va_list_t, save), r0, rn(reg));
4746
4747 jit_unget_reg(reg);
4748#endif
4749}
4750
4751static void
4752_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4753{
4754#if __X32 || __CYGWIN__ || _WIN32
4755 assert(_jitc->function->self.call & jit_call_varargs);
4756 ldr(r0, r1);
4757 addi(r1, r1, va_gp_increment);
4758#else
4759 jit_int32_t rg0;
4760 jit_int32_t rg1;
4761 jit_word_t ge_code;
4762 jit_word_t lt_code;
4763
4764 assert(_jitc->function->self.call & jit_call_varargs);
4765
4766 rg0 = jit_get_reg(jit_class_gpr);
4767 rg1 = jit_get_reg(jit_class_gpr);
4768
4769 /* Load the gp offset in save area in the first temporary. */
4770 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
4771
4772 /* Jump over if there are no remaining arguments in the save area. */
4773 icmpi(rn(rg0), va_gp_max_offset);
79bfeef6 4774 ge_code = jaes(0);
4a71579b
PC
4775
4776 /* Load the save area pointer in the second temporary. */
4777 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
4778
4779 /* Load the vararg argument in the first argument. */
4780 ldxr(r0, rn(rg1), rn(rg0));
4781
4782 /* Update the gp offset. */
4783 addi(rn(rg0), rn(rg0), 8);
4784 stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
4785
4786 /* Will only need one temporary register below. */
4787 jit_unget_reg(rg1);
4788
4789 /* Jump over overflow code. */
79bfeef6 4790 lt_code = jmpsi(0);
4a71579b
PC
4791
4792 /* Where to land if argument is in overflow area. */
79bfeef6 4793 patch_at(ge_code, _jit->pc.w);
4a71579b
PC
4794
4795 /* Load overflow pointer. */
4796 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
4797
4798 /* Load argument. */
4799 ldr(r0, rn(rg0));
4800
4801 /* Update overflow pointer. */
4802 addi(rn(rg0), rn(rg0), va_gp_increment);
4803 stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
4804
4805 /* Where to land if argument is in save area. */
79bfeef6 4806 patch_at(lt_code, _jit->pc.w);
4a71579b
PC
4807
4808 jit_unget_reg(rg0);
4809#endif
4810}
4811
4812/* The x87 boolean argument tells if will put the result in a x87
4813 * register if non false, in a sse register otherwise. */
4814static void
4815_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
4816{
4817#if __X32 || __CYGWIN__ || _WIN32
4818 assert(_jitc->function->self.call & jit_call_varargs);
4819 if (x87)
4820 x87_ldr_d(r0, r1);
4821 else
4822 sse_ldr_d(r0, r1);
4823 addi(r1, r1, 8);
4824#else
4825 jit_int32_t rg0;
4826 jit_int32_t rg1;
4827 jit_word_t ge_code;
4828 jit_word_t lt_code;
4829
4830 assert(_jitc->function->self.call & jit_call_varargs);
4831
4832 rg0 = jit_get_reg(jit_class_gpr);
4833 rg1 = jit_get_reg(jit_class_gpr);
4834
4835 /* Load the fp offset in save area in the first temporary. */
4836 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
4837
4838 /* Jump over if there are no remaining arguments in the save area. */
4839 icmpi(rn(rg0), va_fp_max_offset);
79bfeef6 4840 ge_code = jaes(0);
4a71579b
PC
4841
4842 /* Load the save area pointer in the second temporary. */
4843 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
4844
4845 /* Load the vararg argument in the first argument. */
4846 if (x87)
4847 x87_ldxr_d(r0, rn(rg1), rn(rg0));
4848 else
4849 sse_ldxr_d(r0, rn(rg1), rn(rg0));
4850
4851 /* Update the fp offset. */
4852 addi(rn(rg0), rn(rg0), va_fp_increment);
4853 stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
4854
4855 /* Will only need one temporary register below. */
4856 jit_unget_reg(rg1);
4857
4858 /* Jump over overflow code. */
79bfeef6 4859 lt_code = jmpsi(0);
4a71579b
PC
4860
4861 /* Where to land if argument is in overflow area. */
79bfeef6 4862 patch_at(ge_code, _jit->pc.w);
4a71579b
PC
4863
4864 /* Load overflow pointer. */
4865 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
4866
4867 /* Load argument. */
4868 if (x87)
4869 x87_ldr_d(r0, rn(rg0));
4870 else
4871 sse_ldr_d(r0, rn(rg0));
4872
4873 /* Update overflow pointer. */
4874 addi(rn(rg0), rn(rg0), 8);
4875 stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
4876
4877 /* Where to land if argument is in save area. */
79bfeef6 4878 patch_at(lt_code, _jit->pc.w);
4a71579b
PC
4879
4880 jit_unget_reg(rg0);
4881#endif
4882}
4883
4884static void
79bfeef6 4885_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
4a71579b 4886{
79bfeef6
PC
4887 jit_word_t disp;
4888 jit_uint8_t *code = (jit_uint8_t *)instr;
4889 ++instr;
4890 switch (code[0]) {
4891 /* movi_p */
4892 case 0xb8 ... 0xbf:
4893 *(jit_word_t *)instr = label;
4a71579b 4894 break;
79bfeef6
PC
4895 /* forward pc relative address known to be in range */
4896#if CAN_RIP_ADDRESS
4897 /* movi */
4898 case 0x8d:
4899 ++instr;
4900 goto apply;
4901#endif
4902 /* jcc */
4903 case 0x0f:
4904 ++instr;
4905 if (code[1] < 0x80 || code[1] > 0x8f)
4906 goto fail;
4907 /* calli */
4908 case 0xe8:
4909 /* jmpi */
4910 case 0xe9:
4911#if CAN_RIP_ADDRESS
4912 apply:
4913#endif
4914 disp = label - (instr + 4);
4915 assert((jit_int32_t)disp == disp);
4916 *(jit_int32_t *)instr = disp;
4917 break;
4918 /* jccs */
4919 case 0x70 ... 0x7f:
4920 /* jmpsi */
4921 case 0xeb:
4922 disp = label - (instr + 1);
4923 assert((jit_int8_t)disp == disp);
4924 *(jit_int8_t *)instr = disp;
4a71579b 4925 break;
79bfeef6
PC
4926 default:
4927 fail:
4928 abort();
4a71579b
PC
4929 }
4930}
4931#endif