2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4 * This file is part of GNU lightning.
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
17 * Paulo Cesar Pereira de Andrade
20 /* avoid using it due to partial stalls */
24 # if __WORDSIZE == 64 && _WIN32
29 # if __X32 || __X64_32
/*
 * Word-sized load/store aliases for the configuration that follows the
 * `__X32 || __X64_32` test: each unsuffixed name forwards its arguments
 * unchanged to the matching `_i` variant.
 */
31 # define ldi(u, v) ldi_i(u, v)
32 # define ldr(u, v) ldr_i(u, v)
33 # define ldxr(u, v, w) ldxr_i(u, v, w)
34 # define ldxi(u, v, w) ldxi_i(u, v, w)
35 # define str(u, v) str_i(u, v)
36 # define sti(u, v) sti_i(u, v)
37 # define stxr(u, v, w) stxr_i(u, v, w)
38 # define stxi(u, v, w) stxi_i(u, v, w)
/*
 * In this configuration the immediate-range predicates are the constant 1;
 * the `im` argument is not evaluated.
 */
39 # define can_sign_extend_int_p(im) 1
40 # define can_zero_extend_int_p(im) 1
41 # define fits_uint32_p(im) 1
/*
 * Word-sized load/store aliases for the alternative configuration: each
 * unsuffixed name forwards its arguments unchanged to the matching `_l`
 * variant.
 */
44 # define ldi(u, v) ldi_l(u, v)
45 # define ldr(u, v) ldr_l(u, v)
46 # define ldxr(u, v, w) ldxr_l(u, v, w)
47 # define ldxi(u, v, w) ldxi_l(u, v, w)
48 # define str(u, v) str_l(u, v)
49 # define sti(u, v) sti_l(u, v)
50 # define stxr(u, v, w) stxr_l(u, v, w)
51 # define stxi(u, v, w) stxi_l(u, v, w)
/*
 * True when `im`, viewed as long long, lies in [0, 0x7fffffff] or is
 * negative and strictly greater than -0x80000000.  Note the lower bound is
 * exclusive: -0x80000000 itself is rejected.
 * NOTE(review): the exclusive bound presumably keeps the negated immediate
 * representable as well -- confirm against the callers before widening it.
 * `im` is evaluated more than once; do not pass expressions with side
 * effects.
 */
52 # define can_sign_extend_int_p(im) \
53 (((long long)(im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \
54 ((long long)(im) < 0 && (long long)(im) > -0x80000000LL))
/*
 * True when 0 <= im < 0x80000000.  `im` is compared in its own type and is
 * evaluated twice.
 */
55 # define can_zero_extend_int_p(im) \
56 ((im) >= 0 && (im) < 0x80000000LL)
/* True when none of bits 32..63 of `im` are set. */
57 # define fits_uint32_p(im) (((im) & 0xffffffff00000000LL) == 0)
59 # if __X32 || __CYGWIN__ || __X64_32 || _WIN32
61 ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
/* Register numbers 10 through 15 for the registers named R10..R15. */
75 # define _R10_REGNO 10
76 # define _R11_REGNO 11
77 # define _R12_REGNO 12
78 # define _R13_REGNO 13
79 # define _R14_REGNO 14
80 # define _R15_REGNO 15
/* r7 keeps the low three bits of a register number (values 0..7). */
81 # define r7(reg) ((reg) & 7)
/* r8 keeps the low four bits of a register number (values 0..15). */
82 # define r8(reg) ((reg) & 15)
/*
 * ALU operation selectors, passed as the `code` argument of alur()/alui().
 * Each value is an operation index (1..7) already shifted left by three
 * bits.
 *
 * Every expansion is wrapped in parentheses so the constant behaves as a
 * single operand wherever it is used; without them an expression such as
 * `X86_CMP + 1` would parse as `7 << (3 + 1)`.
 */
#  define X86_OR			(1 << 3)
#  define X86_ADC			(2 << 3)
#  define X86_SBB			(3 << 3)
#  define X86_AND			(4 << 3)
#  define X86_SUB			(5 << 3)
#  define X86_XOR			(6 << 3)
#  define X86_CMP			(7 << 3)
/*
 * Condition codes, values 0x0 through 0xf.  They are the `code` argument
 * handed to jcc()/jccs() by the j<cond> shorthands and to ci() by the
 * comparison macros.
 *
 * Several names share one value (for example X86_CC_NAE, X86_CC_B and
 * X86_CC_C are all 0x2); they are alternative spellings of the same code.
 * Each odd value is paired with the even value just below it under a
 * negated name (O/NO, B/NB, E/NE, ...).
 */
108 # define X86_CC_O 0x0
109 # define X86_CC_NO 0x1
110 # define X86_CC_NAE 0x2
111 # define X86_CC_B 0x2
112 # define X86_CC_C 0x2
113 # define X86_CC_AE 0x3
114 # define X86_CC_NB 0x3
115 # define X86_CC_NC 0x3
116 # define X86_CC_E 0x4
117 # define X86_CC_Z 0x4
118 # define X86_CC_NE 0x5
119 # define X86_CC_NZ 0x5
120 # define X86_CC_BE 0x6
121 # define X86_CC_NA 0x6
122 # define X86_CC_A 0x7
123 # define X86_CC_NBE 0x7
124 # define X86_CC_S 0x8
125 # define X86_CC_NS 0x9
126 # define X86_CC_P 0xa
127 # define X86_CC_PE 0xa
128 # define X86_CC_NP 0xb
129 # define X86_CC_PO 0xb
130 # define X86_CC_L 0xc
131 # define X86_CC_NGE 0xc
132 # define X86_CC_GE 0xd
133 # define X86_CC_NL 0xd
134 # define X86_CC_LE 0xe
135 # define X86_CC_NG 0xe
136 # define X86_CC_G 0xf
137 # define X86_CC_NLE 0xf
/*
 * Raw emitters.  Each macro stores one value through a view of the code
 * pointer `_jit->pc` and post-increments that view, so successive calls
 * append to the instruction stream.  `_jit` must be in scope at the call
 * site.
 *
 *   mrm(md, r, m)  stores (md << 6) | (r << 3) | m  through pc.uc
 *   sib(sc, i, b)  stores (sc << 6) | (i << 3) | b  through pc.uc
 *   ic(c)          stores c through pc.uc
 *   is(s)          stores s through pc.us
 *   ii(i)          stores i through pc.ui
 *
 * NOTE(review): uc/us/ui are presumably 8-, 16- and 32-bit views -- confirm
 * against the declaration of the pc union.
 *
 * Every argument and every expansion is parenthesized, so an argument such
 * as `a | b` is shifted as a unit instead of having only `b` shifted, and
 * the macro can be used as a sub-expression.
 */
#  define mrm(md, r, m)	(*_jit->pc.uc++ = (((md) << 6) | ((r) << 3) | (m)))
#  define sib(sc, i, b)	(*_jit->pc.uc++ = (((sc) << 6) | ((i) << 3) | (b)))
#  define ic(c)			(*_jit->pc.uc++ = (c))
#  define is(s)			(*_jit->pc.us++ = (s))
#  define ii(i)			(*_jit->pc.ui++ = (i))
143 # if __X64 && !__X64_32
144 # define il(l) *_jit->pc.ul++ = l
148 # define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b)
150 _rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
151 # define rx(rd, md, rb, ri, ms) _rx(_jit, rd, md, rb, ri, ms)
153 _rx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
155 * prefix 8 bits 0xc4 Three byte VEX
157 * 0x8f Three byte XOP
158 * ~R 1 bit Inverted REX.R
159 * ~X 1 bit Inverted REX.X
160 * ~B 1 bit Inverted REX.B
161 * map 5 bits Opcode map to use
162 * W 1 bit REX.W for integer, otherwise opcode extension
163 * ~vvvv 4 bits Inverted XMM or YMM registers
164 * L 1 bit 128 bit vector if 0, 256 otherwise
165 * pp 2 bits Mandatory prefix
172 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
173 * | 1 1 0 0 0 1 0 0 | |~R |~X |~B | map | | W | ~vvvv | L | pp |
174 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
176 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
177 * | 1 0 0 0 1 1 1 1 | |~R |~X |~B | map | | W | ~vvvv | L | pp |
178 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
180 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
181 * | 1 1 0 0 0 1 0 1 | |~R | ~vvvv | L | pp |
182 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
184 # define vex(r,x,b,map,w,vvvv,l,pp) _vex(_jit,r,x,b,map,w,vvvv,l,pp)
186 _vex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
187 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
188 # define nop(n) _nop(_jit, n)
189 static void _nop(jit_state_t*, jit_int32_t);
190 # define emms() is(0x770f)
191 # define lea(md, rb, ri, ms, rd) _lea(_jit, md, rb, ri, ms, rd)
193 _lea(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
194 # define pushr(r0) _pushr(_jit, r0)
195 static void _pushr(jit_state_t*, jit_int32_t) maybe_unused;
196 # define popr(r0) _popr(_jit, r0)
197 static void _popr(jit_state_t*, jit_int32_t) maybe_unused;
198 # define xchgr(r0, r1) _xchgr(_jit, r0, r1)
199 static void _xchgr(jit_state_t*, jit_int32_t, jit_int32_t);
200 # define testr(r0, r1) _testr(_jit, r0, r1)
201 static void _testr(jit_state_t*, jit_int32_t, jit_int32_t);
202 # define testi(r0, i0) _testi(_jit, r0, i0)
203 static void _testi(jit_state_t*, jit_int32_t, jit_word_t);
204 # define cc(code, r0) _cc(_jit, code, r0)
205 static void _cc(jit_state_t*, jit_int32_t, jit_int32_t);
206 # define icmpr(r0, r1) alur(X86_CMP, r0, r1)
207 # define alur(code, r0, r1) _alur(_jit, code, r0, r1)
208 static void _alur(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
209 # define icmpi(r0, i0) alui(X86_CMP, r0, i0)
210 # define alui(code, r0, i0) _alui(_jit, code, r0, i0)
211 static void _alui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
212 # define iaddr(r0, r1) alur(X86_ADD, r0, r1)
213 # define save(r0) _save(_jit, r0)
214 static void _save(jit_state_t*, jit_int32_t);
215 # define load(r0) _load(_jit, r0)
216 static void _load(jit_state_t*, jit_int32_t);
217 # define addr(r0, r1, r2) _addr(_jit, r0, r1, r2)
218 static void _addr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
219 # define iaddi(r0, i0) alui(X86_ADD, r0, i0)
220 # define addi(r0, r1, i0) _addi(_jit, r0, r1, i0)
221 static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
222 #define addcr(r0, r1, r2) _addcr(_jit, r0, r1, r2)
223 static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
224 #define addci(r0, r1, i0) _addci(_jit, r0, r1, i0)
225 static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
226 # define iaddxr(r0, r1) _iaddxr(_jit, r0, r1)
227 static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t);
228 # define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2)
229 static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
230 # define iaddxi(r0, i0) alui(X86_ADC, r0, i0)
231 # define addxi(r0, r1, i0) _addxi(_jit, r0, r1, i0)
232 static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
233 # define isubr(r0, r1) alur(X86_SUB, r0, r1)
234 # define subr(r0, r1, r2) _subr(_jit, r0, r1, r2)
235 static void _subr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
236 # define isubi(r0, i0) alui(X86_SUB, r0, i0)
237 # define subi(r0, r1, i0) _subi(_jit, r0, r1, i0)
238 static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
239 # define subcr(r0, r1, r2) _subcr(_jit, r0, r1, r2)
240 static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
241 # define subci(r0, r1, i0) _subci(_jit, r0, r1, i0)
242 static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
243 # define isubxr(r0, r1) alur(X86_SBB, r0, r1)
244 # define subxr(r0, r1, r2) _subxr(_jit, r0, r1, r2)
245 static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
246 # define isubxi(r0, i0) alui(X86_SBB, r0, i0)
247 # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0)
248 static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
249 # define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0)
250 static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
251 # define imulr(r0, r1) _imulr(_jit, r0, r1)
252 static void _imulr(jit_state_t*, jit_int32_t, jit_int32_t);
253 # define imuli(r0, r1, i0) _imuli(_jit, r0, r1, i0)
254 static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
255 # define mulr(r0, r1, r2) _mulr(_jit, r0, r1, r2)
256 static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
257 # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
258 static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
259 # define hmulr(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 1)
260 # define hmulr_u(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 0)
261 # define hmuli(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 1)
262 # define hmuli_u(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 0)
263 # define umulr(r0) unr(X86_IMUL, r0)
264 # define umulr_u(r0) unr(X86_MUL, r0)
265 # define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1)
266 # define qmulr_u(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 0)
267 # define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
268 static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t,
269 jit_int32_t,jit_int32_t, jit_bool_t);
270 # define qmuli(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 1)
271 # define qmuli_u(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 0)
272 # define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign)
273 static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t,
274 jit_int32_t,jit_word_t, jit_bool_t);
275 # define sign_extend_rdx_rax() _sign_extend_rdx_rax(_jit)
276 static void _sign_extend_rdx_rax(jit_state_t*);
277 # define idivr(r0) unr(X86_IDIV, r0)
278 # define idivr_u(r0) unr(X86_DIV, r0)
279 # define divremr(r0, r1, r2, i0, i1) _divremr(_jit, r0, r1, r2, i0, i1)
281 _divremr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
282 jit_bool_t,jit_bool_t);
283 # define divremi(r0, r1, i0, i1, i2) _divremi(_jit, r0, r1, i0, i1, i2)
285 _divremi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_bool_t,jit_bool_t);
/*
 * Division wrappers over divremr()/divremi().  The last two arguments are
 * flags: the signed forms pass 1 and the `_u` forms pass 0 as the first
 * flag; the div forms pass 1 as the second flag (the rem forms further down
 * pass 0).
 */
286 # define divr(r0, r1, r2) divremr(r0, r1, r2, 1, 1)
287 # define divi(r0, r1, i0) divremi(r0, r1, i0, 1, 1)
288 # define divr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 1)
289 # define divi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 1)
/*
 * Four-operand division, register divisor: qdivr passes 1 and qdivr_u
 * passes 0 as the trailing `sign` argument of _iqdivr.
 */
290 # define qdivr(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 1)
291 # define qdivr_u(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 0)
292 # define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign)
293 static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t,
294 jit_int32_t,jit_int32_t, jit_bool_t);
/*
 * Four-operand division, immediate divisor: same sign convention as above,
 * with a jit_word_t immediate in place of the last register.
 */
295 # define qdivi(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 1)
296 # define qdivi_u(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 0)
297 # define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign)
298 static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t,
299 jit_int32_t,jit_word_t, jit_bool_t);
/*
 * Remainder wrappers: same helpers as the div forms, with 0 as the second
 * flag.
 */
300 # define remr(r0, r1, r2) divremr(r0, r1, r2, 1, 0)
301 # define remi(r0, r1, i0) divremi(r0, r1, i0, 1, 0)
302 # define remr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 0)
303 # define remi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 0)
304 # define iandr(r0, r1) alur(X86_AND, r0, r1)
305 # define andr(r0, r1, r2) _andr(_jit, r0, r1, r2)
306 static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
307 # define iandi(r0, i0) alui(X86_AND, r0, i0)
308 # define andi(r0, r1, i0) _andi(_jit, r0, r1, i0)
309 static void _andi(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
310 # define iorr(r0, r1) alur(X86_OR, r0, r1)
311 # define orr(r0, r1, r2) _orr(_jit, r0, r1, r2)
312 static void _orr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
313 # define iori(r0, i0) alui(X86_OR, r0, i0)
314 # define ori(r0, r1, i0) _ori(_jit, r0, r1, i0)
315 static void _ori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
316 # define ixorr(r0, r1) alur(X86_XOR, r0, r1)
317 # define xorr(r0, r1, r2) _xorr(_jit, r0, r1, r2)
318 static void _xorr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
319 # define ixori(r0, i0) alui(X86_XOR, r0, i0)
320 # define xori(r0, r1, i0) _xori(_jit, r0, r1, i0)
321 static void _xori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
322 # define irotshr(code, r0) _irotshr(_jit, code, r0)
323 static void _irotshr(jit_state_t*, jit_int32_t, jit_int32_t);
324 # define rotshr(code, r0, r1, r2) _rotshr(_jit, code, r0, r1, r2)
326 _rotshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
327 # define irotshi(code, r0, i0) _irotshi(_jit, code, r0, i0)
328 static void _irotshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
329 # define rotshi(code, r0, r1, i0) _rotshi(_jit, code, r0, r1, i0)
331 _rotshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
332 # define lshr(r0, r1, r2) rotshr(X86_SHL, r0, r1, r2)
333 # define qlshr(r0, r1, r2, r3) xlshr(1, r0, r1, r2, r3)
334 # define xlshr(s, r0, r1, r2, r3) _xlshr(_jit, s, r0, r1, r2, r3)
336 _xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
337 # define lshi(r0, r1, i0) _lshi(_jit, r0, r1, i0)
338 static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
339 # define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0)
340 # define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0)
342 _xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
343 # define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3)
344 # define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0)
345 # define rshr(r0, r1, r2) rotshr(X86_SAR, r0, r1, r2)
346 # define rshi(r0, r1, i0) rotshi(X86_SAR, r0, r1, i0)
347 # define rshr_u(r0, r1, r2) rotshr(X86_SHR, r0, r1, r2)
348 # define rshi_u(r0, r1, i0) rotshi(X86_SHR, r0, r1, i0)
349 # define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3)
350 # define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3)
351 # define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3)
353 _xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
354 # define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0)
355 # define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0)
356 # define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0)
358 _xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
359 # define lrotr(r0, r1, r2) rotshr(X86_ROL, r0, r1, r2)
360 # define lroti(r0, r1, i0) rotshi(X86_ROL, r0, r1, i0)
361 # define rrotr(r0, r1, r2) rotshr(X86_ROR, r0, r1, r2)
362 # define rroti(r0, r1, i0) rotshi(X86_ROR, r0, r1, i0)
363 # define unr(code, r0) _unr(_jit, code, r0)
364 static void _unr(jit_state_t*, jit_int32_t, jit_int32_t);
365 # define inegr(r0) unr(X86_NEG, r0)
366 # define negr(r0, r1) _negr(_jit, r0, r1)
367 static void _negr(jit_state_t*, jit_int32_t, jit_int32_t);
368 # define icomr(r0) unr(X86_NOT, r0)
369 # define comr(r0, r1) _comr(_jit, r0, r1)
370 static void _comr(jit_state_t*, jit_int32_t, jit_int32_t);
372 # define incr(r0, r1) _incr(_jit, r0, r1)
373 static void _incr(jit_state_t*, jit_int32_t, jit_int32_t);
374 # define decr(r0, r1) _decr(_jit, r0, r1)
375 static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
377 # define clor(r0, r1) _clor(_jit, r0, r1)
378 static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
379 # define clzr(r0, r1) _clzr(_jit, r0, r1)
380 static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
381 # define ctor(r0, r1) _ctor(_jit, r0, r1)
382 static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
383 # define ctzr(r0, r1) _ctzr(_jit, r0, r1)
384 static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
385 # define rbitr(r0, r1) _rbitr(_jit, r0, r1)
386 static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t);
387 # define popcntr(r0, r1) _popcntr(_jit, r0, r1)
388 static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t);
389 # define cr(code, r0, r1, r2) _cr(_jit, code, r0, r1, r2)
391 _cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
392 # define ci(code, r0, r1, i0) _ci(_jit, code, r0, r1, i0)
394 _ci(jit_state_t *_jit, jit_int32_t, jit_int32_t, jit_int32_t, jit_word_t);
395 # define ci0(code, r0, r1) _ci0(_jit, code, r0, r1)
396 static void _ci0(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
397 # define ltr(r0, r1, r2) _ltr(_jit, r0, r1, r2)
398 static void _ltr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
399 # define lti(r0, r1, i0) _lti(_jit, r0, r1, i0)
400 static void _lti(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
401 # define ltr_u(r0, r1, r2) _ltr_u(_jit, r0, r1, r2)
402 static void _ltr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
403 # define lti_u(r0, r1, i0) ci(X86_CC_B, r0, r1, i0)
404 # define ler(r0, r1, r2) _ler(_jit, r0, r1, r2)
405 static void _ler(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
406 # define lei(r0, r1, i0) ci(X86_CC_LE, r0, r1, i0)
407 # define ler_u(r0, r1, r2) _ler_u(_jit, r0, r1, r2)
408 static void _ler_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
409 # define lei_u(r0, r1, i0) _lei_u(_jit, r0, r1, i0)
410 static void _lei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
411 # define eqr(r0, r1, r2) _eqr(_jit, r0, r1, r2)
412 static void _eqr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
413 # define eqi(r0, r1, i0) _eqi(_jit, r0, r1, i0)
414 static void _eqi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
415 # define ger(r0, r1, r2) _ger(_jit, r0, r1, r2)
416 static void _ger(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
417 # define gei(r0, r1, i0) _gei(_jit, r0, r1, i0)
418 static void _gei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
419 # define ger_u(r0, r1, r2) _ger_u(_jit, r0, r1, r2)
420 static void _ger_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
421 # define gei_u(r0, r1, i0) _gei_u(_jit, r0, r1, i0)
422 static void _gei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
423 # define gtr(r0, r1, r2) _gtr(_jit, r0, r1, r2)
424 static void _gtr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
425 # define gti(r0, r1, i0) _ci(_jit, X86_CC_G, r0, r1, i0)
426 # define gtr_u(r0, r1, r2) _gtr_u(_jit, r0, r1, r2)
427 static void _gtr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
428 # define gti_u(r0, r1, i0) _gti_u(_jit, r0, r1, i0)
429 static void _gti_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
430 # define ner(r0, r1, r2) _ner(_jit, r0, r1, r2)
431 static void _ner(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
432 # define nei(r0, r1, i0) _nei(_jit, r0, r1, i0)
433 static void _nei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
434 # define movr(r0, r1) _movr(_jit, r0, r1)
435 static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
436 # define imovi(r0, i0) _imovi(_jit, r0, i0)
437 static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
438 # define movi(r0, i0) _movi(_jit, r0, i0)
445 _movi(jit_state_t*, jit_int32_t, jit_word_t);
446 # define movi_p(r0, i0) _movi_p(_jit, r0, i0)
447 static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
448 # define movcr(r0, r1) _movcr(_jit, r0, r1)
449 static void _movcr(jit_state_t*,jit_int32_t,jit_int32_t);
450 # define movcr_u(r0, r1) _movcr_u(_jit, r0, r1)
451 static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
452 # define movsr(r0, r1) _movsr(_jit, r0, r1)
453 static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
454 # define movsr_u(r0, r1) _movsr_u(_jit, r0, r1)
455 static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
456 # define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
457 static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
458 jit_int32_t,jit_int32_t,jit_word_t);
459 #define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
460 #define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
461 #define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2)
462 static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
463 #define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2)
464 static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
465 # if __X64 && !__X64_32
466 # define movir(r0, r1) _movir(_jit, r0, r1)
467 static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
468 # define movir_u(r0, r1) _movir_u(_jit, r0, r1)
469 static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
471 # define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
472 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
473 # define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
474 static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
475 # if __X64 && !__X64_32
476 #define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1)
477 static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
479 # define extr(r0, r1, i0, i1) _extr(_jit, r0, r1, i0, i1)
480 static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
481 # define extr_u(r0, r1, i0, i1) _extr_u(_jit, r0, r1, i0, i1)
482 static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
483 # define depr(r0, r1, i0, i1) _depr(_jit, r0, r1, i0, i1)
484 static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
485 # define extr_c(r0, r1) _extr_c(_jit, r0, r1)
486 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
487 # define extr_uc(r0, r1) _extr_uc(_jit, r0, r1)
488 static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
489 # define extr_s(r0, r1) movsr(r0, r1)
490 # define extr_us(r0, r1) movsr_u(r0, r1)
491 # if __X64 && !__X64_32
492 # define extr_i(r0, r1) movir(r0, r1)
493 # define extr_ui(r0, r1) movir_u(r0, r1)
495 # define ldr_c(r0, r1) _ldr_c(_jit, r0, r1)
496 static void _ldr_c(jit_state_t*, jit_int32_t, jit_int32_t);
497 # define ldi_c(r0, i0) _ldi_c(_jit, r0, i0)
498 static void _ldi_c(jit_state_t*, jit_int32_t, jit_word_t);
499 # define ldr_uc(r0, r1) _ldr_uc(_jit, r0, r1)
500 static void _ldr_uc(jit_state_t*, jit_int32_t, jit_int32_t);
501 # define ldi_uc(r0, i0) _ldi_uc(_jit, r0, i0)
502 static void _ldi_uc(jit_state_t*, jit_int32_t, jit_word_t);
503 # define ldr_s(r0, r1) _ldr_s(_jit, r0, r1)
504 static void _ldr_s(jit_state_t*, jit_int32_t, jit_int32_t);
505 # define ldi_s(r0, i0) _ldi_s(_jit, r0, i0)
506 static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t);
507 # define ldr_us(r0, r1) _ldr_us(_jit, r0, r1)
508 static void _ldr_us(jit_state_t*, jit_int32_t, jit_int32_t);
509 # define ldi_us(r0, i0) _ldi_us(_jit, r0, i0)
510 static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t);
511 # if __X32 || !__X64_32
512 # define ldr_i(r0, r1) _ldr_i(_jit, r0, r1)
513 static void _ldr_i(jit_state_t*, jit_int32_t, jit_int32_t);
514 # define ldi_i(r0, i0) _ldi_i(_jit, r0, i0)
515 static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t);
519 # define ldr_i(r0, r1) _ldr_ui(_jit, r0, r1)
520 # define ldi_i(r0, i0) _ldi_ui(_jit, r0, i0)
522 # define ldr_ui(r0, r1) _ldr_ui(_jit, r0, r1)
523 # define ldi_ui(r0, i0) _ldi_ui(_jit, r0, i0)
525 static void _ldr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
526 static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t);
528 # define ldr_l(r0, r1) _ldr_l(_jit, r0, r1)
529 static void _ldr_l(jit_state_t*, jit_int32_t, jit_int32_t);
530 # define ldi_l(r0, i0) _ldi_l(_jit, r0, i0)
531 static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t);
534 # define ldxr_c(r0, r1, r2) _ldxr_c(_jit, r0, r1, r2)
535 static void _ldxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
536 # define ldxi_c(r0, r1, i0) _ldxi_c(_jit, r0, r1, i0)
537 static void _ldxi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
538 # define ldxr_uc(r0, r1, r2) _ldxr_uc(_jit, r0, r1, r2)
539 static void _ldxr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
540 # define ldxi_uc(r0, r1, i0) _ldxi_uc(_jit, r0, r1, i0)
541 static void _ldxi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
542 # define ldxr_s(r0, r1, r2) _ldxr_s(_jit, r0, r1, r2)
543 static void _ldxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
544 # define ldxi_s(r0, r1, i0) _ldxi_s(_jit, r0, r1, i0)
545 static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
546 # define ldxr_us(r0, r1, r2) _ldxr_us(_jit, r0, r1, r2)
547 static void _ldxr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
548 # define ldxi_us(r0, r1, i0) _ldxi_us(_jit, r0, r1, i0)
549 static void _ldxi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
550 # if __X32 || !__X64_32
551 # define ldxr_i(r0, r1, r2) _ldxr_i(_jit, r0, r1, r2)
552 static void _ldxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
553 # define ldxi_i(r0, r1, i0) _ldxi_i(_jit, r0, r1, i0)
554 static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
558 # define ldxr_i(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2)
559 # define ldxi_i(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0)
561 # define ldxr_ui(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2)
562 # define ldxi_ui(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0)
564 static void _ldxr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
565 static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
567 # define ldxr_l(r0, r1, r2) _ldxr_l(_jit, r0, r1, r2)
568 static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
569 # define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0)
570 static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
573 # define unldr(r0, r1, i0) generic_unldr(r0, r1, i0)
574 # define unldi(r0, i0, i1) generic_unldi(r0, i0, i1)
575 # define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0)
576 # define unldi_u(r0, i0, i1) generic_unldi_u(r0, i0, i1)
577 # define str_c(r0, r1) _str_c(_jit, r0, r1)
578 static void _str_c(jit_state_t*, jit_int32_t, jit_int32_t);
579 # define sti_c(i0, r0) _sti_c(_jit, i0, r0)
580 static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t);
581 # define str_s(r0, r1) _str_s(_jit, r0, r1)
582 static void _str_s(jit_state_t*, jit_int32_t, jit_int32_t);
583 # define sti_s(i0, r0) _sti_s(_jit, i0, r0)
584 static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t);
585 # define str_i(r0, r1) _str_i(_jit, r0, r1)
586 static void _str_i(jit_state_t*, jit_int32_t, jit_int32_t);
587 # define sti_i(i0, r0) _sti_i(_jit, i0, r0)
588 static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t);
589 # if __X64 && !__X64_32
590 # define str_l(r0, r1) _str_l(_jit, r0, r1)
591 static void _str_l(jit_state_t*, jit_int32_t, jit_int32_t);
592 # define sti_l(i0, r0) _sti_l(_jit, i0, r0)
593 static void _sti_l(jit_state_t*, jit_word_t, jit_int32_t);
595 # define stxr_c(r0, r1, r2) _stxr_c(_jit, r0, r1, r2)
596 static void _stxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
597 # define stxi_c(i0, r0, r1) _stxi_c(_jit, i0, r0, r1)
598 static void _stxi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
599 # define stxr_s(r0, r1, r2) _stxr_s(_jit, r0, r1, r2)
600 static void _stxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
601 # define stxi_s(i0, r0, r1) _stxi_s(_jit, i0, r0, r1)
602 static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
603 # define stxr_i(r0, r1, r2) _stxr_i(_jit, r0, r1, r2)
604 static void _stxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
605 # define stxi_i(i0, r0, r1) _stxi_i(_jit, i0, r0, r1)
606 static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
607 # if __X64 && !__X64_32
608 # define stxr_l(r0, r1, r2) _stxr_l(_jit, r0, r1, r2)
609 static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
610 # define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1)
611 static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
613 #define unstr(r0, r1, i0) generic_unstr(r0, r1, i0)
614 #define unsti(i0, r0, i1) generic_unsti(i0, r0, i1)
/*
 * Conditional jump helpers.  jcc(code, i0) calls _jcc with the implicit
 * `_jit`; every j<cond>(i0) shorthand below expands to jcc with the
 * X86_CC_<COND> constant of the same name, so shorthands whose constants
 * share a value (e.g. jb, jc, jnae) are interchangeable.
 * NOTE(review): the meaning of `i0` and of the jit_word_t returned by _jcc
 * is not visible here -- confirm at the definition.
 */
615 # define jcc(code, i0) _jcc(_jit, code, i0)
616 # define jo(i0) jcc(X86_CC_O, i0)
617 # define jno(i0) jcc(X86_CC_NO, i0)
618 # define jnae(i0) jcc(X86_CC_NAE, i0)
619 # define jb(i0) jcc(X86_CC_B, i0)
620 # define jc(i0) jcc(X86_CC_C, i0)
621 # define jae(i0) jcc(X86_CC_AE, i0)
622 # define jnb(i0) jcc(X86_CC_NB, i0)
623 # define jnc(i0) jcc(X86_CC_NC, i0)
624 # define je(i0) jcc(X86_CC_E, i0)
625 # define jz(i0) jcc(X86_CC_Z, i0)
626 # define jne(i0) jcc(X86_CC_NE, i0)
627 # define jnz(i0) jcc(X86_CC_NZ, i0)
628 # define jbe(i0) jcc(X86_CC_BE, i0)
629 # define jna(i0) jcc(X86_CC_NA, i0)
630 # define ja(i0) jcc(X86_CC_A, i0)
631 # define jnbe(i0) jcc(X86_CC_NBE, i0)
632 # define js(i0) jcc(X86_CC_S, i0)
633 # define jns(i0) jcc(X86_CC_NS, i0)
634 # define jp(i0) jcc(X86_CC_P, i0)
635 # define jpe(i0) jcc(X86_CC_PE, i0)
636 # define jnp(i0) jcc(X86_CC_NP, i0)
637 # define jpo(i0) jcc(X86_CC_PO, i0)
638 # define jl(i0) jcc(X86_CC_L, i0)
639 # define jnge(i0) jcc(X86_CC_NGE, i0)
640 # define jge(i0) jcc(X86_CC_GE, i0)
641 # define jnl(i0) jcc(X86_CC_NL, i0)
642 # define jle(i0) jcc(X86_CC_LE, i0)
643 # define jng(i0) jcc(X86_CC_NG, i0)
644 # define jg(i0) jcc(X86_CC_G, i0)
645 # define jnle(i0) jcc(X86_CC_NLE, i0)
/* Shared implementation behind jcc(): (jit state, condition code, word). */
646 static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t);
/*
 * `s`-suffixed conditional jump helpers.  jccs(code, i0) calls _jccs with
 * the implicit `_jit`; every j<cond>s(i0) shorthand expands to jccs with
 * the X86_CC_<COND> constant of the same name, mirroring the jcc family.
 * NOTE(review): the `s` suffix presumably selects the short-displacement
 * encoding -- confirm at the definition of _jccs, along with the meaning of
 * `i0` and of the returned jit_word_t.
 */
647 # define jccs(code, i0) _jccs(_jit, code, i0)
648 # define jos(i0) jccs(X86_CC_O, i0)
649 # define jnos(i0) jccs(X86_CC_NO, i0)
650 # define jnaes(i0) jccs(X86_CC_NAE, i0)
651 # define jbs(i0) jccs(X86_CC_B, i0)
652 # define jcs(i0) jccs(X86_CC_C, i0)
653 # define jaes(i0) jccs(X86_CC_AE, i0)
654 # define jnbs(i0) jccs(X86_CC_NB, i0)
655 # define jncs(i0) jccs(X86_CC_NC, i0)
656 # define jes(i0) jccs(X86_CC_E, i0)
657 # define jzs(i0) jccs(X86_CC_Z, i0)
658 # define jnes(i0) jccs(X86_CC_NE, i0)
659 # define jnzs(i0) jccs(X86_CC_NZ, i0)
660 # define jbes(i0) jccs(X86_CC_BE, i0)
661 # define jnas(i0) jccs(X86_CC_NA, i0)
662 # define jas(i0) jccs(X86_CC_A, i0)
663 # define jnbes(i0) jccs(X86_CC_NBE, i0)
664 # define jss(i0) jccs(X86_CC_S, i0)
665 # define jnss(i0) jccs(X86_CC_NS, i0)
666 # define jps(i0) jccs(X86_CC_P, i0)
667 # define jpes(i0) jccs(X86_CC_PE, i0)
668 # define jnps(i0) jccs(X86_CC_NP, i0)
669 # define jpos(i0) jccs(X86_CC_PO, i0)
670 # define jls(i0) jccs(X86_CC_L, i0)
671 # define jnges(i0) jccs(X86_CC_NGE, i0)
672 # define jges(i0) jccs(X86_CC_GE, i0)
673 # define jnls(i0) jccs(X86_CC_NL, i0)
674 # define jles(i0) jccs(X86_CC_LE, i0)
675 # define jngs(i0) jccs(X86_CC_NG, i0)
676 # define jgs(i0) jccs(X86_CC_G, i0)
677 # define jnles(i0) jccs(X86_CC_NLE, i0)
/* Shared implementation behind jccs(): (jit state, condition code, word). */
678 static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t);
679 # define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1)
680 static jit_word_t _jcr(jit_state_t*,
681 jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
682 # define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1)
683 static jit_word_t _jci(jit_state_t*,
684 jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
685 # define jci0(code, i0, r0) _jci0(_jit, code, i0, r0)
686 static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
/*
 * Compare-and-branch emitters.  Every macro supplies the implicit _jit
 * state to the static function of the same name prefixed with '_'.
 * As far as the prototypes show:
 *   - names ending in 'r' take two jit_int32_t operands (r0, r1);
 *   - names ending in 'i' take one jit_int32_t (r0) and one jit_word_t
 *     immediate (i1);
 *   - the leading argument i0 is a jit_word_t and every function
 *     returns a jit_word_t.
 * NOTE(review): i0 is presumably the branch target and the result the
 * location to patch later; the _u suffix presumably selects the
 * unsigned comparison -- confirm against the definitions.
 */
687 # define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1)
688 static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
689 # define blti(i0, r0, i1) _blti(_jit, i0, r0, i1)
690 static jit_word_t _blti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
691 # define bltr_u(i0, r0, r1) _bltr_u(_jit, i0, r0, r1)
692 static jit_word_t _bltr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
693 # define blti_u(i0, r0, i1) _blti_u(_jit, i0, r0, i1)
694 static jit_word_t _blti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
695 # define bler(i0, r0, r1) _bler(_jit, i0, r0, r1)
696 static jit_word_t _bler(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
697 # define blei(i0, r0, i1) _blei(_jit, i0, r0, i1)
698 static jit_word_t _blei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
699 # define bler_u(i0, r0, r1) _bler_u(_jit, i0, r0, r1)
700 static jit_word_t _bler_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
701 # define blei_u(i0, r0, i1) _blei_u(_jit, i0, r0, i1)
702 static jit_word_t _blei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
703 # define beqr(i0, r0, r1) _beqr(_jit, i0, r0, r1)
704 static jit_word_t _beqr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
705 # define beqi(i0, r0, i1) _beqi(_jit, i0, r0, i1)
706 static jit_word_t _beqi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
707 # define bger(i0, r0, r1) _bger(_jit, i0, r0, r1)
708 static jit_word_t _bger(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
709 # define bgei(i0, r0, i1) _bgei(_jit, i0, r0, i1)
710 static jit_word_t _bgei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
711 # define bger_u(i0, r0, r1) _bger_u(_jit, i0, r0, r1)
712 static jit_word_t _bger_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
713 # define bgei_u(i0, r0, i1) _bgei_u(_jit, i0, r0, i1)
714 static jit_word_t _bgei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
715 # define bgtr(i0, r0, r1) _bgtr(_jit, i0, r0, r1)
716 static jit_word_t _bgtr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
717 # define bgti(i0, r0, i1) _bgti(_jit, i0, r0, i1)
718 static jit_word_t _bgti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
719 # define bgtr_u(i0, r0, r1) _bgtr_u(_jit, i0, r0, r1)
720 static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
721 # define bgti_u(i0, r0, i1) _bgti_u(_jit, i0, r0, i1)
722 static jit_word_t _bgti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
723 # define bner(i0, r0, r1) _bner(_jit, i0, r0, r1)
724 static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
725 # define bnei(i0, r0, i1) _bnei(_jit, i0, r0, i1)
726 static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
/*
 * bms{r,i} / bmc{r,i}: wrappers that supply the implicit _jit state.
 * The 'r' forms take two jit_int32_t operands (r0, r1); the 'i' forms
 * take r0 and a jit_word_t immediate i1.  All return a jit_word_t.
 * NOTE(review): presumably "branch if mask set" / "branch if mask
 * clear" on r0 tested against r1 / i1 -- confirm against the
 * definitions, which are not part of this section.
 */
727 # define bmsr(i0, r0, r1) _bmsr(_jit, i0, r0, r1)
728 static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
729 # define bmsi(i0, r0, i1) _bmsi(_jit, i0, r0, i1)
730 static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
731 # define bmcr(i0, r0, r1) _bmcr(_jit, i0, r0, r1)
732 static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
733 # define bmci(i0, r0, i1) _bmci(_jit, i0, r0, i1)
734 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
/*
 * bo{add,sub}{r,i}[_u] and bx{add,sub}{r,i}[_u]: wrappers that supply
 * the implicit _jit state.  The 'r' forms take two jit_int32_t
 * operands (r0, r1); the 'i' forms take r0 and a jit_word_t immediate
 * i1.  The leading i0 is a jit_word_t and all return a jit_word_t.
 * NOTE(review): presumably these perform the add/subtract on r0 and
 * branch on overflow (bo*) or on no overflow (bx*), with _u using the
 * unsigned (carry) condition -- confirm against the definitions.
 */
735 # define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1)
736 static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
737 # define boaddi(i0, r0, i1) _boaddi(_jit, i0, r0, i1)
738 static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
739 # define boaddr_u(i0, r0, r1) _boaddr_u(_jit, i0, r0, r1)
740 static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
741 # define boaddi_u(i0, r0, i1) _boaddi_u(_jit, i0, r0, i1)
742 static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
743 # define bxaddr(i0, r0, r1) _bxaddr(_jit, i0, r0, r1)
744 static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
745 # define bxaddi(i0, r0, i1) _bxaddi(_jit, i0, r0, i1)
746 static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
747 # define bxaddr_u(i0, r0, r1) _bxaddr_u(_jit, i0, r0, r1)
748 static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
749 # define bxaddi_u(i0, r0, i1) _bxaddi_u(_jit, i0, r0, i1)
750 static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
751 # define bosubr(i0, r0, r1) _bosubr(_jit, i0, r0, r1)
752 static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
753 # define bosubi(i0, r0, i1) _bosubi(_jit, i0, r0, i1)
754 static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
755 # define bosubr_u(i0, r0, r1) _bosubr_u(_jit, i0, r0, r1)
756 static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
757 # define bosubi_u(i0, r0, i1) _bosubi_u(_jit, i0, r0, i1)
758 static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
759 # define bxsubr(i0, r0, r1) _bxsubr(_jit, i0, r0, r1)
760 static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
761 # define bxsubi(i0, r0, i1) _bxsubi(_jit, i0, r0, i1)
762 static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
763 # define bxsubr_u(i0, r0, r1) _bxsubr_u(_jit, i0, r0, r1)
764 static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
765 # define bxsubi_u(i0, r0, i1) _bxsubi_u(_jit, i0, r0, i1)
766 static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
/*
 * Call emitters; both macros supply the implicit _jit state.
 * callr takes a register (jit_int32_t) and returns nothing; calli
 * takes a jit_word_t and returns a jit_word_t.
 * NOTE(review): the calli result is presumably the location of the
 * emitted call, for later patching -- confirm in the definition.
 */
767 # define callr(r0) _callr(_jit, r0)
768 static void _callr(jit_state_t*, jit_int32_t);
769 # define calli(i0) _calli(_jit, i0)
770 static jit_word_t _calli(jit_state_t*, jit_word_t);
772 # define calli_p(i0) _calli_p(_jit, i0)
773 static jit_word_t _calli_p(jit_state_t*, jit_word_t);
775 # define calli_p(i0) calli(i0)
777 # define jmpr(r0) _jmpr(_jit, r0)
778 static void _jmpr(jit_state_t*, jit_int32_t);
779 # define jmpi(i0) _jmpi(_jit, i0)
780 static jit_word_t _jmpi(jit_state_t*, jit_word_t);
782 # define jmpi_p(i0) _jmpi_p(_jit, i0)
783 static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
785 # define jmpi_p(i0) jmpi(i0)
/*
 * jmpsi takes a jit_uint8_t and returns a jit_word_t; callers in this
 * file keep that value and later pass it to patch_at().
 */
787 # define jmpsi(i0) _jmpsi(_jit, i0)
788 static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t);
/* prolog / epilog each receive a jit_node_t* and return nothing. */
789 # define prolog(node) _prolog(_jit, node)
790 static void _prolog(jit_state_t*, jit_node_t*);
791 # define epilog(node) _epilog(_jit, node)
792 static void _epilog(jit_state_t*, jit_node_t*);
/*
 * Variadic-argument helpers: vastart takes one register, vaarg two,
 * and vaarg_d two registers plus a jit_bool_t flag (i0).
 * NOTE(review): the meaning of the vaarg_d flag is not visible in this
 * section -- confirm in the definition.
 */
793 # define vastart(r0) _vastart(_jit, r0)
794 static void _vastart(jit_state_t*, jit_int32_t);
795 # define vaarg(r0, r1) _vaarg(_jit, r0, r1)
796 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
797 # define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0)
798 static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
/*
 * patch_at(instr, label): both arguments are jit_word_t.  In this file
 * `instr` is a value previously returned by a jump emitter and `label`
 * is typically the current _jit->pc.w.
 */
799 # define patch_at(instr, label) _patch_at(_jit, instr, label)
800 static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
801 # if !defined(HAVE_FFSL)
803 # define ffsl(i) __builtin_ffs(i)
805 # define ffsl(l) __builtin_ffsl(l)
808 # define jit_cmov_p() jit_cpu.cmov
813 _rex(jit_state_t *_jit, jit_int32_t l, jit_int32_t w,
814 jit_int32_t r, jit_int32_t x, jit_int32_t b)
817 jit_int32_t v = 0x40 | (w << 3);
831 _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
832 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
836 /* Use ms == _SCL8 to tell it is a %rip relative displacement */
840 mrm(0x00, r7(rd), 0x05);
843 mrm(0x00, r7(rd), 0x04);
844 sib(_SCL1, 0x04, 0x05);
849 else if (r7(rb) == _RSP_REGNO) {
851 mrm(0x00, r7(rd), 0x04);
854 else if ((jit_int8_t)md == md) {
855 mrm(0x01, r7(rd), 0x04);
860 mrm(0x02, r7(rd), 0x04);
866 if (md == 0 && r7(rb) != _RBP_REGNO)
867 mrm(0x00, r7(rd), r7(rb));
868 else if ((jit_int8_t)md == md) {
869 mrm(0x01, r7(rd), r7(rb));
873 mrm(0x02, r7(rd), r7(rb));
878 else if (rb == _NOREG) {
879 mrm(0x00, r7(rd), 0x04);
880 sib(ms, r7(ri), 0x05);
883 else if (r8(ri) != _RSP_REGNO) {
884 if (md == 0 && r7(rb) != _RBP_REGNO) {
885 mrm(0x00, r7(rd), 0x04);
886 sib(ms, r7(ri), r7(rb));
888 else if ((jit_int8_t)md == md) {
889 mrm(0x01, r7(rd), 0x04);
890 sib(ms, r7(ri), r7(rb));
894 mrm(0x02, r7(rd), 0x04);
895 sib(ms, r7(ri), r7(rb));
900 fprintf(stderr, "illegal index register");
906 _vex(jit_state_t *_jit, jit_int32_t r, jit_int32_t x, jit_int32_t b,
907 jit_int32_t map, jit_int32_t w, jit_int32_t vvvv, jit_int32_t l,
911 if (r == _NOREG) r = 0;
912 if (x == _NOREG) x = 0;
913 if (b == _NOREG) b = 0;
914 if (map == 1 && w == 0 && ((x|b) & 8) == 0) {
915 /* Two byte prefix */
918 v = (r & 8) ? 0 : 0x80;
921 /* Three byte prefix */
929 if (!(r & 8)) v |= 0x80;
931 if (!(x & 8)) v |= 0x40;
933 if (!(b & 8)) v |= 0x20;
939 v |= (~vvvv & 0x0f) << 3;
948 _nop(jit_state_t *_jit, jit_int32_t count)
964 case 3: /* NOP DWORD ptr [EAX] */
965 ic(0x0f); ic(0x1f); ic(0x00);
967 case 4: /* NOP DWORD ptr [EAX + 00H] */
968 ic(0x0f); ic(0x1f); ic(0x40); ic(0x00);
970 case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
971 ic(0x0f); ic(0x1f); ic(0x44); ic(0x00);
974 case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
975 ic(0x66); ic(0x0f); ic(0x1f); ic(0x44);
978 case 7: /* NOP DWORD ptr [EAX + 00000000H] */
979 ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000);
981 case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
982 ic(0x0f); ic(0x1f); ic(0x84); ic(0x00);
985 case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
986 ic(0x66); ic(0x0f); ic(0x1f); ic(0x84);
987 ic(0x00); ii(0x0000);
994 _lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
995 jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
997 rex(0, WIDE, rd, ri, rb);
999 rx(rd, md, rb, ri, ms);
1003 _pushr(jit_state_t *_jit, jit_int32_t r0)
1005 rex(0, WIDE, 0, 0, r0);
1010 _popr(jit_state_t *_jit, jit_int32_t r0)
1012 rex(0, WIDE, 0, 0, r0);
1017 _xchgr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1019 rex(0, WIDE, r1, _NOREG, r0);
1021 mrm(0x03, r7(r1), r7(r0));
1025 _testr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1027 rex(0, WIDE, r1, _NOREG, r0);
1029 mrm(0x03, r7(r1), r7(r0));
1033 _testi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1035 rex(0, WIDE, _NOREG, _NOREG, r0);
1036 if (r0 == _RAX_REGNO)
1040 mrm(0x03, 0x00, r7(r0));
1046 _cc(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1048 rex(0, 0, _NOREG, _NOREG, r0);
1051 mrm(0x03, 0x00, r7(r0));
1055 _alur(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
1057 rex(0, WIDE, r1, _NOREG, r0);
1059 mrm(0x03, r7(r1), r7(r0));
1063 _alui(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
1066 if (can_sign_extend_int_p(i0)) {
1067 rex(0, WIDE, _NOREG, _NOREG, r0);
1068 if ((jit_int8_t)i0 == i0) {
1070 ic(0xc0 | code | r7(r0));
1074 if (r0 == _RAX_REGNO)
1078 ic(0xc0 | code | r7(r0));
1084 reg = jit_get_reg(jit_class_gpr);
1086 alur(code, r0, rn(reg));
1092 _save(jit_state_t *_jit, jit_int32_t r0)
1094 if (!_jitc->function->regoff[r0]) {
1095 _jitc->function->regoff[r0] = jit_allocai(sizeof(jit_word_t));
1098 assert(!jit_regset_tstbit(&_jitc->regsav, r0));
1099 jit_regset_setbit(&_jitc->regsav, r0);
1100 stxi(_jitc->function->regoff[r0], _RBP_REGNO, r0);
1104 _load(jit_state_t *_jit, jit_int32_t r0)
1106 assert(_jitc->function->regoff[r0]);
1107 assert(jit_regset_tstbit(&_jitc->regsav, r0));
1108 jit_regset_clrbit(&_jitc->regsav, r0);
1109 ldxi(r0, _RBP_REGNO, _jitc->function->regoff[r0]);
1113 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1120 lea(0, r1, r2, _SCL1, r0);
1124 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1135 else if (can_sign_extend_int_p(i0)) {
1139 lea(i0, r1, _NOREG, _SCL1, r0);
1141 else if (r0 != r1) {
1146 reg = jit_get_reg(jit_class_gpr);
1154 _addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1165 _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1168 if (can_sign_extend_int_p(i0)) {
1172 else if (r0 == r1) {
1173 reg = jit_get_reg(jit_class_gpr);
1185 _iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1187 /* FIXME: this is not doing what I expected for the simple test case:
1188 * mov $0xffffffffffffffff, %rax -- rax = 0xffffffffffffffff (-1)
1189 * mov $0xffffffffffffffff, %r10 -- r10 = 0xffffffffffffffff (-1)
1190 * mov $0x1, %r11d -- r11 = 1
1191 * xor %rbx, %rbx -- rbx = 0
1194 * add %r11, %rax -- r11 = 0x10000000000000000 (0)
1195 * does not fit in 64 bit ^
1197 * $2 = [ CF PF AF ZF IF ]
1198 * adcx %r10, %rbx -- r10 = 0xffffffffffffffff (-1)
1200 * $3 = [ CF PF AF ZF IF ]
1202 * $4 = 0xffffffffffffffff
1203 * but, r10 should be zero, as it is:
1204 * -1 (%r10) + 0 (%rbx) + carry (!!eflags.CF)
1205 * FIXME: maybe should only use ADCX in the third operation onward, that
1206 * is, after the first ADC? In either case, the add -1+0+carry should
1207 * have used and consumed the carry? At least this is what is expected
1211 /* Significantly longer instruction, but avoid cpu stalls as only
1212 * the carry flag is used in a sequence. */
1216 rex(0, WIDE, r1, _NOREG, r0);
1220 mrm(0x03, r7(r1), r7(r0));
1224 alur(X86_ADC, r0, r1);
1228 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1239 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1244 /* Do not mix ADC and ADCX */
1247 can_sign_extend_int_p(i0)) {
1251 else if (r0 == r1) {
1252 reg = jit_get_reg(jit_class_gpr);
1254 iaddxr(r0, rn(reg));
1264 _subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1268 else if (r0 == r2) {
1279 _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1290 else if (can_sign_extend_int_p(i0)) {
1294 lea(-i0, r1, _NOREG, _SCL1, r0);
1296 else if (r0 != r1) {
1301 reg = jit_get_reg(jit_class_gpr);
1309 _subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1312 if (r0 == r2 && r0 != r1) {
1313 reg = jit_get_reg(jit_class_gpr);
1326 _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1330 if (can_sign_extend_int_p(i0))
1333 reg = jit_get_reg(jit_class_gpr);
1341 _subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1344 if (r0 == r2 && r0 != r1) {
1345 reg = jit_get_reg(jit_class_gpr);
1348 isubxr(r0, rn(reg));
1358 _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1362 if (can_sign_extend_int_p(i0))
1365 reg = jit_get_reg(jit_class_gpr);
1367 isubxr(r0, rn(reg));
1373 _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1380 _imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1382 rex(0, WIDE, r0, _NOREG, r1);
1385 mrm(0x03, r7(r0), r7(r1));
1389 _imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1392 if (can_sign_extend_int_p(i0)) {
1393 rex(0, WIDE, r0, _NOREG, r1);
1394 if ((jit_int8_t)i0 == i0) {
1396 mrm(0x03, r7(r0), r7(r1));
1401 mrm(0x03, r7(r0), r7(r1));
1406 reg = jit_get_reg(jit_class_gpr);
1414 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1427 _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1440 lea(0, _NOREG, r1, _SCL2, r0);
1443 lea(0, _NOREG, r1, _SCL4, r0);
1446 lea(0, _NOREG, r1, _SCL8, r0);
1449 if (i0 > 0 && !(i0 & (i0 - 1)))
1450 lshi(r0, r1, ffsl(i0) - 1);
1451 else if (can_sign_extend_int_p(i0))
1453 else if (r0 != r1) {
1463 #define savset(rn) \
1467 if (r1 != rn && r2 != rn) \
1471 #define isavset(rn) \
1479 #define qsavset(rn) \
1481 if (r0 != rn && r1 != rn) { \
1483 if (r2 != rn && r3 != rn) \
1487 #define allocr(rn, rv) \
1489 if (set & (1 << rn)) \
1490 (void)jit_get_reg(rv|jit_class_gpr|jit_class_named); \
1491 if (sav & (1 << rn)) { \
1492 if ( jit_regset_tstbit(&_jitc->regsav, rv) || \
1493 !jit_regset_tstbit(&_jitc->reglive, rv)) \
1494 sav &= ~(1 << rn); \
1499 #define clear(rn, rv) \
1501 if (set & (1 << rn)) \
1502 jit_unget_reg(rv); \
1503 if (sav & (1 << rn)) \
1508 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1509 jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1516 qsavset(_RDX_REGNO);
1517 qsavset(_RAX_REGNO);
1518 allocr(_RDX_REGNO, _RDX);
1519 allocr(_RAX_REGNO, _RAX);
1521 if (r3 == _RAX_REGNO)
1525 movr(_RAX_REGNO, r2);
1532 if (r0 != JIT_NOREG) {
1533 if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1534 xchgr(_RAX_REGNO, _RDX_REGNO);
1536 if (r0 != _RDX_REGNO)
1537 movr(r0, _RAX_REGNO);
1538 movr(r1, _RDX_REGNO);
1539 if (r0 == _RDX_REGNO)
1540 movr(r0, _RAX_REGNO);
1544 assert(r1 != JIT_NOREG);
1545 movr(r1, _RDX_REGNO);
1548 clear(_RDX_REGNO, _RDX);
1549 clear(_RAX_REGNO, _RAX);
1553 _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1554 jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1563 reg = jit_get_reg(jit_class_gpr);
1566 qmulr(r0, r1, r2, rn(reg));
1568 qmulr_u(r0, r1, r2, rn(reg));
1574 _sign_extend_rdx_rax(jit_state_t *_jit)
1576 rex(0, WIDE, 0, 0, 0);
1581 _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
1582 jit_bool_t sign, jit_bool_t divide)
1590 sav = set = use = 0;
1593 allocr(_RDX_REGNO, _RDX);
1594 allocr(_RAX_REGNO, _RAX);
1596 if (r2 == _RAX_REGNO) {
1597 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1598 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1599 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1600 jit_class_gpr|jit_class_named);
1603 movr(div, _RAX_REGNO);
1604 if (r1 != _RAX_REGNO)
1605 movr(_RAX_REGNO, r1);
1609 xchgr(r0, _RAX_REGNO);
1611 if (r0 != _RAX_REGNO)
1612 movr(r0, _RAX_REGNO);
1613 if (r1 != _RAX_REGNO)
1614 movr(_RAX_REGNO, r1);
1619 else if (r2 == _RDX_REGNO) {
1620 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1621 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1622 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1623 jit_class_gpr|jit_class_named);
1626 movr(div, _RDX_REGNO);
1627 if (r1 != _RAX_REGNO)
1628 movr(_RAX_REGNO, r1);
1631 if (r1 != _RAX_REGNO)
1632 movr(_RAX_REGNO, r1);
1633 movr(r0, _RDX_REGNO);
1638 if (r1 != _RAX_REGNO)
1639 movr(_RAX_REGNO, r1);
1644 sign_extend_rdx_rax();
1648 ixorr(_RDX_REGNO, _RDX_REGNO);
1656 movr(r0, _RAX_REGNO);
1658 movr(r0, _RDX_REGNO);
1660 clear(_RDX_REGNO, _RDX);
1661 clear(_RAX_REGNO, _RAX);
1665 _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
1666 jit_bool_t sign, jit_bool_t divide)
1686 if (i0 > 0 && !(i0 & (i0 - 1))) {
1689 rshi(r0, r0, ffsl(i0) - 1);
1691 rshi_u(r0, r0, ffsl(i0) - 1);
1697 else if (i0 == 1 || (sign && i0 == -1)) {
1701 else if (!sign && i0 > 0 && !(i0 & (i0 - 1))) {
1702 if (can_sign_extend_int_p(i0)) {
1706 else if (r0 != r1) {
1711 reg = jit_get_reg(jit_class_gpr);
1712 movi(rn(reg), i0 - 1);
1719 sav = set = use = 0;
1720 isavset(_RDX_REGNO);
1721 isavset(_RAX_REGNO);
1722 allocr(_RDX_REGNO, _RDX);
1723 allocr(_RAX_REGNO, _RAX);
1725 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) {
1726 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1727 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1728 jit_class_gpr|jit_class_named);
1736 movr(_RAX_REGNO, r1);
1739 sign_extend_rdx_rax();
1743 ixorr(_RDX_REGNO, _RDX_REGNO);
1751 movr(r0, _RAX_REGNO);
1753 movr(r0, _RDX_REGNO);
1755 clear(_RDX_REGNO, _RDX);
1756 clear(_RAX_REGNO, _RAX);
1760 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1761 jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1769 sav = set = use = 0;
1770 qsavset(_RDX_REGNO);
1771 qsavset(_RAX_REGNO);
1772 allocr(_RDX_REGNO, _RDX);
1773 allocr(_RAX_REGNO, _RAX);
1774 if (r3 == _RAX_REGNO) {
1775 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1776 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1777 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1778 jit_class_gpr|jit_class_named);
1781 movr(div, _RAX_REGNO);
1782 if (r2 != _RAX_REGNO)
1783 movr(_RAX_REGNO, r2);
1787 xchgr(r0, _RAX_REGNO);
1789 if (r0 != _RAX_REGNO)
1790 movr(r0, _RAX_REGNO);
1791 if (r2 != _RAX_REGNO)
1792 movr(_RAX_REGNO, r2);
1797 else if (r3 == _RDX_REGNO) {
1798 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1799 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1800 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1801 jit_class_gpr|jit_class_named);
1804 movr(div, _RDX_REGNO);
1805 if (r2 != _RAX_REGNO)
1806 movr(_RAX_REGNO, r2);
1809 if (r2 != _RAX_REGNO)
1810 movr(_RAX_REGNO, r2);
1811 movr(r0, _RDX_REGNO);
1816 if (r2 != _RAX_REGNO)
1817 movr(_RAX_REGNO, r2);
1821 sign_extend_rdx_rax();
1825 ixorr(_RDX_REGNO, _RDX_REGNO);
1831 if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1832 xchgr(_RAX_REGNO, _RDX_REGNO);
1834 if (r0 != _RDX_REGNO)
1835 movr(r0, _RAX_REGNO);
1836 movr(r1, _RDX_REGNO);
1837 if (r0 == _RDX_REGNO)
1838 movr(r0, _RAX_REGNO);
1841 clear(_RDX_REGNO, _RDX);
1842 clear(_RAX_REGNO, _RAX);
1846 _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1847 jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1851 reg = jit_get_reg(jit_class_gpr);
1854 qdivr(r0, r1, r2, rn(reg));
1856 qdivr_u(r0, r1, r2, rn(reg));
1861 _andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1876 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1884 else if (r0 == r1) {
1885 if (can_sign_extend_int_p(i0))
1888 reg = jit_get_reg(jit_class_gpr);
1901 _orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1916 _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1923 else if (can_sign_extend_int_p(i0)) {
1927 else if (r0 != r1) {
1932 reg = jit_get_reg(jit_class_gpr);
1940 _xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1955 _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1962 else if (can_sign_extend_int_p(i0)) {
1966 else if (r0 != r1) {
1971 reg = jit_get_reg(jit_class_gpr);
1979 _irotshr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1981 rex(0, WIDE, _RCX_REGNO, _NOREG, r0);
1983 mrm(0x03, code, r7(r0));
1987 _rotshr(jit_state_t *_jit, jit_int32_t code,
1988 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1993 if (r0 == _RCX_REGNO) {
1994 reg = jit_get_reg(jit_class_gpr);
1996 if (r2 != _RCX_REGNO)
1997 movr(_RCX_REGNO, r2);
1998 irotshr(code, rn(reg));
1999 movr(_RCX_REGNO, rn(reg));
2002 else if (r2 != _RCX_REGNO) {
2003 use = !jit_reg_free_p(_RCX);
2005 reg = jit_get_reg(jit_class_gpr);
2006 movr(rn(reg), _RCX_REGNO);
2010 if (r1 == _RCX_REGNO) {
2012 xchgr(r0, _RCX_REGNO);
2015 movr(_RCX_REGNO, r2);
2019 movr(_RCX_REGNO, r2);
2024 movr(_RCX_REGNO, rn(reg));
2035 _irotshi(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
2037 rex(0, WIDE, _NOREG, _NOREG, r0);
2040 mrm(0x03, code, r7(r0));
2044 mrm(0x03, code, r7(r0));
2050 _rotshi(jit_state_t *_jit, jit_int32_t code,
2051 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2055 irotshi(code, r0, i0);
2059 _xlshr(jit_state_t *_jit, jit_bool_t sign,
2060 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
2062 jit_int32_t sav, set;
2063 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
2064 jit_word_t over, zero, over_done, done;
2066 /* %RCX must be used for shift. */
2067 qsavset(_RCX_REGNO);
2068 allocr(_RCX_REGNO, _RCX);
2069 /* Almost certainly not %RCX */
2071 if (r0 == _RCX_REGNO) {
2072 s0 = jit_get_reg(jit_class_gpr);
2077 /* r0 == r1 is undefined behavior */
2078 if (r1 == _RCX_REGNO) {
2079 s1 = jit_get_reg(jit_class_gpr);
2083 /* Allocate a temporary if a register is used more than once, or if
2084 * the value to shift is %RCX */
2085 if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
2086 s2 = jit_get_reg(jit_class_gpr);
2092 /* Allocate temporary if shift is also one of the outputs */
2093 if (r0 == r3 || r1 == r3) {
2094 s3 = jit_get_reg(jit_class_gpr);
2100 /* Bits to shift right */
2103 /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
2104 movr(_RCX_REGNO, t3);
2105 /* Copy value to low register */
2107 /* SHLD shifts t0 left pulling extra bits in the right from t1.
2108 * It is very handy to shift bignums, but lightning does not support
2109 * these, nor 128 bit integers. The use of q{l,}sh{r,i} is to verify
2110 * if there is precision loss in a shift and/or have it as a quick way
2111 * to multiply or divide by powers of two. */
2113 rex(0, WIDE, t1, _NOREG, t0);
2116 mrm(0x03, r7(t1), r7(t0));
2117 /* Must swap results if shift value is __WORDSIZE */
2118 alui(X86_CMP, t3, __WORDSIZE);
2119 over = jes(_jit->pc.w);
2120 /* Calculate bits to shift right and fill high register */
2121 rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
2123 rshr(t1, t2, _RCX_REGNO);
2125 rshr_u(t1, t2, _RCX_REGNO);
2126 /* FIXME t3 == %rcx only happens in 32 bit as %a3 (JIT_A3) is not
2127 * available -- it might be made available at some point, to
2128 * allow optimizing usage of arguments in registers. For now
2129 * keep the code, as one might cheat and use _RCX directly,
2130 * which is not officially supported, but *must* work. */
2131 /* Need to sign extend high register if shift value is zero */
2132 if (t3 == _RCX_REGNO)
2133 alui(X86_CMP, t3, __WORDSIZE);
2135 alui(X86_CMP, t3, 0);
2137 zero = jes(_jit->pc.w);
2138 done = jmpsi(_jit->pc.w);
2139 /* Swap registers if shift is __WORDSIZE */
2140 patch_at(over, _jit->pc.w);
2142 over_done = jmpsi(_jit->pc.w);
2143 /* If shift value is zero */
2144 patch_at(zero, _jit->pc.w);
2146 rshi(t1, t2, __WORDSIZE - 1);
2149 patch_at(over_done, _jit->pc.w);
2150 patch_at(done, _jit->pc.w);
2151 /* Release %RCX (if spilled) after branches */
2152 clear(_RCX_REGNO, _RCX);
2168 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2173 lea(0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? _SCL4 : _SCL8, r0);
2175 rotshi(X86_SHL, r0, r1, i0);
2179 _xlshi(jit_state_t *_jit, jit_bool_t sign,
2180 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
2185 rshi(r1, r2, __WORDSIZE - 1);
2189 else if (i0 == __WORDSIZE) {
2194 assert((jit_uword_t)i0 <= __WORDSIZE);
2196 rshi(r1, r2, __WORDSIZE - i0);
2198 rshi_u(r1, r2, __WORDSIZE - i0);
2204 _xrshr(jit_state_t *_jit, jit_bool_t sign,
2205 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
2207 jit_int32_t sav, set;
2208 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
2209 jit_word_t over, zero, done;
2211 /* %RCX must be used for shift. */
2212 qsavset(_RCX_REGNO);
2213 allocr(_RCX_REGNO, _RCX);
2214 /* Almost certainly not %RCX */
2216 if (r0 == _RCX_REGNO) {
2217 s0 = jit_get_reg(jit_class_gpr);
2222 /* r0 == r1 is undefined behavior */
2223 if (r1 == _RCX_REGNO) {
2224 s1 = jit_get_reg(jit_class_gpr);
2228 /* Allocate a temporary if a register is used more than once, or if
2229 * the value to shift is %RCX */
2230 if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
2231 s2 = jit_get_reg(jit_class_gpr);
2237 /* Allocate temporary if shift is also one of the outputs */
2238 if (r0 == r3 || r1 == r3) {
2239 s3 = jit_get_reg(jit_class_gpr);
2245 /* Bits to shift left */
2247 rshi(t1, t2, __WORDSIZE - 1);
2251 /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
2252 movr(_RCX_REGNO, t3);
2253 /* Copy value to low register */
2255 /* SHRD shifts t0 right pulling extra bits in the left from t1 */
2257 rex(0, WIDE, t1, _NOREG, t0);
2260 mrm(0x03, r7(t1), r7(t0));
2261 /* Must swap results if shift value is __WORDSIZE */
2262 alui(X86_CMP, t3, __WORDSIZE);
2263 over = jes(_jit->pc.w);
2264 /* Already zero or sign extended if shift value is zero */
2265 alui(X86_CMP, t3, 0);
2266 zero = jes(_jit->pc.w);
2267 /* Calculate bits to shift left and fill high register */
2268 rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
2269 lshr(t1, t2, _RCX_REGNO);
2270 done = jmpsi(_jit->pc.w);
2271 /* Swap registers if shift is __WORDSIZE */
2272 patch_at(over, _jit->pc.w);
2274 /* If shift value is zero */
2275 patch_at(zero, _jit->pc.w);
2276 patch_at(done, _jit->pc.w);
2277 /* Release %RCX (if spilled) after branches */
2278 clear(_RCX_REGNO, _RCX);
2294 _xrshi(jit_state_t *_jit, jit_bool_t sign,
2295 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
2300 rshi(r1, r2, __WORDSIZE - 1);
2304 else if (i0 == __WORDSIZE) {
2307 rshi(r0, r2, __WORDSIZE - 1);
2312 assert((jit_uword_t)i0 <= __WORDSIZE);
2313 lshi(r1, r2, __WORDSIZE - i0);
2322 _unr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
2324 rex(0, WIDE, _NOREG, _NOREG, r0);
2326 mrm(0x03, code, r7(r0));
2330 _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2341 _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2349 _incr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2353 rex(0, WIDE, _NOREG, _NOREG, r0);
2362 _decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2366 rex(0, WIDE, _NOREG, _NOREG, r0);
2376 _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2383 _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2390 rex(0, WIDE, r0, _NOREG, r1);
2393 mrm(0x3, r7(r0), r7(r1));
2395 /* jump if undefined: r1 == 0 */
2396 w = jccs(X86_CC_E, _jit->pc.w);
2397 /* count leading zeros */
2398 rsbi(r0, r0, __WORDSIZE - 1);
2400 x = jmpsi(_jit->pc.w);
2402 patch_at(w, _jit->pc.w);
2403 movi(r0, __WORDSIZE);
2405 patch_at(x, _jit->pc.w);
2407 /* LZCNT has defined behavior for value zero and count leading zeros */
2411 _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2418 _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2424 t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk);
2428 movi(rn(t0), __WORDSIZE);
2434 rex(0, WIDE, r0, _NOREG, r1);
2437 mrm(0x3, r7(r0), r7(r1));
2439 /* No conditional move available, or a temporary would need spill/reload */
2441 w = jccs(X86_CC_E, _jit->pc.w);
2442 movi(r0, __WORDSIZE);
2443 patch_at(w, _jit->pc.w);
2447 rex(0, WIDE, r0, _NOREG, rn(t0));
2450 mrm(0x3, r7(r0), r7(rn(t0)));
2454 /* TZCNT has defined behavior for value zero */
2458 _rbitr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
2461 jit_int32_t sav, set;
2462 jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
2463 static const unsigned char swap_tab[256] = {
2464 0, 128, 64, 192, 32, 160, 96, 224,
2465 16, 144, 80, 208, 48, 176, 112, 240,
2466 8, 136, 72, 200, 40, 168, 104, 232,
2467 24, 152, 88, 216 ,56, 184, 120, 248,
2468 4, 132, 68, 196, 36, 164, 100, 228,
2469 20, 148, 84, 212, 52, 180, 116, 244,
2470 12, 140, 76, 204, 44, 172, 108, 236,
2471 28, 156, 92, 220, 60, 188, 124, 252,
2472 2, 130, 66, 194, 34, 162, 98, 226,
2473 18, 146, 82, 210, 50, 178, 114, 242,
2474 10, 138, 74, 202, 42, 170, 106, 234,
2475 26, 154, 90, 218, 58, 186, 122, 250,
2476 6, 134, 70, 198, 38, 166, 102, 230,
2477 22, 150, 86, 214, 54, 182, 118, 246,
2478 14, 142, 78, 206, 46, 174, 110, 238,
2479 30, 158, 94, 222, 62, 190, 126, 254,
2480 1, 129, 65, 193, 33, 161, 97, 225,
2481 17, 145, 81, 209, 49, 177, 113, 241,
2482 9, 137, 73, 201, 41, 169, 105, 233,
2483 25, 153, 89, 217, 57, 185, 121, 249,
2484 5, 133, 69, 197, 37, 165, 101, 229,
2485 21, 149, 85, 213, 53, 181, 117, 245,
2486 13, 141, 77, 205, 45, 173, 109, 237,
2487 29, 157, 93, 221, 61, 189, 125, 253,
2488 3, 131, 67, 195, 35, 163, 99, 227,
2489 19, 147, 83, 211, 51, 179, 115, 243,
2490 11, 139, 75, 203, 43, 171, 107, 235,
2491 27, 155, 91, 219, 59, 187, 123, 251,
2492 7, 135, 71, 199, 39, 167, 103, 231,
2493 23, 151, 87, 215, 55, 183, 119, 247,
2494 15, 143, 79, 207, 47, 175, 111, 239,
2495 31, 159, 95, 223, 63, 191, 127, 255
2498 isavset(_RCX_REGNO);
2499 allocr(_RCX_REGNO, _RCX);
2500 if (r0 == _RCX_REGNO) {
2501 t0 = jit_get_reg(jit_class_gpr);
2508 if (r1 == _RCX_REGNO || r0 == r1) {
2509 t1 = jit_get_reg(jit_class_gpr);
2517 t2 = jit_get_reg(jit_class_gpr);
2518 t3 = jit_get_reg(jit_class_gpr);
2519 #if __WORDSIZE == 32
2520 /* Avoid condition that causes running out of registers */
2521 if (!reg8_p(r1_reg)) {
2523 andr(rn(t2), r1_reg, rn(t2));
2527 extr_uc(rn(t2), r1_reg);
2528 movi(rn(t3), (jit_word_t)swap_tab);
2529 ldxr_uc(r0_reg, rn(t3), rn(t2));
2530 movi(_RCX_REGNO, 8);
2532 rshr(rn(t2), r1_reg, _RCX_REGNO);
2533 extr_uc(rn(t2), rn(t2));
2534 lshi(r0_reg, r0_reg, 8);
2535 ldxr_uc(rn(t2), rn(t3), rn(t2));
2536 orr(r0_reg, r0_reg, rn(t2));
2537 addi(_RCX_REGNO, _RCX_REGNO, 8);
2538 alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
2540 clear(_RCX_REGNO, _RCX);
2543 if (t1 != JIT_NOREG)
2545 if (t0 != JIT_NOREG) {
2552 _popcntr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
2556 rex(0, WIDE, r0, _NOREG, r1);
2559 mrm(0x3, r7(r0), r7(r1));
2563 jit_int32_t sav, set;
2564 jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
2565 static const unsigned char pop_tab[256] = { /* pop_tab[b] is the number of 1 bits in the byte value b (0..255) */
2566 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
2567 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2568 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2569 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2570 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2571 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2572 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2573 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
2576 isavset(_RCX_REGNO);
2577 allocr(_RCX_REGNO, _RCX);
2578 if (r0 == _RCX_REGNO) {
2579 t0 = jit_get_reg(jit_class_gpr);
2586 if (r1 == _RCX_REGNO || r0 == r1) {
2587 t1 = jit_get_reg(jit_class_gpr);
2595 t2 = jit_get_reg(jit_class_gpr);
2596 t3 = jit_get_reg(jit_class_gpr);
2597 #if __WORDSIZE == 32
2598 /* Avoid condition that causes running out of registers */
2599 if (!reg8_p(r1_reg)) {
2601 andr(rn(t2), r1_reg, rn(t2));
2605 extr_uc(rn(t2), r1_reg);
2606 movi(rn(t3), (jit_word_t)pop_tab);
2607 ldxr_uc(r0_reg, rn(t3), rn(t2));
2608 movi(_RCX_REGNO, 8);
2610 rshr(rn(t2), r1_reg, _RCX_REGNO);
2611 extr_uc(rn(t2), rn(t2));
2612 ldxr_uc(rn(t2), rn(t3), rn(t2));
2613 addr(r0_reg, r0_reg, rn(t2));
2614 addi(_RCX_REGNO, _RCX_REGNO, 8);
2615 alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
2617 clear(_RCX_REGNO, _RCX);
2620 if (t1 != JIT_NOREG)
2622 if (t0 != JIT_NOREG) {
2630 _cr(jit_state_t *_jit,
2631 jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2636 same = r0 == r1 || r0 == r2;
2645 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2646 ixorr(rn(reg), rn(reg));
2655 _ci(jit_state_t *_jit,
2656 jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2670 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2671 ixorr(rn(reg), rn(reg));
2680 _ci0(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
2694 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2695 ixorr(rn(reg), rn(reg));
2704 _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2709 cr(X86_CC_L, r0, r1, r2);
2713 _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2716 ci(X86_CC_L, r0, r1, i0);
2718 ci0(X86_CC_S, r0, r1);
2722 _ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2727 cr(X86_CC_B, r0, r1, r2);
2731 _ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2736 cr(X86_CC_LE, r0, r1, r2);
2740 _ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2745 cr(X86_CC_BE, r0, r1, r2);
2749 _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2752 ci(X86_CC_BE, r0, r1, i0);
2754 ci0(X86_CC_E, r0, r1);
2758 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2763 cr(X86_CC_E, r0, r1, r2);
2767 _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2770 ci(X86_CC_E, r0, r1, i0);
2772 ci0(X86_CC_E, r0, r1);
2776 _ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2781 cr(X86_CC_GE, r0, r1, r2);
2785 _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2788 ci(X86_CC_GE, r0, r1, i0);
2790 ci0(X86_CC_NS, r0, r1);
2794 _ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2799 cr(X86_CC_AE, r0, r1, r2);
2803 _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2806 ci(X86_CC_AE, r0, r1, i0);
2808 ci0(X86_CC_NB, r0, r1);
2812 _gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2817 cr(X86_CC_G, r0, r1, r2);
2821 _gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2826 cr(X86_CC_A, r0, r1, r2);
2830 _gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2833 ci(X86_CC_A, r0, r1, i0);
2835 ci0(X86_CC_NE, r0, r1);
2839 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2844 cr(X86_CC_NE, r0, r1, r2);
2848 _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2851 ci(X86_CC_NE, r0, r1, i0);
2853 ci0(X86_CC_NE, r0, r1);
2857 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2860 rex(0, 1, r1, _NOREG, r0);
2862 ic(0xc0 | (r1 << 3) | r7(r0));
2867 _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2871 if (fits_uint32_p(i0)) {
2873 rex(0, 0, _NOREG, _NOREG, r0);
2878 else if (can_sign_extend_int_p(i0)) {
2879 rex(0, 1, _NOREG, _NOREG, r0);
2885 rex(0, 1, _NOREG, _NOREG, r0);
2901 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2907 rel = rel < 0 ? rel - 8 : rel + 8;
2908 if (can_sign_extend_int_p(rel)) {
2909 /* lea rel(%rip), %r0 */
2910 rex(0, WIDE, r0, _NOREG, _NOREG);
2913 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
2927 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2930 rex(0, WIDE, _NOREG, _NOREG, r0);
2938 _movcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2940 rex(0, WIDE, r0, _NOREG, r1);
2943 mrm(0x03, r7(r0), r7(r1));
2947 _movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2949 rex(0, WIDE, r0, _NOREG, r1);
2952 mrm(0x03, r7(r0), r7(r1));
2956 _movsr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2958 rex(0, WIDE, r0, _NOREG, r1);
2961 mrm(0x03, r7(r0), r7(r1));
2965 _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2967 rex(0, WIDE, r0, _NOREG, r1);
2970 mrm(0x03, r7(r0), r7(r1));
2974 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2975 jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
2977 jit_int32_t save_rax, restore_rax;
2978 jit_int32_t ascasr_reg, ascasr_use;
2979 if (r0 != _RAX_REGNO) { /* result not in %rax */
2980 if (r2 != _RAX_REGNO) { /* old value not in %rax */
2981 save_rax = jit_get_reg(jit_class_gpr);
2982 movr(rn(save_rax), _RAX_REGNO);
2990 if (r2 != _RAX_REGNO)
2991 movr(_RAX_REGNO, r2);
2992 if (r1 == _NOREG) { /* using immediate address */
2993 if (!can_sign_extend_int_p(i0)) {
2994 ascasr_reg = jit_get_reg(jit_class_gpr);
2995 if (ascasr_reg == _RAX) {
2996 ascasr_reg = jit_get_reg(jit_class_gpr);
2997 jit_unget_reg(_RAX);
3000 movi(rn(ascasr_reg), i0);
3007 ic(0xf0); /* lock */
3009 rex(0, WIDE, r3, _NOREG, rn(ascasr_reg));
3011 rex(0, WIDE, r3, _NOREG, r1);
3014 if (r1 != _NOREG) /* casr */
3015 rx(r3, 0, r1, _NOREG, _SCL1);
3018 rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1); /* address in reg */
3020 rx(r3, i0, _NOREG, _NOREG, _SCL1); /* address in offset */
3023 if (r0 != _RAX_REGNO)
3024 movr(r0, _RAX_REGNO);
3026 movr(_RAX_REGNO, rn(save_rax));
3027 jit_unget_reg(save_rax);
3030 jit_unget_reg(ascasr_reg);
3034 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3036 assert(jit_cmov_p());
3040 rex(0, WIDE, r0, _NOREG, r1);
3043 mrm(0x03, r7(r0), r7(r1));
3047 _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3049 assert(jit_cmov_p());
3053 rex(0, WIDE, r0, _NOREG, r1);
3056 mrm(0x03, r7(r0), r7(r1));
3061 _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3063 rex(0, 1, r0, _NOREG, r1);
3065 mrm(0x03, r7(r0), r7(r1));
3069 _movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3071 rex(0, 0, r1, _NOREG, r0);
3073 ic(0xc0 | (r1 << 3) | r7(r0));
3078 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3082 rex(0, 0, _NOREG, _NOREG, r0);
3084 mrm(0x03, X86_ROR, r7(r0));
3089 _bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3092 rex(0, 0, _NOREG, _NOREG, r0);
3097 #if __X64 && !__X64_32
3099 _bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3102 rex(0, 1, _NOREG, _NOREG, r0);
3109 _extr(jit_state_t *_jit,
3110 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3113 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3114 if (i1 == __WORDSIZE)
3117 if (__WORDSIZE - (i0 + i1)) {
3118 lshi(r0, r1, __WORDSIZE - (i0 + i1));
3119 rshi(r0, r0, __WORDSIZE - i1);
3122 rshi(r0, r1, __WORDSIZE - i1);
3127 _extr_u(jit_state_t *_jit,
3128 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3132 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3133 if (i1 == __WORDSIZE)
3135 /* Only cheaper in code size or number of instructions if i0 is not zero */
3136 /* Number of cpu cycles not tested */
3137 else if (i0 && jit_cpu.bmi2) {
3138 mask = ((ONE << i1) - 1) << i0;
3139 t0 = jit_get_reg(jit_class_gpr);
3142 vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 2);
3144 mrm(0x03, r7(r0), r7(rn(t0)));
3150 andi(r0, r0, (ONE << i1) - 1);
3155 _depr(jit_state_t *_jit,
3156 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3160 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3161 if (i1 == __WORDSIZE)
3163 /* Only cheaper in code size or number of instructions if i0 is not zero */
3164 /* Number of cpu cycles not tested */
3165 else if (i0 && jit_cpu.bmi2) {
3166 mask = ((ONE << i1) - 1) << i0;
3167 t0 = jit_get_reg(jit_class_gpr);
3168 t1 = jit_get_reg(jit_class_gpr);
3172 vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 3);
3174 mrm(0x03, r7(r0), r7(rn(t0)));
3175 andi(rn(t1), rn(t1), ~mask);
3176 orr(r0, r0, rn(t1));
3181 mask = (ONE << i1) - 1;
3182 t0 = jit_get_reg(jit_class_gpr);
3183 andi(rn(t0), r1, mask);
3185 lshi(rn(t0), rn(t0), i0);
3188 andi(r0, r0, ~mask);
3189 orr(r0, r0, rn(t0));
3195 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3201 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3209 _extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3215 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3217 movcr_u(r0, rn(reg));
3223 _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3225 rex(0, WIDE, r0, _NOREG, r1);
3228 rx(r0, 0, r1, _NOREG, _SCL1);
3232 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3236 jit_word_t rel = i0 - _jit->pc.w;
3237 rel = rel < 0 ? rel - 8 : rel + 8;
3238 if (can_sign_extend_int_p(rel)) {
3239 rex(0, WIDE, r0, _NOREG, _NOREG);
3242 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3246 if (address_p(i0)) {
3247 rex(0, WIDE, r0, _NOREG, _NOREG);
3250 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3253 reg = jit_get_reg(jit_class_gpr);
3261 _ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3263 rex(0, WIDE, r0, _NOREG, r1);
3266 rx(r0, 0, r1, _NOREG, _SCL1);
3270 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3274 jit_word_t rel = i0 - _jit->pc.w;
3275 rel = rel < 0 ? rel - 8 : rel + 8;
3276 if (can_sign_extend_int_p(rel)) {
3277 rex(0, WIDE, r0, _NOREG, _NOREG);
3280 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3284 if (address_p(i0)) {
3285 rex(0, WIDE, r0, _NOREG, _NOREG);
3288 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3291 reg = jit_get_reg(jit_class_gpr);
3293 ldr_uc(r0, rn(reg));
3299 _ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3301 rex(0, WIDE, r0, _NOREG, r1);
3304 rx(r0, 0, r1, _NOREG, _SCL1);
3308 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3312 jit_word_t rel = i0 - _jit->pc.w;
3313 rel = rel < 0 ? rel - 8 : rel + 8;
3314 if (can_sign_extend_int_p(rel)) {
3315 rex(0, WIDE, r0, _NOREG, _NOREG);
3318 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3322 if (address_p(i0)) {
3323 rex(0, WIDE, r0, _NOREG, _NOREG);
3326 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3329 reg = jit_get_reg(jit_class_gpr);
3337 _ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3339 rex(0, WIDE, r0, _NOREG, r1);
3342 rx(r0, 0, r1, _NOREG, _SCL1);
3346 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3350 jit_word_t rel = i0 - _jit->pc.w;
3351 rel = rel < 0 ? rel - 8 : rel + 8;
3352 if (can_sign_extend_int_p(rel)) {
3353 rex(0, WIDE, r0, _NOREG, _NOREG);
3356 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3360 if (address_p(i0)) {
3361 rex(0, WIDE, r0, _NOREG, _NOREG);
3364 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3367 reg = jit_get_reg(jit_class_gpr);
3369 ldr_us(r0, rn(reg));
3374 #if __X32 || !__X64_32
3376 _ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3379 rex(0, WIDE, r0, _NOREG, r1);
3384 rx(r0, 0, r1, _NOREG, _SCL1);
3388 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3392 jit_word_t rel = i0 - _jit->pc.w;
3393 rel = rel < 0 ? rel - 8 : rel + 8;
3394 if (can_sign_extend_int_p(rel)) {
3395 rex(0, WIDE, r0, _NOREG, _NOREG);
3397 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3401 if (address_p(i0)) {
3403 rex(0, WIDE, r0, _NOREG, _NOREG);
3408 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3411 reg = jit_get_reg(jit_class_gpr);
3421 _ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3423 rex(0, 0, r0, _NOREG, r1);
3425 rx(r0, 0, r1, _NOREG, _SCL1);
3429 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3433 jit_word_t rel = i0 - _jit->pc.w;
3434 rel = rel < 0 ? rel - 8 : rel + 8;
3435 if (can_sign_extend_int_p(rel)) {
3436 rex(0, 0, r0, _NOREG, _NOREG);
3438 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3442 if (address_p(i0)) {
3443 rex(0, 0, r0, _NOREG, _NOREG);
3445 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3448 reg = jit_get_reg(jit_class_gpr);
3453 ldr_ui(r0, rn(reg));
3461 _ldr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3463 rex(0, 1, r0, _NOREG, r1);
3465 rx(r0, 0, r1, _NOREG, _SCL1);
3469 _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3472 jit_word_t rel = i0 - _jit->pc.w;
3473 rel = rel < 0 ? rel - 8 : rel + 8;
3474 if (can_sign_extend_int_p(rel)) {
3475 rex(0, WIDE, r0, _NOREG, _NOREG);
3477 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3479 else if (can_sign_extend_int_p(i0)) {
3480 rex(0, WIDE, r0, _NOREG, _NOREG);
3482 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3485 reg = jit_get_reg(jit_class_gpr);
3495 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3501 rex(0, WIDE, r0, r1, r2);
3504 rx(r0, 0, r2, r1, _SCL1);
3509 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3512 if (can_sign_extend_int_p(i0)) {
3513 rex(0, WIDE, r0, _NOREG, r1);
3516 rx(r0, i0, r1, _NOREG, _SCL1);
3519 reg = jit_get_reg(jit_class_gpr);
3521 ldxr_c(r0, r1, rn(reg));
3527 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3533 rex(0, WIDE, r0, r1, r2);
3536 rx(r0, 0, r2, r1, _SCL1);
3541 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3544 if (can_sign_extend_int_p(i0)) {
3545 rex(0, WIDE, r0, _NOREG, r1);
3548 rx(r0, i0, r1, _NOREG, _SCL1);
3551 reg = jit_get_reg(jit_class_gpr);
3553 ldxr_uc(r0, r1, rn(reg));
3559 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3565 rex(0, WIDE, r0, r1, r2);
3568 rx(r0, 0, r2, r1, _SCL1);
3573 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3576 if (can_sign_extend_int_p(i0)) {
3577 rex(0, WIDE, r0, _NOREG, r1);
3580 rx(r0, i0, r1, _NOREG, _SCL1);
3583 reg = jit_get_reg(jit_class_gpr);
3585 ldxr_s(r0, r1, rn(reg));
3591 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3597 rex(0, WIDE, r0, r1, r2);
3600 rx(r0, 0, r2, r1, _SCL1);
3605 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3608 if (can_sign_extend_int_p(i0)) {
3609 rex(0, WIDE, r0, _NOREG, r1);
3612 rx(r0, i0, r1, _NOREG, _SCL1);
3615 reg = jit_get_reg(jit_class_gpr);
3617 ldxr_us(r0, r1, rn(reg));
3622 #if __X64 || !__X64_32
3624 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3627 rex(0, WIDE, r0, r1, r2);
3632 rx(r0, 0, r2, r1, _SCL1);
3636 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3639 if (can_sign_extend_int_p(i0)) {
3641 rex(0, WIDE, r0, _NOREG, r1);
3646 rx(r0, i0, r1, _NOREG, _SCL1);
3649 reg = jit_get_reg(jit_class_gpr);
3651 ldxr_i(r0, r1, rn(reg));
3659 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3663 /* to avoid confusion with macro renames */
3664 _ldr_ui(_jit, r0, r0);
3666 rex(0, 0, r0, r1, r2);
3668 rx(r0, 0, r2, r1, _SCL1);
3673 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3676 if (can_sign_extend_int_p(i0)) {
3677 rex(0, 0, r0, _NOREG, r1);
3679 rx(r0, i0, r1, _NOREG, _SCL1);
3682 reg = jit_get_reg(jit_class_gpr);
3685 ldxr_i(r0, r1, rn(reg));
3687 ldxr_ui(r0, r1, rn(reg));
3695 _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3697 rex(0, 1, r0, r1, r2);
3699 rx(r0, 0, r2, r1, _SCL1);
3703 _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3706 if (can_sign_extend_int_p(i0)) {
3707 rex(0, 1, r0, _NOREG, r1);
3709 rx(r0, i0, r1, _NOREG, _SCL1);
3712 reg = jit_get_reg(jit_class_gpr);
3714 ldxr_l(r0, r1, rn(reg));
3722 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3726 rex(0, 0, r1, _NOREG, r0);
3728 rx(r1, 0, r0, _NOREG, _SCL1);
3731 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3733 rex(0, 0, rn(reg), _NOREG, r0);
3735 rx(rn(reg), 0, r0, _NOREG, _SCL1);
3741 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3745 jit_word_t rel = i0 - _jit->pc.w;
3746 rel = rel < 0 ? rel - 16 : rel + 16;
3747 if (can_sign_extend_int_p(rel)) {
3749 rex(0, 0, r0, _NOREG, _NOREG);
3751 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3754 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3756 rex(0, 0, rn(reg), _NOREG, _NOREG);
3758 rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3764 if (address_p(i0)) {
3766 rex(0, 0, r0, _NOREG, _NOREG);
3768 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3771 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3773 rex(0, 0, rn(reg), _NOREG, _NOREG);
3775 rx(rn(reg), i0, _NOREG, _NOREG, _SCL1);
3780 reg = jit_get_reg(jit_class_gpr);
3788 _str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3791 rex(0, 0, r1, _NOREG, r0);
3793 rx(r1, 0, r0, _NOREG, _SCL1);
3797 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3801 jit_word_t rel = i0 - _jit->pc.w;
3802 rel = rel < 0 ? rel - 8 : rel + 8;
3803 if (can_sign_extend_int_p(rel)) {
3805 rex(0, 0, r0, _NOREG, _NOREG);
3807 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3811 if (address_p(i0)) {
3813 rex(0, 0, r0, _NOREG, _NOREG);
3815 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3818 reg = jit_get_reg(jit_class_gpr);
3826 _str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3828 rex(0, 0, r1, _NOREG, r0);
3830 rx(r1, 0, r0, _NOREG, _SCL1);
3834 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3838 jit_word_t rel = i0 - _jit->pc.w;
3839 rel = rel < 0 ? rel - 8 : rel + 8;
3840 if (can_sign_extend_int_p(rel)) {
3841 rex(0, 0, r0, _NOREG, _NOREG);
3843 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3847 if (address_p(i0)) {
3848 rex(0, 0, r0, _NOREG, _NOREG);
3850 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3853 reg = jit_get_reg(jit_class_gpr);
3860 #if __X64 && !__X64_32
3862 _str_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3864 rex(0, 1, r1, _NOREG, r0);
3866 rx(r1, 0, r0, _NOREG, _SCL1);
3870 _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3874 jit_word_t rel = i0 - _jit->pc.w;
3875 rel = rel < 0 ? rel - 8 : rel + 8;
3876 if (can_sign_extend_int_p(rel)) {
3877 rex(0, WIDE, r0, _NOREG, _NOREG);
3879 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3883 if (can_sign_extend_int_p(i0)) {
3884 rex(0, WIDE, r0, _NOREG, _NOREG);
3886 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3889 reg = jit_get_reg(jit_class_gpr);
3898 _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3902 reg = jit_get_reg(jit_class_gpr);
3903 addr(rn(reg), r0, r1);
3908 rex(0, 0, r2, r1, r0);
3910 rx(r2, 0, r0, r1, _SCL1);
3913 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3915 rex(0, 0, rn(reg), r1, r0);
3917 rx(rn(reg), 0, r0, r1, _SCL1);
3924 _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3927 if (can_sign_extend_int_p(i0)) {
3929 rex(0, 0, r1, _NOREG, r0);
3931 rx(r1, i0, r0, _NOREG, _SCL1);
3934 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3936 rex(0, 0, rn(reg), _NOREG, r0);
3938 rx(rn(reg), i0, r0, _NOREG, _SCL1);
3943 reg = jit_get_reg(jit_class_gpr);
3945 stxr_c(rn(reg), r0, r1);
3951 _stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3955 reg = jit_get_reg(jit_class_gpr);
3956 addr(rn(reg), r0, r1);
3961 rex(0, 0, r2, r1, r0);
3963 rx(r2, 0, r0, r1, _SCL1);
3968 _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3971 if (can_sign_extend_int_p(i0)) {
3973 rex(0, 0, r1, _NOREG, r0);
3975 rx(r1, i0, r0, _NOREG, _SCL1);
3978 reg = jit_get_reg(jit_class_gpr);
3980 stxr_s(rn(reg), r0, r1);
3986 _stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3990 reg = jit_get_reg(jit_class_gpr);
3991 addr(rn(reg), r0, r1);
3995 rex(0, 0, r2, r1, r0);
3997 rx(r2, 0, r0, r1, _SCL1);
4002 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4005 if (can_sign_extend_int_p(i0)) {
4006 rex(0, 0, r1, _NOREG, r0);
4008 rx(r1, i0, r0, _NOREG, _SCL1);
4011 reg = jit_get_reg(jit_class_gpr);
4013 stxr_i(rn(reg), r0, r1);
4018 #if __X64 && !__X64_32
4020 _stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
4022 rex(0, 1, r2, r1, r0);
4024 rx(r2, 0, r0, r1, _SCL1);
4028 _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4031 if (can_sign_extend_int_p(i0)) {
4032 rex(0, 1, r1, _NOREG, r0);
4034 rx(r1, i0, r0, _NOREG, _SCL1);
4037 reg = jit_get_reg(jit_class_gpr);
4039 stxr_l(rn(reg), r0, r1);
4046 _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
4058 _jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
4071 _jcr(jit_state_t *_jit,
4072 jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4074 alur(X86_CMP, r0, r1);
4075 return (jcc(code, i0));
4079 _jci(jit_state_t *_jit,
4080 jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4082 alui(X86_CMP, r0, i1);
4083 return (jcc(code, i0));
4087 _jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
4090 return (jcc(code, i0));
4094 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4096 return (jcr(X86_CC_L, i0, r0, r1));
4100 _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4103 if (i1) w = jci (X86_CC_L, i0, r0, i1);
4104 else w = jci0(X86_CC_S, i0, r0);
4109 _bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4111 return (jcr(X86_CC_B, i0, r0, r1));
4115 _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4118 if (i1) w = jci (X86_CC_B, i0, r0, i1);
4119 else w = jci0(X86_CC_B, i0, r0);
4124 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4127 if (r0 == r1) w = jmpi(i0);
4128 else w = jcr (X86_CC_LE, i0, r0, r1);
4133 _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4136 if (i1) w = jci (X86_CC_LE, i0, r0, i1);
4137 else w = jci0(X86_CC_LE, i0, r0);
4142 _bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4145 if (r0 == r1) w = jmpi(i0);
4146 else w = jcr (X86_CC_BE, i0, r0, r1);
4151 _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4154 if (i1) w = jci (X86_CC_BE, i0, r0, i1);
4155 else w = jci0(X86_CC_BE, i0, r0);
4160 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4163 if (r0 == r1) w = jmpi(i0);
4164 else w = jcr (X86_CC_E, i0, r0, r1);
4169 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4172 if (i1) w = jci (X86_CC_E, i0, r0, i1);
4173 else w = jci0(X86_CC_E, i0, r0);
4178 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4181 if (r0 == r1) w = jmpi(i0);
4182 else w = jcr (X86_CC_GE, i0, r0, r1);
4187 _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4190 if (i1) w = jci (X86_CC_GE, i0, r0, i1);
4191 else w = jci0(X86_CC_NS, i0, r0);
4196 _bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4199 if (r0 == r1) w = jmpi(i0);
4200 else w = jcr (X86_CC_AE, i0, r0, r1);
4205 _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4208 if (i1) w = jci (X86_CC_AE, i0, r0, i1);
4214 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4216 return (jcr(X86_CC_G, i0, r0, r1));
4220 _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4222 return (jci(X86_CC_G, i0, r0, i1));
4226 _bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4228 return (jcr(X86_CC_A, i0, r0, r1));
4232 _bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4235 if (i1) w = jci (X86_CC_A, i0, r0, i1);
4236 else w = jci0(X86_CC_NE, i0, r0);
4241 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4243 return (jcr(X86_CC_NE, i0, r0, r1));
4247 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4250 if (i1) w = jci (X86_CC_NE, i0, r0, i1);
4251 else w = jci0(X86_CC_NE, i0, r0);
4256 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4263 _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4266 if (can_zero_extend_int_p(i1))
4269 reg = jit_get_reg(jit_class_gpr);
4278 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4285 _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4288 if (can_zero_extend_int_p(i1))
4291 reg = jit_get_reg(jit_class_gpr);
4300 _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4307 _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4310 if (can_sign_extend_int_p(i1)) {
4314 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4317 return (boaddr(i0, r0, rn(reg)));
4321 _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4328 _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4331 if (can_sign_extend_int_p(i1)) {
4335 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4338 return (boaddr_u(i0, r0, rn(reg)));
4342 _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4349 _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4352 if (can_sign_extend_int_p(i1)) {
4356 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4359 return (bxaddr(i0, r0, rn(reg)));
4363 _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4370 _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4373 if (can_sign_extend_int_p(i1)) {
4377 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4380 return (bxaddr_u(i0, r0, rn(reg)));
4384 _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4391 _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4394 if (can_sign_extend_int_p(i1)) {
4398 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4401 return (bosubr(i0, r0, rn(reg)));
4405 _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4412 _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4415 if (can_sign_extend_int_p(i1)) {
4419 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4422 return (bosubr_u(i0, r0, rn(reg)));
4426 _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4433 _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4436 if (can_sign_extend_int_p(i1)) {
4440 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4443 return (bxsubr(i0, r0, rn(reg)));
4447 _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4454 _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4457 if (can_sign_extend_int_p(i1)) {
4461 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4464 return (bxsubr_u(i0, r0, rn(reg)));
4468 _callr(jit_state_t *_jit, jit_int32_t r0)
4470 rex(0, 0, _NOREG, _NOREG, r0);
4472 mrm(0x03, 0x02, r7(r0));
4476 _calli(jit_state_t *_jit, jit_word_t i0)
4480 jit_word_t l = _jit->pc.w + 5;
4485 !((d < 0) ^ (l < 0)) &&
4487 (jit_int32_t)d == d) {
4502 _calli_p(jit_state_t *_jit, jit_word_t i0)
4506 reg = jit_get_reg(jit_class_gpr);
4507 w = movi_p(rn(reg), i0);
4515 _jmpr(jit_state_t *_jit, jit_int32_t r0)
4517 rex(0, 0, _NOREG, _NOREG, r0);
4519 mrm(0x03, 0x04, r7(r0));
4523 _jmpi(jit_state_t *_jit, jit_word_t i0)
4527 jit_word_t l = _jit->pc.w + 5;
4532 !((d < 0) ^ (l < 0)) &&
4534 (jit_int32_t)d == d) {
4549 _jmpi_p(jit_state_t *_jit, jit_word_t i0)
4553 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4554 w = movi_p(rn(reg), i0);
4562 _jmpsi(jit_state_t *_jit, jit_uint8_t i0)
4564 jit_word_t w = _jit->pc.w;
4574 _prolog(jit_state_t *_jit, jit_node_t *node)
4576 jit_int32_t reg, offs;
4577 if (_jitc->function->define_frame || _jitc->function->assume_frame) {
4578 jit_int32_t frame = -_jitc->function->frame;
4580 assert(_jitc->function->self.aoff >= frame);
4581 if (_jitc->function->assume_frame)
4583 _jitc->function->self.aoff = frame;
4585 if (_jitc->function->allocar)
4586 _jitc->function->self.aoff &= -16;
4587 #if __X64 && (__CYGWIN__ || _WIN32)
4588 _jitc->function->stack = (((/* first 32 bytes must be allocated */
4589 (_jitc->function->self.alen > 32 ?
4590 _jitc->function->self.alen : 32) -
4591 /* align stack at 16 bytes */
4592 _jitc->function->self.aoff) + 15) & -16);
4594 _jitc->function->stack = (((_jitc->function->self.alen -
4595 _jitc->function->self.aoff) + 15) & -16);
4598 if (_jitc->function->stack)
4599 _jitc->function->need_stack = 1;
4601 if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
4602 /* check if any callee save register needs to be saved */
4603 for (reg = 0; reg < _jitc->reglen; ++reg)
4604 if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
4605 (_rvs[reg].spec & jit_class_sav)) {
4606 _jitc->function->need_stack = 1;
4611 if (_jitc->function->need_frame || _jitc->function->need_stack)
4612 subi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4613 /* callee save registers */
4614 for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
4615 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4616 stxi(offs, _RSP_REGNO, rn(iregs[reg]));
4617 offs += REAL_WORDSIZE;
4620 #if __X64 && (__CYGWIN__ || _WIN32)
4621 for (reg = 0; reg < jit_size(fregs); reg++) {
4622 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
4623 sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg]));
4624 offs += sizeof(jit_float64_t);
4629 if (_jitc->function->need_frame) {
4630 stxi(0, _RSP_REGNO, _RBP_REGNO);
4631 movr(_RBP_REGNO, _RSP_REGNO);
4635 if (_jitc->function->stack)
4636 subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
4637 if (_jitc->function->allocar) {
4638 reg = jit_get_reg(jit_class_gpr);
4639 movi(rn(reg), _jitc->function->self.aoff);
4640 stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg));
4644 #if __X64 && !(__CYGWIN__ || _WIN32)
4645 if (_jitc->function->self.call & jit_call_varargs) {
4646 jit_word_t nofp_code;
4648 /* Save gp registers in the save area, if any is a vararg */
4649 for (reg = first_gp_from_offset(_jitc->function->vagp);
4650 jit_arg_reg_p(reg); ++reg)
4651 stxi(_jitc->function->vaoff + first_gp_offset +
4652 reg * 8, _RBP_REGNO, rn(JIT_RA0 - reg));
4654 reg = first_fp_from_offset(_jitc->function->vafp);
4655 if (jit_arg_f_reg_p(reg)) {
4656 /* Skip over if no float registers were passed as argument */
4662 /* Save fp registers in the save area, if any is a vararg */
4663 /* Note that the full 16 byte xmm is not saved, because
4664 * lightning only handles float and double, and, while
4665 * attempting to provide a va_list compatible pointer as
4666 * jit_va_start return, does not guarantee it (on all ports). */
4667 for (; jit_arg_f_reg_p(reg); ++reg)
4668 sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
4669 reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
4671 patch_at(nofp_code, _jit->pc.w);
/* Emit the function epilogue: reload the saved registers recorded in the
 * function's regset and release the stack frame.
 * NOTE(review): node is not referenced by any line visible in this
 * excerpt. */
4678 _epilog(jit_state_t *_jit, jit_node_t *node)
4680 jit_int32_t reg, offs;
/* NOTE(review): the statement guarded by the assume_frame test is not
 * visible in this excerpt; presumably an early return -- confirm against
 * the full file. */
4681 if (_jitc->function->assume_frame)
/* With a frame, reset the stack pointer from the frame pointer so the
 * loads below can address the saved registers relative to it. */
4683 if (_jitc->function->need_frame)
4684 movr(_RSP_REGNO, _RBP_REGNO);
4686 /* callee save registers */
/* offs starts at REAL_WORDSIZE and advances by one word for each register
 * of iregs whose bit is set in regset; registers not set are skipped and
 * consume no slot. */
4687 for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
4688 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4689 ldxi(rn(iregs[reg]), _RSP_REGNO, offs);
4690 offs += REAL_WORDSIZE;
/* Only when building for 64-bit Cygwin/Windows: the registers listed in
 * fregs are reloaded too, continuing at the same running offset and
 * taking sizeof(jit_float64_t) bytes each. */
4693 #if __X64 && (__CYGWIN__ || _WIN32)
4694 for (reg = 0; reg < jit_size(fregs); reg++) {
4695 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
4696 sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs);
4697 offs += sizeof(jit_float64_t);
/* Frame case: reload the frame pointer from offset 0 of the stack pointer,
 * then release jit_framesize() bytes of stack. The frameless case further
 * below releases the same amount when only need_stack is set. */
4702 if (_jitc->function->need_frame) {
4703 ldxi(_RBP_REGNO, _RSP_REGNO, 0);
4704 addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4706 /* This condition does not happen as much as expected because
4707 * it is not safe to not create a frame pointer if any function
4708 * is called, even jit functions, as those might call external
4710 else if (_jitc->function->need_stack)
4711 addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
/* Emit the code for va_start.
 *   r0: register that receives the va_list value.
 * Two variants are visible: a simple one where the result is just an
 * address computed from the frame pointer, and one that fills in a
 * jit_va_list_t record (gpoff, fpoff, over, save). Both assert that the
 * current function was declared with jit_call_varargs. */
4717 _vastart(jit_state_t *_jit, jit_int32_t r0)
4719 #if __X32 || __CYGWIN__ || _WIN32
4720 assert(_jitc->function->self.call & jit_call_varargs);
/* r0 = frame pointer + jit_selfsize(); the other variant below uses the
 * same expression for the address of the first stack argument. */
4721 addi(r0, _RBP_REGNO, jit_selfsize());
/* NOTE(review): the directive separating the two variants (presumably
 * #else), the declaration of reg and the release of the temporary are not
 * visible in this excerpt -- confirm against the full file. */
4725 assert(_jitc->function->self.call & jit_call_varargs);
4727 /* Return jit_va_list_t in the register argument */
4728 addi(r0, _RBP_REGNO, _jitc->function->vaoff);
/* One scratch register is used to stage every value stored below. */
4729 reg = jit_get_reg(jit_class_gpr);
4731 /* Initialize gp offset in the save area. */
4732 movi(rn(reg), _jitc->function->vagp);
/* gpoff and fpoff are written with the int-sized store (stxi_i), unlike
 * the two pointer fields below which use the word-sized stxi. */
4733 stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
4735 /* Initialize fp offset in the save area. */
4736 movi(rn(reg), _jitc->function->vafp);
4737 stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
4739 /* Initialize overflow pointer to the first stack argument. */
4740 addi(rn(reg), _RBP_REGNO, jit_selfsize());
4741 stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
4743 /* Initialize register save area pointer. */
/* The save pointer is derived from r0 itself (r0 + first_gp_offset), i.e.
 * it is relative to the jit_va_list_t address computed above. */
4744 addi(rn(reg), r0, first_gp_offset);
4745 stxi(offsetof(jit_va_list_t, save), r0, rn(reg));
/* Emit the code for va_arg of a word-sized (gp) argument.
 *   r0: register that receives the argument.
 *   r1: register holding the va_list value.
 * In the jit_va_list_t variant the argument is taken from the register
 * save area while gpoff is below va_gp_max_offset, and from the overflow
 * area otherwise; the matching offset or pointer is then advanced and
 * stored back. */
4752 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4754 #if __X32 || __CYGWIN__ || _WIN32
4755 assert(_jitc->function->self.call & jit_call_varargs);
/* Simple variant: r1 itself is advanced by va_gp_increment.
 * NOTE(review): the load into r0 that presumably precedes this update is
 * not visible in this excerpt. */
4757 addi(r1, r1, va_gp_increment);
/* NOTE(review): from here on is the jit_va_list_t variant; the directive
 * separating the variants and the declarations of rg0, rg1, ge_code and
 * lt_code are not visible in this excerpt. */
4764 assert(_jitc->function->self.call & jit_call_varargs);
4766 rg0 = jit_get_reg(jit_class_gpr);
4767 rg1 = jit_get_reg(jit_class_gpr);
4769 /* Load the gp offset in save area in the first temporary. */
4770 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
4772 /* Jump over if there are no remaining arguments in the save area. */
/* NOTE(review): only the compare is visible; the conditional branch that
 * records ge_code (patched further below) is on an omitted line. */
4773 icmpi(rn(rg0), va_gp_max_offset);
4776 /* Load the save area pointer in the second temporary. */
4777 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
4779 /* Load the vararg argument in the first argument. */
4780 ldxr(r0, rn(rg1), rn(rg0));
4782 /* Update the gp offset. */
/* NOTE(review): the step is the literal 8 here, while the overflow path
 * below uses va_gp_increment -- presumably the same value; confirm and
 * consider the named constant. */
4783 addi(rn(rg0), rn(rg0), 8);
4784 stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
4786 /* Will only need one temporary register below. */
4789 /* Jump over overflow code. */
4792 /* Where to land if argument is in overflow area. */
4793 patch_at(ge_code, _jit->pc.w);
4795 /* Load overflow pointer. */
4796 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
4798 /* Load argument. */
4801 /* Update overflow pointer. */
4802 addi(rn(rg0), rn(rg0), va_gp_increment);
4803 stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
4805 /* Where to land if argument is in save area. */
/* Both paths meet here: the jump recorded in lt_code is resolved to the
 * current emission address. */
4806 patch_at(lt_code, _jit->pc.w);
/* Emit the code for va_arg of a double argument.
 *   r0:  register that receives the argument.
 *   r1:  register holding the va_list value.
 *   x87: selects the kind of destination register, as described below.
 * In the jit_va_list_t variant the argument is taken from the register
 * save area while fpoff is below va_fp_max_offset, and from the overflow
 * area otherwise; the matching offset or pointer is then advanced and
 * stored back. */
4812 /* The x87 boolean argument selects where the result is placed: in an
4813 * x87 register if true, in an sse register otherwise. */
4815 _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
4817 #if __X32 || __CYGWIN__ || _WIN32
/* NOTE(review): apart from this assert, the body of the simple variant is
 * not visible in this excerpt. */
4818 assert(_jitc->function->self.call & jit_call_varargs);
/* NOTE(review): from here on is the jit_va_list_t variant; the directive
 * separating the variants and the declarations of rg0, rg1, ge_code and
 * lt_code are not visible in this excerpt. */
4830 assert(_jitc->function->self.call & jit_call_varargs);
4832 rg0 = jit_get_reg(jit_class_gpr);
4833 rg1 = jit_get_reg(jit_class_gpr);
4835 /* Load the fp offset in save area in the first temporary. */
4836 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
4838 /* Jump over if there are no remaining arguments in the save area. */
/* NOTE(review): only the compare is visible; the conditional branch that
 * records ge_code (patched further below) is on an omitted line. */
4839 icmpi(rn(rg0), va_fp_max_offset);
4842 /* Load the save area pointer in the second temporary. */
4843 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
4845 /* Load the vararg argument in the first argument. */
/* NOTE(review): the test that picks one of the next two loads is not
 * visible in this excerpt; going by the comment above the function, the
 * x87_ form is for x87 true and the sse_ form otherwise. */
4847 x87_ldxr_d(r0, rn(rg1), rn(rg0));
4849 sse_ldxr_d(r0, rn(rg1), rn(rg0));
4851 /* Update the fp offset. */
4852 addi(rn(rg0), rn(rg0), va_fp_increment);
4853 stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
4855 /* Will only need one temporary register below. */
4858 /* Jump over overflow code. */
4861 /* Where to land if argument is in overflow area. */
4862 patch_at(ge_code, _jit->pc.w);
4864 /* Load overflow pointer. */
4865 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
4867 /* Load argument. */
/* Same x87/sse choice as above, this time loading directly from the
 * overflow pointer. */
4869 x87_ldr_d(r0, rn(rg0));
4871 sse_ldr_d(r0, rn(rg0));
4873 /* Update overflow pointer. */
/* NOTE(review): the overflow pointer advances by the literal 8, not by
 * va_fp_increment as the save-area offset does -- confirm this asymmetry
 * is intended before replacing it with a named constant. */
4874 addi(rn(rg0), rn(rg0), 8);
4875 stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
4877 /* Where to land if argument is in save area. */
/* Both paths meet here: the jump recorded in lt_code is resolved to the
 * current emission address. */
4878 patch_at(lt_code, _jit->pc.w);
/* Patch already emitted code at address instr so that it refers to label.
 * Three ways of writing the target are visible below: storing label as a
 * full word, storing a 32-bit displacement relative to instr + 4, and
 * storing an 8-bit displacement relative to instr + 1.
 * NOTE(review): the logic that selects between these forms, and any
 * adjustment of instr before the stores, is not visible in this excerpt. */
4885 _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
/* Byte view of the code at instr, used to inspect what is being patched. */
4888 jit_uint8_t *code = (jit_uint8_t *)instr;
/* Absolute form: the word at instr is overwritten with label itself. */
4893 *(jit_word_t *)instr = label;
4895 /* forward pc relative address known to be in range */
/* True when the second byte is outside 0x80..0x8f.
 * NOTE(review): presumably the second opcode byte of a two-byte
 * conditional jump; what happens in that case is not visible here. */
4905 if (code[1] < 0x80 || code[1] > 0x8f)
/* 32-bit form: the displacement is measured from the end of the 4-byte
 * field, and the assert checks that it survives truncation to 32 bits. */
4914 disp = label - (instr + 4);
4915 assert((jit_int32_t)disp == disp);
4916 *(jit_int32_t *)instr = disp;
/* 8-bit form: the displacement is measured from the end of the 1-byte
 * field, and the assert checks that it fits in a signed byte. */
4922 disp = label - (instr + 1);
4923 assert((jit_int8_t)disp == disp);
4924 *(jit_int8_t *)instr = disp;