2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4 * This file is part of GNU lightning.
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
17 * Paulo Cesar Pereira de Andrade
20 /* avoid using it due to partial stalls */
/*
 * Word-size dependent configuration: generic load/store names and the
 * immediate-range predicates.  Only the directive and macro lines visible
 * here are documented; the matching #else/#endif lines are not part of this
 * excerpt.
 */
24 # if __WORDSIZE == 64 && _WIN32
29 # if __X32 || __X64_32
/* First set: the generic load/store names alias the `_i` variants and all
 * three range predicates are the constant 1. */
31 # define ldi(u, v) ldi_i(u, v)
32 # define ldr(u, v) ldr_i(u, v)
33 # define ldxr(u, v, w) ldxr_i(u, v, w)
34 # define ldxi(u, v, w) ldxi_i(u, v, w)
35 # define str(u, v) str_i(u, v)
36 # define sti(u, v) sti_i(u, v)
37 # define stxr(u, v, w) stxr_i(u, v, w)
38 # define stxi(u, v, w) stxi_i(u, v, w)
39 # define can_sign_extend_int_p(im) 1
40 # define can_zero_extend_int_p(im) 1
41 # define fits_uint32_p(im) 1
/* Second set of definitions for the same names: the generic names alias the
 * `_l` variants.  Presumably this is the alternative branch of the
 * conditional above -- the separating directive is not visible here. */
44 # define ldi(u, v) ldi_l(u, v)
45 # define ldr(u, v) ldr_l(u, v)
46 # define ldxr(u, v, w) ldxr_l(u, v, w)
47 # define ldxi(u, v, w) ldxi_l(u, v, w)
48 # define str(u, v) str_l(u, v)
49 # define sti(u, v) sti_l(u, v)
50 # define stxr(u, v, w) stxr_l(u, v, w)
51 # define stxi(u, v, w) stxi_l(u, v, w)
/* True when im lies in [0, 0x7fffffff] or strictly between -0x80000000
 * and 0.  The argument is evaluated more than once.
 * NOTE(review): the lower bound is exclusive, so -0x80000000 itself is
 * rejected -- confirm whether that is intentional. */
52 # define can_sign_extend_int_p(im) \
53 (((long long)(im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \
54 ((long long)(im) < 0 && (long long)(im) > -0x80000000LL))
/* True when im is non-negative and below 0x80000000. */
55 # define can_zero_extend_int_p(im) \
56 ((im) >= 0 && (im) < 0x80000000LL)
/* True when none of bits 32..63 of im are set. */
57 # define fits_uint32_p(im) (((im) & 0xffffffff00000000LL) == 0)
/*
 * Register-number helpers.
 * NOTE(review): the directive below is followed by the tail of a macro whose
 * `# define` line is not visible in this excerpt; the expression tests that
 * rn lies in the inclusive range _RAX_REGNO.._RBX_REGNO.
 */
59 # if __X32 || __CYGWIN__ || __X64_32 || _WIN32
61 ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO)
/* Numeric identifiers 10..15 for the _R10.._R15 register names. */
75 # define _R10_REGNO 10
76 # define _R11_REGNO 11
77 # define _R12_REGNO 12
78 # define _R13_REGNO 13
79 # define _R14_REGNO 14
80 # define _R15_REGNO 15
/* r7() keeps the low three bits of a register number, r8() the low four. */
81 # define r7(reg) ((reg) & 7)
82 # define r8(reg) ((reg) & 15)
/*
 * Selector codes for the two-operand ALU helpers; they are passed as the
 * `code` argument of alur()/alui().  Each value is an operation index
 * (1..7) already shifted left by 3.
 *
 * The expansions are fully parenthesized so they keep their value inside a
 * larger expression: unparenthesized, `X86_OR + 1` would expand to
 * `1 << 3 + 1`, i.e. 16 instead of 9.
 */
# define X86_OR (1 << 3)
# define X86_ADC (2 << 3)
# define X86_SBB (3 << 3)
# define X86_AND (4 << 3)
# define X86_SUB (5 << 3)
# define X86_XOR (6 << 3)
# define X86_CMP (7 << 3)
/*
 * Condition-code values 0x0..0xf, used as the `code` argument of the
 * jcc()/jccs() wrappers and of the ci() comparison helper.  Several
 * mnemonics are aliases that share one value (for example B, C and NAE are
 * all 0x2); each odd value is paired with the even value just below it.
 */
108 # define X86_CC_O 0x0
109 # define X86_CC_NO 0x1
110 # define X86_CC_NAE 0x2
111 # define X86_CC_B 0x2
112 # define X86_CC_C 0x2
113 # define X86_CC_AE 0x3
114 # define X86_CC_NB 0x3
115 # define X86_CC_NC 0x3
116 # define X86_CC_E 0x4
117 # define X86_CC_Z 0x4
118 # define X86_CC_NE 0x5
119 # define X86_CC_NZ 0x5
120 # define X86_CC_BE 0x6
121 # define X86_CC_NA 0x6
122 # define X86_CC_A 0x7
123 # define X86_CC_NBE 0x7
124 # define X86_CC_S 0x8
125 # define X86_CC_NS 0x9
126 # define X86_CC_P 0xa
127 # define X86_CC_PE 0xa
128 # define X86_CC_NP 0xb
129 # define X86_CC_PO 0xb
130 # define X86_CC_L 0xc
131 # define X86_CC_NGE 0xc
132 # define X86_CC_GE 0xd
133 # define X86_CC_NL 0xd
134 # define X86_CC_LE 0xe
135 # define X86_CC_NG 0xe
136 # define X86_CC_G 0xf
137 # define X86_CC_NLE 0xf
/*
 * Raw emitters.  Each macro stores one value at the current code pointer
 * held in `_jit->pc` and post-increments that pointer; a variable named
 * `_jit` must be in scope at the point of use.  The uc/us/ui members are
 * presumably 8-, 16- and 32-bit views of the same pointer (the declaration
 * is outside this excerpt).
 *
 *   mrm(md, r, m)  packs md into bits 7..6, r into bits 5..3, m into the rest
 *   sib(sc, i, b)  packs sc into bits 7..6, i into bits 5..3, b into the rest
 *   ic/is/ii       store the argument through the uc/us/ui view
 *
 * Every argument is parenthesized in the expansion so that an argument such
 * as `reg & 7` is shifted as a whole; unparenthesized, `r<<3` with
 * r = `reg & 7` would parse as `reg & (7<<3)`.
 */
# define mrm(md, r, m) *_jit->pc.uc++ = ((md) << 6) | ((r) << 3) | (m)
# define sib(sc, i, b) *_jit->pc.uc++ = ((sc) << 6) | ((i) << 3) | (b)
# define ic(c) *_jit->pc.uc++ = (c)
# define is(s) *_jit->pc.us++ = (s)
# define ii(i) *_jit->pc.ui++ = (i)
/* il() stores its argument through the `ul` view of the code pointer and
 * post-increments it; it is defined only when __X64 is set and __X64_32 is
 * not. */
143 # if __X64 && !__X64_32
144 # define il(l) *_jit->pc.ul++ = l
/* rex() and rx() forward to _rex()/_rx(), supplying the in-scope `_jit` as
 * the first argument.  The return-type lines of the two prototypes are not
 * visible in this excerpt. */
148 # define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b)
150 _rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
151 # define rx(rd, md, rb, ri, ms) _rx(_jit, rd, md, rb, ri, ms)
153 _rx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
155 * prefix 8 bits 0xc4 Three byte VEX
157 * 0x8f Three byte XOP
158 * ~R 1 bit Inverted REX.R
159 * ~X 1 bit Inverted REX.X
160 * ~B 1 bit Inverted REX.B
161 * map 5 bits Opcode map to use
162 * W 1 bit REX.W for integer, otherwise opcode extension
163 * ~vvvv 4 bits Inverted XMM or YMM registers
164 * L 1 bit 128 bit vector if 0, 256 otherwise
165 * pp 2 bits Mandatory prefix
172 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
173 * | 1 1 0 0 0 1 0 0 | |~R |~X |~B | map | | W | ~vvvv | L | pp |
174 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
176 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
177 * | 1 0 0 0 1 1 1 1 | |~R |~X |~B | map | | W | ~vvvv | L | pp |
178 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
180 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
181 * | 1 1 0 0 0 1 0 1 | |~R | ~vvvv | L | pp |
182 * +---+---+---+---+---+---+---+---+ +---+---+---+---+---+---+---+---+
/*
 * Basic instruction helpers.  Unless noted otherwise each `name(...)` macro
 * expands to `_name(_jit, ...)`, so a `jit_state_t *_jit` must be in scope
 * where the macro is used; the prototype of the underscore function follows
 * its macro.  Where a prototype starts with the function name, its
 * return-type line is not visible in this excerpt.
 */
184 # define vex(r,x,b,map,w,vvvv,l,pp) _vex(_jit,r,x,b,map,w,vvvv,l,pp)
186 _vex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
187 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
188 # define nop(n) _nop(_jit, n)
189 static void _nop(jit_state_t*, jit_int32_t);
/* emms() emits the 16-bit value 0x770f through is(). */
190 # define emms() is(0x770f)
191 # define lea(md, rb, ri, ms, rd) _lea(_jit, md, rb, ri, ms, rd)
193 _lea(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/* _pushr/_popr are tagged maybe_unused. */
194 # define pushr(r0) _pushr(_jit, r0)
195 static void _pushr(jit_state_t*, jit_int32_t) maybe_unused;
196 # define popr(r0) _popr(_jit, r0)
197 static void _popr(jit_state_t*, jit_int32_t) maybe_unused;
198 # define xchgr(r0, r1) _xchgr(_jit, r0, r1)
199 static void _xchgr(jit_state_t*, jit_int32_t, jit_int32_t);
200 # define testr(r0, r1) _testr(_jit, r0, r1)
201 static void _testr(jit_state_t*, jit_int32_t, jit_int32_t);
202 # define testi(r0, i0) _testi(_jit, r0, i0)
203 static void _testi(jit_state_t*, jit_int32_t, jit_word_t);
204 # define cc(code, r0) _cc(_jit, code, r0)
205 static void _cc(jit_state_t*, jit_int32_t, jit_int32_t);
/* alur()/alui() take an X86_* selector code plus register/register or
 * register/immediate operands; icmpr/icmpi/iaddr are shorthands that fix
 * the selector (X86_ADD is defined outside this excerpt). */
206 # define icmpr(r0, r1) alur(X86_CMP, r0, r1)
207 # define alur(code, r0, r1) _alur(_jit, code, r0, r1)
208 static void _alur(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
209 # define icmpi(r0, i0) alui(X86_CMP, r0, i0)
210 # define alui(code, r0, i0) _alui(_jit, code, r0, i0)
211 static void _alui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
212 # define iaddr(r0, r1) alur(X86_ADD, r0, r1)
213 # define save(r0) _save(_jit, r0)
214 static void _save(jit_state_t*, jit_int32_t);
215 # define load(r0) _load(_jit, r0)
216 static void _load(jit_state_t*, jit_int32_t);
/*
 * Addition and subtraction helpers.
 *
 * Names with an `i` prefix (iaddi, iaddxi, isubr, isubi, isubxr, isubxi) are
 * two-operand shorthands for alur()/alui() with a fixed selector:
 * X86_ADD, X86_ADC, X86_SUB or X86_SBB.  iaddxr is the exception and has its
 * own function, _iaddxr.  The remaining names forward to `_name(_jit, ...)`;
 * forms ending in `r` take three register numbers (jit_int32_t), forms
 * ending in `i` take two registers and a jit_word_t immediate.
 */
217 # define addr(r0, r1, r2) _addr(_jit, r0, r1, r2)
218 static void _addr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
219 # define iaddi(r0, i0) alui(X86_ADD, r0, i0)
220 # define addi(r0, r1, i0) _addi(_jit, r0, r1, i0)
221 static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
222 #define addcr(r0, r1, r2) _addcr(_jit, r0, r1, r2)
223 static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
224 #define addci(r0, r1, i0) _addci(_jit, r0, r1, i0)
225 static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
226 # define iaddxr(r0, r1) _iaddxr(_jit, r0, r1)
227 static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t);
228 # define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2)
229 static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
230 # define iaddxi(r0, i0) alui(X86_ADC, r0, i0)
231 # define addxi(r0, r1, i0) _addxi(_jit, r0, r1, i0)
232 static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
233 # define isubr(r0, r1) alur(X86_SUB, r0, r1)
234 # define subr(r0, r1, r2) _subr(_jit, r0, r1, r2)
235 static void _subr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
236 # define isubi(r0, i0) alui(X86_SUB, r0, i0)
237 # define subi(r0, r1, i0) _subi(_jit, r0, r1, i0)
238 static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
239 # define subcr(r0, r1, r2) _subcr(_jit, r0, r1, r2)
240 static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
241 # define subci(r0, r1, i0) _subci(_jit, r0, r1, i0)
242 static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
243 # define isubxr(r0, r1) alur(X86_SBB, r0, r1)
244 # define subxr(r0, r1, r2) _subxr(_jit, r0, r1, r2)
245 static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
246 # define isubxi(r0, i0) alui(X86_SBB, r0, i0)
247 # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0)
248 static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
249 # define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0)
250 static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Multiplication helpers.
 *
 * imulr/imuli/mulr/muli forward to their own `_name(_jit, ...)` functions.
 * umulr/umulr_u are unr() with X86_IMUL / X86_MUL (both selectors are
 * defined outside this excerpt).
 *
 * The h* and q* names all route to _iqmulr (register operand) or _iqmuli
 * (immediate operand).  The trailing jit_bool_t is 1 for the plain names
 * and 0 for the `_u` names; the h* names pass JIT_NOREG as the first
 * register argument, the q* names pass a caller-supplied register there.
 */
251 # define imulr(r0, r1) _imulr(_jit, r0, r1)
252 static void _imulr(jit_state_t*, jit_int32_t, jit_int32_t);
253 # define imuli(r0, r1, i0) _imuli(_jit, r0, r1, i0)
254 static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
255 # define mulr(r0, r1, r2) _mulr(_jit, r0, r1, r2)
256 static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
257 # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
258 static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
259 # define hmulr(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 1)
260 # define hmulr_u(r0, r1, r2) _iqmulr(_jit, JIT_NOREG, r0, r1, r2, 0)
261 # define hmuli(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 1)
262 # define hmuli_u(r0, r1, i0) _iqmuli(_jit, JIT_NOREG, r0, r1, i0, 0)
263 # define umulr(r0) unr(X86_IMUL, r0)
264 # define umulr_u(r0) unr(X86_MUL, r0)
265 # define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1)
266 # define qmulr_u(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 0)
267 # define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
268 static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t,
269 jit_int32_t,jit_int32_t, jit_bool_t);
270 # define qmuli(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 1)
271 # define qmuli_u(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 0)
272 # define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign)
273 static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t,
274 jit_int32_t,jit_word_t, jit_bool_t);
/*
 * Division and remainder helpers.
 *
 * idivr/idivr_u are unr() with X86_IDIV / X86_DIV (selectors defined outside
 * this excerpt).
 *
 * divr/divi/remr/remi and their `_u` forms all route to divremr (register
 * divisor) or divremi (immediate divisor) with two constant flags: the
 * first is 1 for the plain names and 0 for the `_u` names, the second is 1
 * for the div* names and 0 for the rem* names.
 *
 * qdivr/qdivi route to _iqdivr/_iqdivi, with a trailing flag that is 1 for
 * the plain names and 0 for the `_u` names.  The return-type lines of the
 * _divremr/_divremi prototypes are not visible in this excerpt.
 */
275 # define sign_extend_rdx_rax() _sign_extend_rdx_rax(_jit)
276 static void _sign_extend_rdx_rax(jit_state_t*);
277 # define idivr(r0) unr(X86_IDIV, r0)
278 # define idivr_u(r0) unr(X86_DIV, r0)
279 # define divremr(r0, r1, r2, i0, i1) _divremr(_jit, r0, r1, r2, i0, i1)
281 _divremr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,
282 jit_bool_t,jit_bool_t);
283 # define divremi(r0, r1, i0, i1, i2) _divremi(_jit, r0, r1, i0, i1, i2)
285 _divremi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_bool_t,jit_bool_t);
286 # define divr(r0, r1, r2) divremr(r0, r1, r2, 1, 1)
287 # define divi(r0, r1, i0) divremi(r0, r1, i0, 1, 1)
288 # define divr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 1)
289 # define divi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 1)
290 # define qdivr(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 1)
291 # define qdivr_u(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 0)
292 # define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign)
293 static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t,
294 jit_int32_t,jit_int32_t, jit_bool_t);
295 # define qdivi(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 1)
296 # define qdivi_u(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 0)
297 # define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign)
298 static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t,
299 jit_int32_t,jit_word_t, jit_bool_t);
300 # define remr(r0, r1, r2) divremr(r0, r1, r2, 1, 0)
301 # define remi(r0, r1, i0) divremi(r0, r1, i0, 1, 0)
302 # define remr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 0)
303 # define remi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 0)
/*
 * Bitwise and/or/xor helpers.  The `i`-prefixed two-operand names are
 * alur()/alui() with the X86_AND, X86_OR or X86_XOR selector; the
 * three-operand names forward to `_name(_jit, ...)`, taking either a third
 * register or a jit_word_t immediate.
 */
304 # define iandr(r0, r1) alur(X86_AND, r0, r1)
305 # define andr(r0, r1, r2) _andr(_jit, r0, r1, r2)
306 static void _andr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
307 # define iandi(r0, i0) alui(X86_AND, r0, i0)
308 # define andi(r0, r1, i0) _andi(_jit, r0, r1, i0)
309 static void _andi(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
310 # define iorr(r0, r1) alur(X86_OR, r0, r1)
311 # define orr(r0, r1, r2) _orr(_jit, r0, r1, r2)
312 static void _orr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
313 # define iori(r0, i0) alui(X86_OR, r0, i0)
314 # define ori(r0, r1, i0) _ori(_jit, r0, r1, i0)
315 static void _ori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
316 # define ixorr(r0, r1) alur(X86_XOR, r0, r1)
317 # define xorr(r0, r1, r2) _xorr(_jit, r0, r1, r2)
318 static void _xorr(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t);
319 # define ixori(r0, i0) alui(X86_XOR, r0, i0)
320 # define xori(r0, r1, i0) _xori(_jit, r0, r1, i0)
321 static void _xori(jit_state_t*, jit_int32_t,jit_int32_t,jit_word_t);
/*
 * Shift and rotate helpers.
 *
 * irotshr/rotshr take a selector code and register operands; irotshi/rotshi
 * take a jit_word_t immediate.  The public names pick the selector:
 *   lshr -> X86_SHL, rshr/rshi -> X86_SAR, rshr_u/rshi_u -> X86_SHR,
 *   lrotr/lroti -> X86_ROL, rrotr/rroti -> X86_ROR
 * (the selectors are defined outside this excerpt).  lshi does not go
 * through rotshi; it has its own function, _lshi.
 *
 * The q* names route to xlshr/xlshi/xrshr/xrshi, whose first argument is 1
 * for the plain names and 0 for the `_u` names.  Prototypes that start with
 * the function name have their return-type line outside this excerpt.
 */
322 # define irotshr(code, r0) _irotshr(_jit, code, r0)
323 static void _irotshr(jit_state_t*, jit_int32_t, jit_int32_t);
324 # define rotshr(code, r0, r1, r2) _rotshr(_jit, code, r0, r1, r2)
326 _rotshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
327 # define irotshi(code, r0, i0) _irotshi(_jit, code, r0, i0)
328 static void _irotshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
329 # define rotshi(code, r0, r1, i0) _rotshi(_jit, code, r0, r1, i0)
331 _rotshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
332 # define lshr(r0, r1, r2) rotshr(X86_SHL, r0, r1, r2)
333 # define qlshr(r0, r1, r2, r3) xlshr(1, r0, r1, r2, r3)
334 # define xlshr(s, r0, r1, r2, r3) _xlshr(_jit, s, r0, r1, r2, r3)
336 _xlshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
337 # define lshi(r0, r1, i0) _lshi(_jit, r0, r1, i0)
338 static void _lshi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
339 # define qlshi(r0, r1, r2, i0) xlshi(1, r0, r1, r2, i0)
340 # define xlshi(s, r0, r1, r2, i0) _xlshi(_jit, s, r0, r1, r2, i0)
342 _xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
343 # define qlshr_u(r0, r1, r2, r3) xlshr(0, r0, r1, r2, r3)
344 # define qlshi_u(r0, r1, r2, i0) xlshi(0, r0, r1, r2, i0)
345 # define rshr(r0, r1, r2) rotshr(X86_SAR, r0, r1, r2)
346 # define rshi(r0, r1, i0) rotshi(X86_SAR, r0, r1, i0)
347 # define rshr_u(r0, r1, r2) rotshr(X86_SHR, r0, r1, r2)
348 # define rshi_u(r0, r1, i0) rotshi(X86_SHR, r0, r1, i0)
349 # define qrshr(r0, r1, r2, r3) xrshr(1, r0, r1, r2, r3)
350 # define qrshr_u(r0, r1, r2, r3) xrshr(0, r0, r1, r2, r3)
351 # define xrshr(s, r0, r1, r2, r3) _xrshr(_jit, s, r0, r1, r2, r3)
353 _xrshr(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
354 # define qrshi(r0, r1, r2, i0) xrshi(1, r0, r1, r2, i0)
355 # define qrshi_u(r0, r1, r2, i0) xrshi(0, r0, r1, r2, i0)
356 # define xrshi(s, r0, r1, r2, i0) _xrshi(_jit, s, r0, r1, r2, i0)
358 _xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
359 # define lrotr(r0, r1, r2) rotshr(X86_ROL, r0, r1, r2)
360 # define lroti(r0, r1, i0) rotshi(X86_ROL, r0, r1, i0)
361 # define rrotr(r0, r1, r2) rotshr(X86_ROR, r0, r1, r2)
362 # define rroti(r0, r1, i0) rotshi(X86_ROR, r0, r1, i0)
/*
 * Unary helpers.  unr() takes a selector code and one register; inegr and
 * icomr are unr() with X86_NEG / X86_NOT (selectors defined outside this
 * excerpt).  All other names forward to `_name(_jit, r0, r1)` and take two
 * register numbers.
 */
363 # define unr(code, r0) _unr(_jit, code, r0)
364 static void _unr(jit_state_t*, jit_int32_t, jit_int32_t);
365 # define inegr(r0) unr(X86_NEG, r0)
366 # define negr(r0, r1) _negr(_jit, r0, r1)
367 static void _negr(jit_state_t*, jit_int32_t, jit_int32_t);
368 # define icomr(r0) unr(X86_NOT, r0)
369 # define comr(r0, r1) _comr(_jit, r0, r1)
370 static void _comr(jit_state_t*, jit_int32_t, jit_int32_t);
372 # define incr(r0, r1) _incr(_jit, r0, r1)
373 static void _incr(jit_state_t*, jit_int32_t, jit_int32_t);
374 # define decr(r0, r1) _decr(_jit, r0, r1)
375 static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
/* Bit-counting, bit-reversal and population-count wrappers (named clo, clz,
 * cto, ctz, rbit, popcnt). */
377 # define clor(r0, r1) _clor(_jit, r0, r1)
378 static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
379 # define clzr(r0, r1) _clzr(_jit, r0, r1)
380 static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
381 # define ctor(r0, r1) _ctor(_jit, r0, r1)
382 static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
383 # define ctzr(r0, r1) _ctzr(_jit, r0, r1)
384 static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
385 # define rbitr(r0, r1) _rbitr(_jit, r0, r1)
386 static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t);
387 # define popcntr(r0, r1) _popcntr(_jit, r0, r1)
388 static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t);
/*
 * Comparison helpers.
 *
 * cr(), ci() and ci0() take an X86_CC_* condition code followed by the
 * operands (two registers and a register, two registers and an immediate,
 * or just two registers).  The return-type lines of the _cr/_ci prototypes
 * are not visible in this excerpt.
 *
 * Most named comparisons forward to their own `_name(_jit, ...)` function.
 * Three immediate forms are plain macros over the generic helper instead:
 *   lti_u -> ci(X86_CC_B, ...), lei -> ci(X86_CC_LE, ...),
 *   gti   -> _ci(_jit, X86_CC_G, ...)
 * NOTE(review): gti calls _ci(_jit, ...) directly while the other two go
 * through the ci() macro; the expansions are equivalent.
 */
389 # define cr(code, r0, r1, r2) _cr(_jit, code, r0, r1, r2)
391 _cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
392 # define ci(code, r0, r1, i0) _ci(_jit, code, r0, r1, i0)
394 _ci(jit_state_t *_jit, jit_int32_t, jit_int32_t, jit_int32_t, jit_word_t);
395 # define ci0(code, r0, r1) _ci0(_jit, code, r0, r1)
396 static void _ci0(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
397 # define ltr(r0, r1, r2) _ltr(_jit, r0, r1, r2)
398 static void _ltr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
399 # define lti(r0, r1, i0) _lti(_jit, r0, r1, i0)
400 static void _lti(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
401 # define ltr_u(r0, r1, r2) _ltr_u(_jit, r0, r1, r2)
402 static void _ltr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
403 # define lti_u(r0, r1, i0) ci(X86_CC_B, r0, r1, i0)
404 # define ler(r0, r1, r2) _ler(_jit, r0, r1, r2)
405 static void _ler(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
406 # define lei(r0, r1, i0) ci(X86_CC_LE, r0, r1, i0)
407 # define ler_u(r0, r1, r2) _ler_u(_jit, r0, r1, r2)
408 static void _ler_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
409 # define lei_u(r0, r1, i0) _lei_u(_jit, r0, r1, i0)
410 static void _lei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
411 # define eqr(r0, r1, r2) _eqr(_jit, r0, r1, r2)
412 static void _eqr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
413 # define eqi(r0, r1, i0) _eqi(_jit, r0, r1, i0)
414 static void _eqi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
415 # define ger(r0, r1, r2) _ger(_jit, r0, r1, r2)
416 static void _ger(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
417 # define gei(r0, r1, i0) _gei(_jit, r0, r1, i0)
418 static void _gei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
419 # define ger_u(r0, r1, r2) _ger_u(_jit, r0, r1, r2)
420 static void _ger_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
421 # define gei_u(r0, r1, i0) _gei_u(_jit, r0, r1, i0)
422 static void _gei_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
423 # define gtr(r0, r1, r2) _gtr(_jit, r0, r1, r2)
424 static void _gtr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
425 # define gti(r0, r1, i0) _ci(_jit, X86_CC_G, r0, r1, i0)
426 # define gtr_u(r0, r1, r2) _gtr_u(_jit, r0, r1, r2)
427 static void _gtr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
428 # define gti_u(r0, r1, i0) _gti_u(_jit, r0, r1, i0)
429 static void _gti_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
430 # define ner(r0, r1, r2) _ner(_jit, r0, r1, r2)
431 static void _ner(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
432 # define nei(r0, r1, i0) _nei(_jit, r0, r1, i0)
433 static void _nei(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/*
 * Move, compare-and-swap, byte-swap and extract/extend helpers.  Unless
 * noted, each `name(...)` expands to `_name(_jit, ...)`.
 *
 *  - _movi_p returns a jit_word_t; the return-type line of the _movi
 *    prototype is not visible in this excerpt.
 *  - casr and casi both expand to casx(): casr passes 0 as the immediate,
 *    casi passes _NOREG as the second register and i0 as the immediate.
 *  - movir/movir_u, bswapr_ul and the extr_i/extr_ui aliases are declared
 *    only under `__X64 && !__X64_32` (the closing directives are not
 *    visible here).
 *  - extr_s/extr_us alias movsr/movsr_u; extr_i/extr_ui alias
 *    movir/movir_u.
 */
434 # define movr(r0, r1) _movr(_jit, r0, r1)
435 static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
436 # define imovi(r0, i0) _imovi(_jit, r0, i0)
437 static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
438 # define movi(r0, i0) _movi(_jit, r0, i0)
445 _movi(jit_state_t*, jit_int32_t, jit_word_t);
446 # define movi_p(r0, i0) _movi_p(_jit, r0, i0)
447 static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
448 # define movcr(r0, r1) _movcr(_jit, r0, r1)
449 static void _movcr(jit_state_t*,jit_int32_t,jit_int32_t);
450 # define movcr_u(r0, r1) _movcr_u(_jit, r0, r1)
451 static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
452 # define movsr(r0, r1) _movsr(_jit, r0, r1)
453 static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
454 # define movsr_u(r0, r1) _movsr_u(_jit, r0, r1)
455 static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
456 # define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
457 static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
458 jit_int32_t,jit_int32_t,jit_word_t);
459 #define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
460 #define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
461 #define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2)
462 static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
463 #define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2)
464 static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
465 # if __X64 && !__X64_32
466 # define movir(r0, r1) _movir(_jit, r0, r1)
467 static void _movir(jit_state_t*,jit_int32_t,jit_int32_t);
468 # define movir_u(r0, r1) _movir_u(_jit, r0, r1)
469 static void _movir_u(jit_state_t*,jit_int32_t,jit_int32_t);
471 # define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
472 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
473 # define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
474 static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
475 # if __X64 && !__X64_32
476 #define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1)
477 static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
/* extr/extr_u/depr take two registers and two jit_word_t immediates. */
479 # define extr(r0, r1, i0, i1) _extr(_jit, r0, r1, i0, i1)
480 static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
481 # define extr_u(r0, r1, i0, i1) _extr_u(_jit, r0, r1, i0, i1)
482 static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
483 # define depr(r0, r1, i0, i1) _depr(_jit, r0, r1, i0, i1)
484 static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
485 # define extr_c(r0, r1) _extr_c(_jit, r0, r1)
486 static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
487 # define extr_uc(r0, r1) _extr_uc(_jit, r0, r1)
488 static void _extr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
489 # define extr_s(r0, r1) movsr(r0, r1)
490 # define extr_us(r0, r1) movsr_u(r0, r1)
491 # if __X64 && !__X64_32
492 # define extr_i(r0, r1) movir(r0, r1)
493 # define extr_ui(r0, r1) movir_u(r0, r1)
/*
 * Load helpers.  Name pattern: ldr_* takes (r0, r1) registers, ldi_* takes
 * a register and a jit_word_t immediate, ldxr_* takes three registers and
 * ldxi_* takes two registers and a jit_word_t immediate.  The suffix
 * (_c, _uc, _s, _us, _i, _ui, _l) selects the variant.
 *
 * ldr_i/ldi_i and ldxr_i/ldxi_i each appear twice below: once mapped to
 * their own _ld*_i function (under `__X32 || !__X64_32`) and once mapped to
 * the _ld*_ui function.  The directives that separate the two definitions
 * are not visible in this excerpt.
 *
 * unldr/unldi and their `_u` forms expand to the generic_unld* helpers,
 * which are defined outside this excerpt.
 */
495 # define ldr_c(r0, r1) _ldr_c(_jit, r0, r1)
496 static void _ldr_c(jit_state_t*, jit_int32_t, jit_int32_t);
497 # define ldi_c(r0, i0) _ldi_c(_jit, r0, i0)
498 static void _ldi_c(jit_state_t*, jit_int32_t, jit_word_t);
499 # define ldr_uc(r0, r1) _ldr_uc(_jit, r0, r1)
500 static void _ldr_uc(jit_state_t*, jit_int32_t, jit_int32_t);
501 # define ldi_uc(r0, i0) _ldi_uc(_jit, r0, i0)
502 static void _ldi_uc(jit_state_t*, jit_int32_t, jit_word_t);
503 # define ldr_s(r0, r1) _ldr_s(_jit, r0, r1)
504 static void _ldr_s(jit_state_t*, jit_int32_t, jit_int32_t);
505 # define ldi_s(r0, i0) _ldi_s(_jit, r0, i0)
506 static void _ldi_s(jit_state_t*, jit_int32_t, jit_word_t);
507 # define ldr_us(r0, r1) _ldr_us(_jit, r0, r1)
508 static void _ldr_us(jit_state_t*, jit_int32_t, jit_int32_t);
509 # define ldi_us(r0, i0) _ldi_us(_jit, r0, i0)
510 static void _ldi_us(jit_state_t*, jit_int32_t, jit_word_t);
511 # if __X32 || !__X64_32
512 # define ldr_i(r0, r1) _ldr_i(_jit, r0, r1)
513 static void _ldr_i(jit_state_t*, jit_int32_t, jit_int32_t);
514 # define ldi_i(r0, i0) _ldi_i(_jit, r0, i0)
515 static void _ldi_i(jit_state_t*, jit_int32_t, jit_word_t);
519 # define ldr_i(r0, r1) _ldr_ui(_jit, r0, r1)
520 # define ldi_i(r0, i0) _ldi_ui(_jit, r0, i0)
522 # define ldr_ui(r0, r1) _ldr_ui(_jit, r0, r1)
523 # define ldi_ui(r0, i0) _ldi_ui(_jit, r0, i0)
525 static void _ldr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
526 static void _ldi_ui(jit_state_t*, jit_int32_t, jit_word_t);
528 # define ldr_l(r0, r1) _ldr_l(_jit, r0, r1)
529 static void _ldr_l(jit_state_t*, jit_int32_t, jit_int32_t);
530 # define ldi_l(r0, i0) _ldi_l(_jit, r0, i0)
531 static void _ldi_l(jit_state_t*, jit_int32_t, jit_word_t);
/* Indexed forms: same suffixes, one extra operand. */
534 # define ldxr_c(r0, r1, r2) _ldxr_c(_jit, r0, r1, r2)
535 static void _ldxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
536 # define ldxi_c(r0, r1, i0) _ldxi_c(_jit, r0, r1, i0)
537 static void _ldxi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
538 # define ldxr_uc(r0, r1, r2) _ldxr_uc(_jit, r0, r1, r2)
539 static void _ldxr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
540 # define ldxi_uc(r0, r1, i0) _ldxi_uc(_jit, r0, r1, i0)
541 static void _ldxi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
542 # define ldxr_s(r0, r1, r2) _ldxr_s(_jit, r0, r1, r2)
543 static void _ldxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
544 # define ldxi_s(r0, r1, i0) _ldxi_s(_jit, r0, r1, i0)
545 static void _ldxi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
546 # define ldxr_us(r0, r1, r2) _ldxr_us(_jit, r0, r1, r2)
547 static void _ldxr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
548 # define ldxi_us(r0, r1, i0) _ldxi_us(_jit, r0, r1, i0)
549 static void _ldxi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
550 # if __X32 || !__X64_32
551 # define ldxr_i(r0, r1, r2) _ldxr_i(_jit, r0, r1, r2)
552 static void _ldxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
553 # define ldxi_i(r0, r1, i0) _ldxi_i(_jit, r0, r1, i0)
554 static void _ldxi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
558 # define ldxr_i(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2)
559 # define ldxi_i(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0)
561 # define ldxr_ui(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2)
562 # define ldxi_ui(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0)
564 static void _ldxr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
565 static void _ldxi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
567 # define ldxr_l(r0, r1, r2) _ldxr_l(_jit, r0, r1, r2)
568 static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
569 # define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0)
570 static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
573 # define unldr(r0, r1, i0) generic_unldr(r0, r1, i0)
574 # define unldi(r0, i0, i1) generic_unldi(r0, i0, i1)
575 # define unldr_u(r0, r1, i0) generic_unldr_u(r0, r1, i0)
576 # define unldi_u(r0, i0, i1) generic_unldi_u(r0, i0, i1)
/*
 * Store helpers.  Name pattern: str_* takes two registers, stxr_* takes
 * three.  For the immediate forms the jit_word_t immediate comes FIRST:
 * sti_*(i0, r0) and stxi_*(i0, r0, r1).  The suffix (_c, _s, _i, _l)
 * selects the variant; the `_l` forms are declared only under
 * `__X64 && !__X64_32` (closing directives not visible here).
 *
 * unstr/unsti expand to generic_unstr/generic_unsti, which are defined
 * outside this excerpt.
 */
577 # define str_c(r0, r1) _str_c(_jit, r0, r1)
578 static void _str_c(jit_state_t*, jit_int32_t, jit_int32_t);
579 # define sti_c(i0, r0) _sti_c(_jit, i0, r0)
580 static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t);
581 # define str_s(r0, r1) _str_s(_jit, r0, r1)
582 static void _str_s(jit_state_t*, jit_int32_t, jit_int32_t);
583 # define sti_s(i0, r0) _sti_s(_jit, i0, r0)
584 static void _sti_s(jit_state_t*, jit_word_t, jit_int32_t);
585 # define str_i(r0, r1) _str_i(_jit, r0, r1)
586 static void _str_i(jit_state_t*, jit_int32_t, jit_int32_t);
587 # define sti_i(i0, r0) _sti_i(_jit, i0, r0)
588 static void _sti_i(jit_state_t*, jit_word_t, jit_int32_t);
589 # if __X64 && !__X64_32
590 # define str_l(r0, r1) _str_l(_jit, r0, r1)
591 static void _str_l(jit_state_t*, jit_int32_t, jit_int32_t);
592 # define sti_l(i0, r0) _sti_l(_jit, i0, r0)
593 static void _sti_l(jit_state_t*, jit_word_t, jit_int32_t);
595 # define stxr_c(r0, r1, r2) _stxr_c(_jit, r0, r1, r2)
596 static void _stxr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
597 # define stxi_c(i0, r0, r1) _stxi_c(_jit, i0, r0, r1)
598 static void _stxi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
599 # define stxr_s(r0, r1, r2) _stxr_s(_jit, r0, r1, r2)
600 static void _stxr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
601 # define stxi_s(i0, r0, r1) _stxi_s(_jit, i0, r0, r1)
602 static void _stxi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
603 # define stxr_i(r0, r1, r2) _stxr_i(_jit, r0, r1, r2)
604 static void _stxr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
605 # define stxi_i(i0, r0, r1) _stxi_i(_jit, i0, r0, r1)
606 static void _stxi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
607 # if __X64 && !__X64_32
608 # define stxr_l(r0, r1, r2) _stxr_l(_jit, r0, r1, r2)
609 static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
610 # define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1)
611 static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
613 #define unstr(r0, r1, i0) generic_unstr(r0, r1, i0)
614 #define unsti(i0, r0, i1) generic_unsti(i0, r0, i1)
/*
 * Conditional-jump wrappers.  jcc(code, i0) forwards to _jcc(_jit, code, i0)
 * and each jXX(i0) is jcc() with the matching X86_CC_XX code; both take a
 * jit_word_t operand and _jcc returns a jit_word_t.
 */
615 # define jcc(code, i0) _jcc(_jit, code, i0)
616 # define jo(i0) jcc(X86_CC_O, i0)
617 # define jno(i0) jcc(X86_CC_NO, i0)
618 # define jnae(i0) jcc(X86_CC_NAE, i0)
619 # define jb(i0) jcc(X86_CC_B, i0)
620 # define jc(i0) jcc(X86_CC_C, i0)
621 # define jae(i0) jcc(X86_CC_AE, i0)
622 # define jnb(i0) jcc(X86_CC_NB, i0)
623 # define jnc(i0) jcc(X86_CC_NC, i0)
624 # define je(i0) jcc(X86_CC_E, i0)
625 # define jz(i0) jcc(X86_CC_Z, i0)
626 # define jne(i0) jcc(X86_CC_NE, i0)
627 # define jnz(i0) jcc(X86_CC_NZ, i0)
628 # define jbe(i0) jcc(X86_CC_BE, i0)
629 # define jna(i0) jcc(X86_CC_NA, i0)
630 # define ja(i0) jcc(X86_CC_A, i0)
631 # define jnbe(i0) jcc(X86_CC_NBE, i0)
632 # define js(i0) jcc(X86_CC_S, i0)
633 # define jns(i0) jcc(X86_CC_NS, i0)
634 # define jp(i0) jcc(X86_CC_P, i0)
635 # define jpe(i0) jcc(X86_CC_PE, i0)
636 # define jnp(i0) jcc(X86_CC_NP, i0)
637 # define jpo(i0) jcc(X86_CC_PO, i0)
638 # define jl(i0) jcc(X86_CC_L, i0)
639 # define jnge(i0) jcc(X86_CC_NGE, i0)
640 # define jge(i0) jcc(X86_CC_GE, i0)
641 # define jnl(i0) jcc(X86_CC_NL, i0)
642 # define jle(i0) jcc(X86_CC_LE, i0)
643 # define jng(i0) jcc(X86_CC_NG, i0)
644 # define jg(i0) jcc(X86_CC_G, i0)
645 # define jnle(i0) jcc(X86_CC_NLE, i0)
646 static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t);
/*
 * Same set with an `s` suffix, routed through jccs()/_jccs instead.
 * NOTE(review): presumably the short-displacement encoding -- confirm
 * against the _jccs definition, which is outside this excerpt.
 */
647 # define jccs(code, i0) _jccs(_jit, code, i0)
648 # define jos(i0) jccs(X86_CC_O, i0)
649 # define jnos(i0) jccs(X86_CC_NO, i0)
650 # define jnaes(i0) jccs(X86_CC_NAE, i0)
651 # define jbs(i0) jccs(X86_CC_B, i0)
652 # define jcs(i0) jccs(X86_CC_C, i0)
653 # define jaes(i0) jccs(X86_CC_AE, i0)
654 # define jnbs(i0) jccs(X86_CC_NB, i0)
655 # define jncs(i0) jccs(X86_CC_NC, i0)
656 # define jes(i0) jccs(X86_CC_E, i0)
657 # define jzs(i0) jccs(X86_CC_Z, i0)
658 # define jnes(i0) jccs(X86_CC_NE, i0)
659 # define jnzs(i0) jccs(X86_CC_NZ, i0)
660 # define jbes(i0) jccs(X86_CC_BE, i0)
661 # define jnas(i0) jccs(X86_CC_NA, i0)
662 # define jas(i0) jccs(X86_CC_A, i0)
663 # define jnbes(i0) jccs(X86_CC_NBE, i0)
664 # define jss(i0) jccs(X86_CC_S, i0)
665 # define jnss(i0) jccs(X86_CC_NS, i0)
666 # define jps(i0) jccs(X86_CC_P, i0)
667 # define jpes(i0) jccs(X86_CC_PE, i0)
668 # define jnps(i0) jccs(X86_CC_NP, i0)
669 # define jpos(i0) jccs(X86_CC_PO, i0)
670 # define jls(i0) jccs(X86_CC_L, i0)
671 # define jnges(i0) jccs(X86_CC_NGE, i0)
672 # define jges(i0) jccs(X86_CC_GE, i0)
673 # define jnls(i0) jccs(X86_CC_NL, i0)
674 # define jles(i0) jccs(X86_CC_LE, i0)
675 # define jngs(i0) jccs(X86_CC_NG, i0)
676 # define jgs(i0) jccs(X86_CC_G, i0)
677 # define jnles(i0) jccs(X86_CC_NLE, i0)
678 static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t);
679 # define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1)
680 static jit_word_t _jcr(jit_state_t*,
681 jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
682 # define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1)
683 static jit_word_t _jci(jit_state_t*,
684 jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
685 # define jci0(code, i0, r0) _jci0(_jit, code, i0, r0)
686 static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
/* Branch family b<cond>{r,i}[_u].  Every NAME(...) macro prepends the
 * implicit `_jit` state and calls the matching static _NAME function.
 *   - "r" forms take (jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 *   - "i" forms take (jit_word_t i0, jit_int32_t r0, jit_word_t  i1)
 * All of them return jit_word_t.
 * NOTE(review): presumably i0 is the branch destination, r0/r1 are register
 * numbers, i1 is an immediate, "_u" marks the unsigned comparison, and the
 * return value is what patch_at() later receives -- the definitions are not
 * part of this excerpt, so confirm against them. */
687 # define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1)
688 static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
689 # define blti(i0, r0, i1) _blti(_jit, i0, r0, i1)
690 static jit_word_t _blti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
691 # define bltr_u(i0, r0, r1) _bltr_u(_jit, i0, r0, r1)
692 static jit_word_t _bltr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
693 # define blti_u(i0, r0, i1) _blti_u(_jit, i0, r0, i1)
694 static jit_word_t _blti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
695 # define bler(i0, r0, r1) _bler(_jit, i0, r0, r1)
696 static jit_word_t _bler(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
697 # define blei(i0, r0, i1) _blei(_jit, i0, r0, i1)
698 static jit_word_t _blei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
699 # define bler_u(i0, r0, r1) _bler_u(_jit, i0, r0, r1)
700 static jit_word_t _bler_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
701 # define blei_u(i0, r0, i1) _blei_u(_jit, i0, r0, i1)
702 static jit_word_t _blei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
703 # define beqr(i0, r0, r1) _beqr(_jit, i0, r0, r1)
704 static jit_word_t _beqr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
705 # define beqi(i0, r0, i1) _beqi(_jit, i0, r0, i1)
706 static jit_word_t _beqi(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
707 # define bger(i0, r0, r1) _bger(_jit, i0, r0, r1)
708 static jit_word_t _bger(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
709 # define bgei(i0, r0, i1) _bgei(_jit, i0, r0, i1)
710 static jit_word_t _bgei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
711 # define bger_u(i0, r0, r1) _bger_u(_jit, i0, r0, r1)
712 static jit_word_t _bger_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
713 # define bgei_u(i0, r0, i1) _bgei_u(_jit, i0, r0, i1)
714 static jit_word_t _bgei_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
715 # define bgtr(i0, r0, r1) _bgtr(_jit, i0, r0, r1)
716 static jit_word_t _bgtr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
717 # define bgti(i0, r0, i1) _bgti(_jit, i0, r0, i1)
718 static jit_word_t _bgti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
719 # define bgtr_u(i0, r0, r1) _bgtr_u(_jit, i0, r0, r1)
720 static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
721 # define bgti_u(i0, r0, i1) _bgti_u(_jit, i0, r0, i1)
722 static jit_word_t _bgti_u(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
723 # define bner(i0, r0, r1) _bner(_jit, i0, r0, r1)
724 static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
725 # define bnei(i0, r0, i1) _bnei(_jit, i0, r0, i1)
726 static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
/* bmsr/bmsi/bmcr/bmci: same calling shape as the other b* helpers --
 * (jit_word_t i0, jit_int32_t r0, then a jit_int32_t r1 or a jit_word_t i1),
 * returning jit_word_t.
 * NOTE(review): the names suggest "branch if mask set" / "branch if mask
 * cleared"; the bodies are not visible here -- confirm. */
727 # define bmsr(i0, r0, r1) _bmsr(_jit, i0, r0, r1)
728 static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
729 # define bmsi(i0, r0, i1) _bmsi(_jit, i0, r0, i1)
730 static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
731 # define bmcr(i0, r0, r1) _bmcr(_jit, i0, r0, r1)
732 static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
733 # define bmci(i0, r0, i1) _bmci(_jit, i0, r0, i1)
734 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
/* b{o,x}{add,sub}{r,i}[_u]: arithmetic-and-branch helpers sharing the
 * signature pattern of the other b* functions: (jit_word_t i0,
 * jit_int32_t r0, and either jit_int32_t r1 or jit_word_t i1), returning
 * jit_word_t.
 * NOTE(review): presumably "bo" branches when the add/sub overflows (carries
 * for the _u forms) and "bx" branches when it does not; this is inferred
 * from the names only -- confirm against the definitions. */
735 # define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1)
736 static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
737 # define boaddi(i0, r0, i1) _boaddi(_jit, i0, r0, i1)
738 static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
739 # define boaddr_u(i0, r0, r1) _boaddr_u(_jit, i0, r0, r1)
740 static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
741 # define boaddi_u(i0, r0, i1) _boaddi_u(_jit, i0, r0, i1)
742 static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
743 # define bxaddr(i0, r0, r1) _bxaddr(_jit, i0, r0, r1)
744 static jit_word_t _bxaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
745 # define bxaddi(i0, r0, i1) _bxaddi(_jit, i0, r0, i1)
746 static jit_word_t _bxaddi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
747 # define bxaddr_u(i0, r0, r1) _bxaddr_u(_jit, i0, r0, r1)
748 static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
749 # define bxaddi_u(i0, r0, i1) _bxaddi_u(_jit, i0, r0, i1)
750 static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
751 # define bosubr(i0, r0, r1) _bosubr(_jit, i0, r0, r1)
752 static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
753 # define bosubi(i0, r0, i1) _bosubi(_jit, i0, r0, i1)
754 static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
755 # define bosubr_u(i0, r0, r1) _bosubr_u(_jit, i0, r0, r1)
756 static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
757 # define bosubi_u(i0, r0, i1) _bosubi_u(_jit, i0, r0, i1)
758 static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
759 # define bxsubr(i0, r0, r1) _bxsubr(_jit, i0, r0, r1)
760 static jit_word_t _bxsubr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
761 # define bxsubi(i0, r0, i1) _bxsubi(_jit, i0, r0, i1)
762 static jit_word_t _bxsubi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
763 # define bxsubr_u(i0, r0, r1) _bxsubr_u(_jit, i0, r0, r1)
764 static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
765 # define bxsubi_u(i0, r0, i1) _bxsubi_u(_jit, i0, r0, i1)
766 static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
/* Call helpers: callr takes a jit_int32_t r0 and returns nothing; calli
 * takes a jit_word_t i0 and returns a jit_word_t. */
767 # define callr(r0) _callr(_jit, r0)
768 static void _callr(jit_state_t*, jit_int32_t);
769 # define calli(i0) _calli(_jit, i0)
770 static jit_word_t _calli(jit_state_t*, jit_word_t);
/* NOTE(review): two alternative definitions of calli_p follow -- a dedicated
 * _calli_p function, or a plain alias of calli.  The preprocessor
 * conditional that selects between them is not visible in this excerpt;
 * confirm the condition in the full file. */
772 # define calli_p(i0) _calli_p(_jit, i0)
773 static jit_word_t _calli_p(jit_state_t*, jit_word_t);
775 # define calli_p(i0) calli(i0)
/* Unconditional jump helpers: jmpr takes a jit_int32_t r0 and returns
 * nothing; jmpi takes a jit_word_t i0 and returns a jit_word_t. */
777 # define jmpr(r0) _jmpr(_jit, r0)
778 static void _jmpr(jit_state_t*, jit_int32_t);
779 # define jmpi(i0) _jmpi(_jit, i0)
780 static jit_word_t _jmpi(jit_state_t*, jit_word_t);
/* NOTE(review): two alternative definitions of jmpi_p follow -- a dedicated
 * _jmpi_p function, or a plain alias of jmpi.  The preprocessor conditional
 * that selects between them is not visible in this excerpt; confirm the
 * condition in the full file. */
782 # define jmpi_p(i0) _jmpi_p(_jit, i0)
783 static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
785 # define jmpi_p(i0) jmpi(i0)
/* jmpsi takes a jit_uint8_t argument and returns a jit_word_t.  Callers in
 * this file pass _jit->pc.w as the argument and later pass the returned
 * value to patch_at(), e.g.
 *   done = jmpsi(_jit->pc.w); ... patch_at(done, _jit->pc.w); */
787 # define jmpsi(i0) _jmpsi(_jit, i0)
788 static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t);
/* prolog/epilog each receive a jit_node_t* and return nothing.
 * NOTE(review): presumably they emit function entry/exit code for that
 * node -- bodies not visible here, confirm. */
789 # define prolog(node) _prolog(_jit, node)
790 static void _prolog(jit_state_t*, jit_node_t*);
791 # define epilog(node) _epilog(_jit, node)
792 static void _epilog(jit_state_t*, jit_node_t*);
/* Variadic-argument helpers; all operands are jit_int32_t except the
 * trailing jit_bool_t i0 of vaarg_d.
 * NOTE(review): the meaning of that flag is not visible in this excerpt. */
793 # define vastart(r0) _vastart(_jit, r0)
794 static void _vastart(jit_state_t*, jit_int32_t);
795 # define vaarg(r0, r1) _vaarg(_jit, r0, r1)
796 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
797 # define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0)
798 static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
/* patch_at(instr, label): both arguments are jit_word_t.  Within this file
 * `instr` is a value previously returned by a jump emitter (jccs, jmpsi,
 * ...) and `label` is typically the current _jit->pc.w. */
799 # define patch_at(instr, label) _patch_at(_jit, instr, label)
800 static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
801 # if !defined(HAVE_FFSL)
803 # define ffsl(i) __builtin_ffs(i)
805 # define ffsl(l) __builtin_ffsl(l)
808 # define jit_cmov_p() jit_cpu.cmov
813 _rex(jit_state_t *_jit, jit_int32_t l, jit_int32_t w,
814 jit_int32_t r, jit_int32_t x, jit_int32_t b)
817 jit_int32_t v = 0x40 | (w << 3);
831 _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
832 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms)
836 /* Use ms == _SCL8 to tell it is a %rip relative displacement */
840 mrm(0x00, r7(rd), 0x05);
843 mrm(0x00, r7(rd), 0x04);
844 sib(_SCL1, 0x04, 0x05);
849 else if (r7(rb) == _RSP_REGNO) {
851 mrm(0x00, r7(rd), 0x04);
854 else if ((jit_int8_t)md == md) {
855 mrm(0x01, r7(rd), 0x04);
860 mrm(0x02, r7(rd), 0x04);
866 if (md == 0 && r7(rb) != _RBP_REGNO)
867 mrm(0x00, r7(rd), r7(rb));
868 else if ((jit_int8_t)md == md) {
869 mrm(0x01, r7(rd), r7(rb));
873 mrm(0x02, r7(rd), r7(rb));
878 else if (rb == _NOREG) {
879 mrm(0x00, r7(rd), 0x04);
880 sib(ms, r7(ri), 0x05);
883 else if (r8(ri) != _RSP_REGNO) {
884 if (md == 0 && r7(rb) != _RBP_REGNO) {
885 mrm(0x00, r7(rd), 0x04);
886 sib(ms, r7(ri), r7(rb));
888 else if ((jit_int8_t)md == md) {
889 mrm(0x01, r7(rd), 0x04);
890 sib(ms, r7(ri), r7(rb));
894 mrm(0x02, r7(rd), 0x04);
895 sib(ms, r7(ri), r7(rb));
900 fprintf(stderr, "illegal index register");
906 _vex(jit_state_t *_jit, jit_int32_t r, jit_int32_t x, jit_int32_t b,
907 jit_int32_t map, jit_int32_t w, jit_int32_t vvvv, jit_int32_t l,
911 if (r == _NOREG) r = 0;
912 if (x == _NOREG) x = 0;
913 if (b == _NOREG) b = 0;
914 if (map == 1 && w == 0 && ((x|b) & 8) == 0) {
915 /* Two byte prefix */
918 v = (r & 8) ? 0 : 0x80;
921 /* Three byte prefix */
929 if (!(r & 8)) v |= 0x80;
931 if (!(x & 8)) v |= 0x40;
933 if (!(b & 8)) v |= 0x20;
939 v |= (~vvvv & 0x0f) << 3;
948 _nop(jit_state_t *_jit, jit_int32_t count)
964 case 3: /* NOP DWORD ptr [EAX] */
965 ic(0x0f); ic(0x1f); ic(0x00);
967 case 4: /* NOP DWORD ptr [EAX + 00H] */
968 ic(0x0f); ic(0x1f); ic(0x40); ic(0x00);
970 case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */
971 ic(0x0f); ic(0x1f); ic(0x44); ic(0x00);
974 case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */
975 ic(0x66); ic(0x0f); ic(0x1f); ic(0x44);
978 case 7: /* NOP DWORD ptr [EAX + 00000000H] */
979 ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000);
981 case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
982 ic(0x0f); ic(0x1f); ic(0x84); ic(0x00);
985 case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */
986 ic(0x66); ic(0x0f); ic(0x1f); ic(0x84);
987 ic(0x00); ii(0x0000);
994 _lea(jit_state_t *_jit, jit_int32_t md, jit_int32_t rb,
995 jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
997 rex(0, WIDE, rd, ri, rb);
999 rx(rd, md, rb, ri, ms);
1003 _pushr(jit_state_t *_jit, jit_int32_t r0)
1005 rex(0, WIDE, 0, 0, r0);
1010 _popr(jit_state_t *_jit, jit_int32_t r0)
1012 rex(0, WIDE, 0, 0, r0);
1017 _xchgr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1019 rex(0, WIDE, r1, _NOREG, r0);
1021 mrm(0x03, r7(r1), r7(r0));
1025 _testr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1027 rex(0, WIDE, r1, _NOREG, r0);
1029 mrm(0x03, r7(r1), r7(r0));
1033 _testi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1035 rex(0, WIDE, _NOREG, _NOREG, r0);
1036 if (r0 == _RAX_REGNO)
1040 mrm(0x03, 0x00, r7(r0));
1046 _cc(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1048 rex(0, 0, _NOREG, _NOREG, r0);
1051 mrm(0x03, 0x00, r7(r0));
1055 _alur(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
1057 rex(0, WIDE, r1, _NOREG, r0);
1059 mrm(0x03, r7(r1), r7(r0));
1063 _alui(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
1066 if (can_sign_extend_int_p(i0)) {
1067 rex(0, WIDE, _NOREG, _NOREG, r0);
1068 if ((jit_int8_t)i0 == i0) {
1070 ic(0xc0 | code | r7(r0));
1074 if (r0 == _RAX_REGNO)
1078 ic(0xc0 | code | r7(r0));
1084 reg = jit_get_reg(jit_class_gpr);
1086 alur(code, r0, rn(reg));
1092 _save(jit_state_t *_jit, jit_int32_t r0)
1094 if (!_jitc->function->regoff[r0]) {
1095 _jitc->function->regoff[r0] = jit_allocai(sizeof(jit_word_t));
1098 assert(!jit_regset_tstbit(&_jitc->regsav, r0));
1099 jit_regset_setbit(&_jitc->regsav, r0);
1100 stxi(_jitc->function->regoff[r0], _RBP_REGNO, r0);
1104 _load(jit_state_t *_jit, jit_int32_t r0)
1106 assert(_jitc->function->regoff[r0]);
1107 assert(jit_regset_tstbit(&_jitc->regsav, r0));
1108 jit_regset_clrbit(&_jitc->regsav, r0);
1109 ldxi(r0, _RBP_REGNO, _jitc->function->regoff[r0]);
1113 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1120 lea(0, r1, r2, _SCL1, r0);
1124 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1135 else if (can_sign_extend_int_p(i0)) {
1139 lea(i0, r1, _NOREG, _SCL1, r0);
1141 else if (r0 != r1) {
1146 reg = jit_get_reg(jit_class_gpr);
1154 _addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1165 _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1168 if (can_sign_extend_int_p(i0)) {
1172 else if (r0 == r1) {
1173 reg = jit_get_reg(jit_class_gpr);
1185 _iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1187 /* FIXME: this is not doing what I expected for the simple test case:
1188 * mov $0xffffffffffffffff, %rax -- rax = 0xffffffffffffffff (-1)
1189 * mov $0xffffffffffffffff, %r10 -- r10 = 0xffffffffffffffff (-1)
1190 * mov $0x1, %r11d -- r11 = 1
1191 * xor %rbx, %rbx -- rbx = 0
1194 * add %r11, %rax -- r11 = 0x10000000000000000 (0)
1195 * does not fit in 64 bit ^
1197 * $2 = [ CF PF AF ZF IF ]
1198 * adcx %r10, %rbx -- r10 = 0xffffffffffffffff (-1)
1200 * $3 = [ CF PF AF ZF IF ]
1202 * $4 = 0xffffffffffffffff
1203 * but, r10 should be zero, as it is:
1204 * -1 (%r10) + 0 (%rbx) + carry (!!eflags.CF)
1205 * FIXME: maybe should only use ADCX in the third operation onward, that
1206 * is, after the first ADC? In either case, the add -1+0+carry should
1207 * have used and consumed the carry? At least this is what is expected
1211 /* Significantly longer instruction, but avoid cpu stalls as only
1212 * the carry flag is used in a sequence. */
1216 rex(0, WIDE, r1, _NOREG, r0);
1220 mrm(0x03, r7(r1), r7(r0));
1224 alur(X86_ADC, r0, r1);
1228 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1239 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1244 /* Do not mix ADC and ADCX */
1247 can_sign_extend_int_p(i0)) {
1251 else if (r0 == r1) {
1252 reg = jit_get_reg(jit_class_gpr);
1254 iaddxr(r0, rn(reg));
1264 _subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1268 else if (r0 == r2) {
1279 _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1290 else if (can_sign_extend_int_p(i0)) {
1294 lea(-i0, r1, _NOREG, _SCL1, r0);
1296 else if (r0 != r1) {
1301 reg = jit_get_reg(jit_class_gpr);
1309 _subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1312 if (r0 == r2 && r0 != r1) {
1313 reg = jit_get_reg(jit_class_gpr);
1326 _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1330 if (can_sign_extend_int_p(i0))
1333 reg = jit_get_reg(jit_class_gpr);
1341 _subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1344 if (r0 == r2 && r0 != r1) {
1345 reg = jit_get_reg(jit_class_gpr);
1348 isubxr(r0, rn(reg));
1358 _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1362 if (can_sign_extend_int_p(i0))
1365 reg = jit_get_reg(jit_class_gpr);
1367 isubxr(r0, rn(reg));
1373 _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1380 _imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1382 rex(0, WIDE, r0, _NOREG, r1);
1385 mrm(0x03, r7(r0), r7(r1));
1389 _imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1392 if (can_sign_extend_int_p(i0)) {
1393 rex(0, WIDE, r0, _NOREG, r1);
1394 if ((jit_int8_t)i0 == i0) {
1396 mrm(0x03, r7(r0), r7(r1));
1401 mrm(0x03, r7(r0), r7(r1));
1406 reg = jit_get_reg(jit_class_gpr);
1414 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1427 _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1440 lea(0, _NOREG, r1, _SCL2, r0);
1443 lea(0, _NOREG, r1, _SCL4, r0);
1446 lea(0, _NOREG, r1, _SCL8, r0);
1449 if (i0 > 0 && !(i0 & (i0 - 1)))
1450 lshi(r0, r1, ffsl(i0) - 1);
1451 else if (can_sign_extend_int_p(i0))
1453 else if (r0 != r1) {
1463 #define savset(rn) \
1467 if (r1 != rn && r2 != rn) \
1471 #define isavset(rn) \
1479 #define qsavset(rn) \
1481 if (r0 != rn && r1 != rn) { \
1483 if (r2 != rn && r3 != rn) \
1487 #define allocr(rn, rv) \
1489 if (set & (1 << rn)) \
1490 (void)jit_get_reg(rv|jit_class_gpr|jit_class_named); \
1491 if (sav & (1 << rn)) { \
1492 if ( jit_regset_tstbit(&_jitc->regsav, rv) || \
1493 !jit_regset_tstbit(&_jitc->reglive, rv)) \
1494 sav &= ~(1 << rn); \
1499 #define clear(rn, rv) \
1501 if (set & (1 << rn)) \
1502 jit_unget_reg(rv); \
1503 if (sav & (1 << rn)) \
1508 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1509 jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1516 qsavset(_RDX_REGNO);
1517 qsavset(_RAX_REGNO);
1518 allocr(_RDX_REGNO, _RDX);
1519 allocr(_RAX_REGNO, _RAX);
1521 if (r3 == _RAX_REGNO)
1525 movr(_RAX_REGNO, r2);
1532 if (r0 != JIT_NOREG) {
1533 if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1534 xchgr(_RAX_REGNO, _RDX_REGNO);
1536 if (r0 != _RDX_REGNO)
1537 movr(r0, _RAX_REGNO);
1538 movr(r1, _RDX_REGNO);
1539 if (r0 == _RDX_REGNO)
1540 movr(r0, _RAX_REGNO);
1544 assert(r1 != JIT_NOREG);
1545 movr(r1, _RDX_REGNO);
1548 clear(_RDX_REGNO, _RDX);
1549 clear(_RAX_REGNO, _RAX);
1553 _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1554 jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1563 reg = jit_get_reg(jit_class_gpr);
1566 qmulr(r0, r1, r2, rn(reg));
1568 qmulr_u(r0, r1, r2, rn(reg));
1574 _sign_extend_rdx_rax(jit_state_t *_jit)
1576 rex(0, WIDE, 0, 0, 0);
1581 _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2,
1582 jit_bool_t sign, jit_bool_t divide)
1590 sav = set = use = 0;
1593 allocr(_RDX_REGNO, _RDX);
1594 allocr(_RAX_REGNO, _RAX);
1596 if (r2 == _RAX_REGNO) {
1597 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1598 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1599 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1600 jit_class_gpr|jit_class_named);
1603 movr(div, _RAX_REGNO);
1604 if (r1 != _RAX_REGNO)
1605 movr(_RAX_REGNO, r1);
1609 xchgr(r0, _RAX_REGNO);
1611 if (r0 != _RAX_REGNO)
1612 movr(r0, _RAX_REGNO);
1613 if (r1 != _RAX_REGNO)
1614 movr(_RAX_REGNO, r1);
1619 else if (r2 == _RDX_REGNO) {
1620 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1621 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1622 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1623 jit_class_gpr|jit_class_named);
1626 movr(div, _RDX_REGNO);
1627 if (r1 != _RAX_REGNO)
1628 movr(_RAX_REGNO, r1);
1631 if (r1 != _RAX_REGNO)
1632 movr(_RAX_REGNO, r1);
1633 movr(r0, _RDX_REGNO);
1638 if (r1 != _RAX_REGNO)
1639 movr(_RAX_REGNO, r1);
1644 sign_extend_rdx_rax();
1648 ixorr(_RDX_REGNO, _RDX_REGNO);
1656 movr(r0, _RAX_REGNO);
1658 movr(r0, _RDX_REGNO);
1660 clear(_RDX_REGNO, _RDX);
1661 clear(_RAX_REGNO, _RAX);
1665 _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0,
1666 jit_bool_t sign, jit_bool_t divide)
1686 if (i0 > 0 && !(i0 & (i0 - 1))) {
1689 rshi(r0, r0, ffsl(i0) - 1);
1691 rshi_u(r0, r0, ffsl(i0) - 1);
1697 else if (i0 == 1 || (sign && i0 == -1)) {
1701 else if (!sign && i0 > 0 && !(i0 & (i0 - 1))) {
1702 if (can_sign_extend_int_p(i0)) {
1706 else if (r0 != r1) {
1711 reg = jit_get_reg(jit_class_gpr);
1712 movi(rn(reg), i0 - 1);
1719 sav = set = use = 0;
1720 isavset(_RDX_REGNO);
1721 isavset(_RAX_REGNO);
1722 allocr(_RDX_REGNO, _RDX);
1723 allocr(_RAX_REGNO, _RAX);
1725 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) {
1726 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1727 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1728 jit_class_gpr|jit_class_named);
1736 movr(_RAX_REGNO, r1);
1739 sign_extend_rdx_rax();
1743 ixorr(_RDX_REGNO, _RDX_REGNO);
1751 movr(r0, _RAX_REGNO);
1753 movr(r0, _RDX_REGNO);
1755 clear(_RDX_REGNO, _RDX);
1756 clear(_RAX_REGNO, _RAX);
1760 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1761 jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
1769 sav = set = use = 0;
1770 qsavset(_RDX_REGNO);
1771 qsavset(_RAX_REGNO);
1772 allocr(_RDX_REGNO, _RDX);
1773 allocr(_RAX_REGNO, _RAX);
1774 if (r3 == _RAX_REGNO) {
1775 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1776 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1777 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1778 jit_class_gpr|jit_class_named);
1781 movr(div, _RAX_REGNO);
1782 if (r2 != _RAX_REGNO)
1783 movr(_RAX_REGNO, r2);
1787 xchgr(r0, _RAX_REGNO);
1789 if (r0 != _RAX_REGNO)
1790 movr(r0, _RAX_REGNO);
1791 if (r2 != _RAX_REGNO)
1792 movr(_RAX_REGNO, r2);
1797 else if (r3 == _RDX_REGNO) {
1798 if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) {
1799 if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG)
1800 reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) |
1801 jit_class_gpr|jit_class_named);
1804 movr(div, _RDX_REGNO);
1805 if (r2 != _RAX_REGNO)
1806 movr(_RAX_REGNO, r2);
1809 if (r2 != _RAX_REGNO)
1810 movr(_RAX_REGNO, r2);
1811 movr(r0, _RDX_REGNO);
1816 if (r2 != _RAX_REGNO)
1817 movr(_RAX_REGNO, r2);
1821 sign_extend_rdx_rax();
1825 ixorr(_RDX_REGNO, _RDX_REGNO);
1831 if (r0 == _RDX_REGNO && r1 == _RAX_REGNO)
1832 xchgr(_RAX_REGNO, _RDX_REGNO);
1834 if (r0 != _RDX_REGNO)
1835 movr(r0, _RAX_REGNO);
1836 movr(r1, _RDX_REGNO);
1837 if (r0 == _RDX_REGNO)
1838 movr(r0, _RAX_REGNO);
1841 clear(_RDX_REGNO, _RDX);
1842 clear(_RAX_REGNO, _RAX);
1846 _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
1847 jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
1851 reg = jit_get_reg(jit_class_gpr);
1854 qdivr(r0, r1, r2, rn(reg));
1856 qdivr_u(r0, r1, r2, rn(reg));
1861 _andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1876 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1884 else if (r0 == r1) {
1885 if (can_sign_extend_int_p(i0))
1888 reg = jit_get_reg(jit_class_gpr);
1901 _orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1916 _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1923 else if (can_sign_extend_int_p(i0)) {
1927 else if (r0 != r1) {
1932 reg = jit_get_reg(jit_class_gpr);
1940 _xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1955 _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1962 else if (can_sign_extend_int_p(i0)) {
1966 else if (r0 != r1) {
1971 reg = jit_get_reg(jit_class_gpr);
1979 _irotshr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
1981 rex(0, WIDE, _RCX_REGNO, _NOREG, r0);
1983 mrm(0x03, code, r7(r0));
1987 _rotshr(jit_state_t *_jit, jit_int32_t code,
1988 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1993 if (r0 == _RCX_REGNO) {
1994 reg = jit_get_reg(jit_class_gpr);
1996 if (r2 != _RCX_REGNO)
1997 movr(_RCX_REGNO, r2);
1998 irotshr(code, rn(reg));
1999 movr(_RCX_REGNO, rn(reg));
2002 else if (r2 != _RCX_REGNO) {
2003 use = !jit_reg_free_p(_RCX);
2005 reg = jit_get_reg(jit_class_gpr);
2006 movr(rn(reg), _RCX_REGNO);
2010 if (r1 == _RCX_REGNO) {
2012 xchgr(r0, _RCX_REGNO);
2015 movr(_RCX_REGNO, r2);
2019 movr(_RCX_REGNO, r2);
2024 movr(_RCX_REGNO, rn(reg));
2035 _irotshi(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0)
2037 rex(0, WIDE, _NOREG, _NOREG, r0);
2040 mrm(0x03, code, r7(r0));
2044 mrm(0x03, code, r7(r0));
2050 _rotshi(jit_state_t *_jit, jit_int32_t code,
2051 jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2055 irotshi(code, r0, i0);
2059 _xlshr(jit_state_t *_jit, jit_bool_t sign,
2060 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
2062 jit_int32_t sav, set;
2063 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
2064 jit_word_t over, zero, over_done, done;
2066 /* %RCX must be used for shift. */
2067 qsavset(_RCX_REGNO);
2068 allocr(_RCX_REGNO, _RCX);
2069 /* Almost certainly not %RCX */
2071 if (r0 == _RCX_REGNO) {
2072 s0 = jit_get_reg(jit_class_gpr);
2077 /* r0 == r1 is undefined behavior */
2078 if (r1 == _RCX_REGNO) {
2079 s1 = jit_get_reg(jit_class_gpr);
2083 /* Allocate a temporary if a register is used more than once, or if
2084 * the value to shift is %RCX */
2085 if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
2086 s2 = jit_get_reg(jit_class_gpr);
2092 /* Allocate temporary if shift is also one of the outputs */
2093 if (r0 == r3 || r1 == r3) {
2094 s3 = jit_get_reg(jit_class_gpr);
2100 /* Bits to shift right */
2103 /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
2104 movr(_RCX_REGNO, t3);
2105 /* Copy value to low register */
2107 /* SHLD shifts t0 left pulling extra bits in the right from t1.
2108 * It is very handy to shift bignums, but lightning does not support
2109 * these, nor 128 bit integers. The use of q{l,r}sh{r,i} is to verify
2110 * if there is precision loss in a shift and/or have it as a quick way
2111 * to multiply or divide by powers of two. */
2113 rex(0, WIDE, t1, _NOREG, t0);
2116 mrm(0x03, r7(t1), r7(t0));
2117 /* Must swap results if shift value is __WORDSIZE */
2118 alui(X86_CMP, t3, __WORDSIZE);
2119 over = jes(_jit->pc.w);
2120 /* Calculate bits to shift right and fill high register */
2121 rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
2123 rshr(t1, t2, _RCX_REGNO);
2125 rshr_u(t1, t2, _RCX_REGNO);
2126 /* FIXME t3 == %rcx only happens in 32 bit as %a3 (JIT_A3) is not
2127 * available -- it might be made available at some point, to
2128 * allow optimizing usage of arguments in registers. For now
2129 * keep the code, as one might cheat and use _RCX directly,
2130 * which is not officially supported, but *must* work. */
2131 /* Need to sign extend high register if shift value is zero */
2132 if (t3 == _RCX_REGNO)
2133 alui(X86_CMP, t3, __WORDSIZE);
2135 alui(X86_CMP, t3, 0);
2137 zero = jes(_jit->pc.w);
2138 done = jmpsi(_jit->pc.w);
2139 /* Swap registers if shift is __WORDSIZE */
2140 patch_at(over, _jit->pc.w);
2142 over_done = jmpsi(_jit->pc.w);
2143 /* If shift value is zero */
2144 patch_at(zero, _jit->pc.w);
2146 rshi(t1, t2, __WORDSIZE - 1);
2149 patch_at(over_done, _jit->pc.w);
2150 patch_at(done, _jit->pc.w);
2151 /* Release %RCX (if spilled) after branches */
2152 clear(_RCX_REGNO, _RCX);
2168 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2173 lea(0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? _SCL4 : _SCL8, r0);
2175 rotshi(X86_SHL, r0, r1, i0);
2179 _xlshi(jit_state_t *_jit, jit_bool_t sign,
2180 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
2185 rshi(r1, r2, __WORDSIZE - 1);
2189 else if (i0 == __WORDSIZE) {
2194 assert((jit_uword_t)i0 <= __WORDSIZE);
2196 rshi(r1, r2, __WORDSIZE - i0);
2198 rshi_u(r1, r2, __WORDSIZE - i0);
2204 _xrshr(jit_state_t *_jit, jit_bool_t sign,
2205 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
2207 jit_int32_t sav, set;
2208 jit_int32_t t0, s0, t1, s1, t2, s2, t3, s3;
2209 jit_word_t over, zero, done;
2211 /* %RCX must be used for shift. */
2212 qsavset(_RCX_REGNO);
2213 allocr(_RCX_REGNO, _RCX);
2214 /* Almost certainly not %RCX */
2216 if (r0 == _RCX_REGNO) {
2217 s0 = jit_get_reg(jit_class_gpr);
2222 /* r0 == r1 is undefined behavior */
2223 if (r1 == _RCX_REGNO) {
2224 s1 = jit_get_reg(jit_class_gpr);
2228 /* Allocate a temporary if a register is used more than once, or if
2229 * the value to shift is %RCX */
2230 if (r0 == r2 || r1 == r2 || r2 == _RCX_REGNO) {
2231 s2 = jit_get_reg(jit_class_gpr);
2237 /* Allocate temporary if shift is also one of the outputs */
2238 if (r0 == r3 || r1 == r3) {
2239 s3 = jit_get_reg(jit_class_gpr);
2245 /* Bits to shift left */
2247 rshi(t1, t2, __WORDSIZE - 1);
2248 /* Special case for negative value and zero shift */
2249 alui(X86_CMP, t3, 0);
2250 zero = jnes(_jit->pc.w);
2252 patch_at(zero, _jit->pc.w);
2257 /* Shift < 0 or > __WORDSIZE is undefined behavior and not tested */
2258 movr(_RCX_REGNO, t3);
2259 /* Copy value to low register */
2261 /* SHRD shifts t0 right pulling extra bits in the left from t1 */
2263 rex(0, WIDE, t1, _NOREG, t0);
2266 mrm(0x03, r7(t1), r7(t0));
2267 /* Must swap results if shift value is __WORDSIZE */
2268 alui(X86_CMP, t3, __WORDSIZE);
2269 over = jes(_jit->pc.w);
2270 /* Already zero if shift value is zero */
2271 alui(X86_CMP, t3, 0);
2272 zero = jes(_jit->pc.w);
2273 /* Calculate bits to shift left and fill high register */
2274 rsbi(_RCX_REGNO, _RCX_REGNO, __WORDSIZE);
2275 lshr(t1, t2, _RCX_REGNO);
2276 done = jmpsi(_jit->pc.w);
2277 /* Swap registers if shift is __WORDSIZE */
2278 patch_at(over, _jit->pc.w);
2280 /* If shift value is zero */
2281 patch_at(zero, _jit->pc.w);
2282 patch_at(done, _jit->pc.w);
2283 /* Release %RCX (if spilled) after branches */
2284 clear(_RCX_REGNO, _RCX);
2300 _xrshi(jit_state_t *_jit, jit_bool_t sign,
2301 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
2307 else if (i0 == __WORDSIZE) {
2310 rshi(r0, r2, __WORDSIZE - 1);
2315 assert((jit_uword_t)i0 <= __WORDSIZE);
2316 lshi(r1, r2, __WORDSIZE - i0);
2325 _unr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0)
2327 rex(0, WIDE, _NOREG, _NOREG, r0);
2329 mrm(0x03, code, r7(r0));
2333 _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2344 _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2352 _incr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2356 rex(0, WIDE, _NOREG, _NOREG, r0);
2365 _decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2369 rex(0, WIDE, _NOREG, _NOREG, r0);
2379 _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2386 _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2393 rex(0, WIDE, r0, _NOREG, r1);
2396 mrm(0x3, r7(r0), r7(r1));
2398 /* jump if undefined: r1 == 0 */
2399 w = jccs(X86_CC_E, _jit->pc.w);
2400 /* count leading zeros */
2401 rsbi(r0, r0, __WORDSIZE - 1);
2403 x = jmpsi(_jit->pc.w);
2405 patch_at(w, _jit->pc.w);
2406 movi(r0, __WORDSIZE);
2408 patch_at(x, _jit->pc.w);
2410 /* LZCNT has defined behavior for value zero and counts leading zeros */
2414 _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2421 _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2427 t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk);
2431 movi(rn(t0), __WORDSIZE);
2437 rex(0, WIDE, r0, _NOREG, r1);
2440 mrm(0x3, r7(r0), r7(r1));
2442 /* No conditional move, or would need to spill/reload a temporary */
2444 w = jccs(X86_CC_E, _jit->pc.w);
2445 movi(r0, __WORDSIZE);
2446 patch_at(w, _jit->pc.w);
2450 rex(0, WIDE, r0, _NOREG, rn(t0));
2453 mrm(0x3, r7(r0), r7(rn(t0)));
2457 /* TZCNT has defined behavior for value zero */
2461 _rbitr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
2464 jit_int32_t sav, set;
2465 jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
2466 static const unsigned char swap_tab[256] = {
2467 0, 128, 64, 192, 32, 160, 96, 224,
2468 16, 144, 80, 208, 48, 176, 112, 240,
2469 8, 136, 72, 200, 40, 168, 104, 232,
2470 24, 152, 88, 216 ,56, 184, 120, 248,
2471 4, 132, 68, 196, 36, 164, 100, 228,
2472 20, 148, 84, 212, 52, 180, 116, 244,
2473 12, 140, 76, 204, 44, 172, 108, 236,
2474 28, 156, 92, 220, 60, 188, 124, 252,
2475 2, 130, 66, 194, 34, 162, 98, 226,
2476 18, 146, 82, 210, 50, 178, 114, 242,
2477 10, 138, 74, 202, 42, 170, 106, 234,
2478 26, 154, 90, 218, 58, 186, 122, 250,
2479 6, 134, 70, 198, 38, 166, 102, 230,
2480 22, 150, 86, 214, 54, 182, 118, 246,
2481 14, 142, 78, 206, 46, 174, 110, 238,
2482 30, 158, 94, 222, 62, 190, 126, 254,
2483 1, 129, 65, 193, 33, 161, 97, 225,
2484 17, 145, 81, 209, 49, 177, 113, 241,
2485 9, 137, 73, 201, 41, 169, 105, 233,
2486 25, 153, 89, 217, 57, 185, 121, 249,
2487 5, 133, 69, 197, 37, 165, 101, 229,
2488 21, 149, 85, 213, 53, 181, 117, 245,
2489 13, 141, 77, 205, 45, 173, 109, 237,
2490 29, 157, 93, 221, 61, 189, 125, 253,
2491 3, 131, 67, 195, 35, 163, 99, 227,
2492 19, 147, 83, 211, 51, 179, 115, 243,
2493 11, 139, 75, 203, 43, 171, 107, 235,
2494 27, 155, 91, 219, 59, 187, 123, 251,
2495 7, 135, 71, 199, 39, 167, 103, 231,
2496 23, 151, 87, 215, 55, 183, 119, 247,
2497 15, 143, 79, 207, 47, 175, 111, 239,
2498 31, 159, 95, 223, 63, 191, 127, 255
2501 isavset(_RCX_REGNO);
2502 allocr(_RCX_REGNO, _RCX);
2503 if (r0 == _RCX_REGNO) {
2504 t0 = jit_get_reg(jit_class_gpr);
2511 if (r1 == _RCX_REGNO || r0 == r1) {
2512 t1 = jit_get_reg(jit_class_gpr);
2520 t2 = jit_get_reg(jit_class_gpr);
2521 t3 = jit_get_reg(jit_class_gpr);
2522 #if __WORDSIZE == 32
2523 /* Avoid condition that causes running out of registers */
2524 if (!reg8_p(r1_reg)) {
2526 andr(rn(t2), r1_reg, rn(t2));
2530 extr_uc(rn(t2), r1_reg);
2531 movi(rn(t3), (jit_word_t)swap_tab);
2532 ldxr_uc(r0_reg, rn(t3), rn(t2));
2533 movi(_RCX_REGNO, 8);
2535 rshr(rn(t2), r1_reg, _RCX_REGNO);
2536 extr_uc(rn(t2), rn(t2));
2537 lshi(r0_reg, r0_reg, 8);
2538 ldxr_uc(rn(t2), rn(t3), rn(t2));
2539 orr(r0_reg, r0_reg, rn(t2));
2540 addi(_RCX_REGNO, _RCX_REGNO, 8);
2541 alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
2543 clear(_RCX_REGNO, _RCX);
2546 if (t1 != JIT_NOREG)
2548 if (t0 != JIT_NOREG) {
2555 _popcntr(jit_state_t * _jit, jit_int32_t r0, jit_int32_t r1)
2559 rex(0, WIDE, r0, _NOREG, r1);
2562 mrm(0x3, r7(r0), r7(r1));
2566 jit_int32_t sav, set;
2567 jit_int32_t r0_reg, t0, r1_reg, t1, t2, t3;
2568 static const unsigned char pop_tab[256] = {
2569 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
2570 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2571 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2572 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2573 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2574 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2575 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2576 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
2579 isavset(_RCX_REGNO);
2580 allocr(_RCX_REGNO, _RCX);
2581 if (r0 == _RCX_REGNO) {
2582 t0 = jit_get_reg(jit_class_gpr);
2589 if (r1 == _RCX_REGNO || r0 == r1) {
2590 t1 = jit_get_reg(jit_class_gpr);
2598 t2 = jit_get_reg(jit_class_gpr);
2599 t3 = jit_get_reg(jit_class_gpr);
2600 #if __WORDSIZE == 32
2601 /* Avoid condition that causes running out of registers */
2602 if (!reg8_p(r1_reg)) {
2604 andr(rn(t2), r1_reg, rn(t2));
2608 extr_uc(rn(t2), r1_reg);
2609 movi(rn(t3), (jit_word_t)pop_tab);
2610 ldxr_uc(r0_reg, rn(t3), rn(t2));
2611 movi(_RCX_REGNO, 8);
2613 rshr(rn(t2), r1_reg, _RCX_REGNO);
2614 extr_uc(rn(t2), rn(t2));
2615 ldxr_uc(rn(t2), rn(t3), rn(t2));
2616 addr(r0_reg, r0_reg, rn(t2));
2617 addi(_RCX_REGNO, _RCX_REGNO, 8);
2618 alui(X86_CMP, _RCX_REGNO, __WORDSIZE);
2620 clear(_RCX_REGNO, _RCX);
2623 if (t1 != JIT_NOREG)
2625 if (t0 != JIT_NOREG) {
2633 _cr(jit_state_t *_jit,
2634 jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2639 same = r0 == r1 || r0 == r2;
2648 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2649 ixorr(rn(reg), rn(reg));
2658 _ci(jit_state_t *_jit,
2659 jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2673 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2674 ixorr(rn(reg), rn(reg));
2683 _ci0(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1)
2697 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
2698 ixorr(rn(reg), rn(reg));
2707 _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2712 cr(X86_CC_L, r0, r1, r2);
2716 _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2719 ci(X86_CC_L, r0, r1, i0);
2721 ci0(X86_CC_S, r0, r1);
2725 _ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2730 cr(X86_CC_B, r0, r1, r2);
2734 _ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2739 cr(X86_CC_LE, r0, r1, r2);
2743 _ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2748 cr(X86_CC_BE, r0, r1, r2);
2752 _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2755 ci(X86_CC_BE, r0, r1, i0);
2757 ci0(X86_CC_E, r0, r1);
2761 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2766 cr(X86_CC_E, r0, r1, r2);
2770 _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2773 ci(X86_CC_E, r0, r1, i0);
2775 ci0(X86_CC_E, r0, r1);
2779 _ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2784 cr(X86_CC_GE, r0, r1, r2);
2788 _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2791 ci(X86_CC_GE, r0, r1, i0);
2793 ci0(X86_CC_NS, r0, r1);
2797 _ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2802 cr(X86_CC_AE, r0, r1, r2);
2806 _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2809 ci(X86_CC_AE, r0, r1, i0);
2811 ci0(X86_CC_NB, r0, r1);
2815 _gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2820 cr(X86_CC_G, r0, r1, r2);
2824 _gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2829 cr(X86_CC_A, r0, r1, r2);
2833 _gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2836 ci(X86_CC_A, r0, r1, i0);
2838 ci0(X86_CC_NE, r0, r1);
2842 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2847 cr(X86_CC_NE, r0, r1, r2);
2851 _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2854 ci(X86_CC_NE, r0, r1, i0);
2856 ci0(X86_CC_NE, r0, r1);
2860 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2863 rex(0, 1, r1, _NOREG, r0);
2865 ic(0xc0 | (r1 << 3) | r7(r0));
2870 _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2874 if (fits_uint32_p(i0)) {
2876 rex(0, 0, _NOREG, _NOREG, r0);
2881 else if (can_sign_extend_int_p(i0)) {
2882 rex(0, 1, _NOREG, _NOREG, r0);
2888 rex(0, 1, _NOREG, _NOREG, r0);
2904 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2910 rel = rel < 0 ? rel - 8 : rel + 8;
2911 if (can_sign_extend_int_p(rel)) {
2912 /* lea rel(%rip), %r0 */
2913 rex(0, WIDE, r0, _NOREG, _NOREG);
2916 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
2930 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2933 rex(0, WIDE, _NOREG, _NOREG, r0);
2941 _movcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2943 rex(0, WIDE, r0, _NOREG, r1);
2946 mrm(0x03, r7(r0), r7(r1));
2950 _movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2952 rex(0, WIDE, r0, _NOREG, r1);
2955 mrm(0x03, r7(r0), r7(r1));
2959 _movsr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2961 rex(0, WIDE, r0, _NOREG, r1);
2964 mrm(0x03, r7(r0), r7(r1));
2968 _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2970 rex(0, WIDE, r0, _NOREG, r1);
2973 mrm(0x03, r7(r0), r7(r1));
2977 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
2978 jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
2980 jit_int32_t save_rax, restore_rax;
2981 jit_int32_t ascasr_reg, ascasr_use;
2982 if (r0 != _RAX_REGNO) { /* result not in %rax */
2983 if (r2 != _RAX_REGNO) { /* old value not in %rax */
2984 save_rax = jit_get_reg(jit_class_gpr);
2985 movr(rn(save_rax), _RAX_REGNO);
2993 if (r2 != _RAX_REGNO)
2994 movr(_RAX_REGNO, r2);
2995 if (r1 == _NOREG) { /* using immediate address */
2996 if (!can_sign_extend_int_p(i0)) {
2997 ascasr_reg = jit_get_reg(jit_class_gpr);
2998 if (ascasr_reg == _RAX) {
2999 ascasr_reg = jit_get_reg(jit_class_gpr);
3000 jit_unget_reg(_RAX);
3003 movi(rn(ascasr_reg), i0);
3010 ic(0xf0); /* lock */
3012 rex(0, WIDE, r3, _NOREG, rn(ascasr_reg));
3014 rex(0, WIDE, r3, _NOREG, r1);
3017 if (r1 != _NOREG) /* casr */
3018 rx(r3, 0, r1, _NOREG, _SCL1);
3021 rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1); /* address in reg */
3023 rx(r3, i0, _NOREG, _NOREG, _SCL1); /* address in offset */
3026 if (r0 != _RAX_REGNO)
3027 movr(r0, _RAX_REGNO);
3029 movr(_RAX_REGNO, rn(save_rax));
3030 jit_unget_reg(save_rax);
3033 jit_unget_reg(ascasr_reg);
3037 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3039 assert(jit_cmov_p());
3043 rex(0, WIDE, r0, _NOREG, r1);
3046 mrm(0x03, r7(r0), r7(r1));
3050 _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3052 assert(jit_cmov_p());
3056 rex(0, WIDE, r0, _NOREG, r1);
3059 mrm(0x03, r7(r0), r7(r1));
3064 _movir(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3066 rex(0, 1, r0, _NOREG, r1);
3068 mrm(0x03, r7(r0), r7(r1));
3072 _movir_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3074 rex(0, 0, r1, _NOREG, r0);
3076 ic(0xc0 | (r1 << 3) | r7(r0));
3081 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3085 rex(0, 0, _NOREG, _NOREG, r0);
3087 mrm(0x03, X86_ROR, r7(r0));
3092 _bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3095 rex(0, 0, _NOREG, _NOREG, r0);
3100 #if __X64 && !__X64_32
3102 _bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3105 rex(0, 1, _NOREG, _NOREG, r0);
3112 _extr(jit_state_t *_jit,
3113 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3116 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3117 if (i1 == __WORDSIZE)
3120 if (__WORDSIZE - (i0 + i1)) {
3121 lshi(r0, r1, __WORDSIZE - (i0 + i1));
3122 rshi(r0, r0, __WORDSIZE - i1);
3125 rshi(r0, r1, __WORDSIZE - i1);
3130 _extr_u(jit_state_t *_jit,
3131 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3135 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3136 if (i1 == __WORDSIZE)
3138 /* Only cheaper in code size or number of instructions if i0 is not zero */
3139 /* Number of cpu cycles not tested */
3140 else if (i0 && jit_cpu.bmi2) {
3141 mask = ((ONE << i1) - 1) << i0;
3142 t0 = jit_get_reg(jit_class_gpr);
3145 vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 2);
3147 mrm(0x03, r7(r0), r7(rn(t0)));
3153 andi(r0, r0, (ONE << i1) - 1);
3158 _depr(jit_state_t *_jit,
3159 jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
3163 assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
3164 if (i1 == __WORDSIZE)
3166 /* Only cheaper in code size or number of instructions if i0 is not zero */
3167 /* Number of cpu cycles not tested */
3168 else if (i0 && jit_cpu.bmi2) {
3169 mask = ((ONE << i1) - 1) << i0;
3170 t0 = jit_get_reg(jit_class_gpr);
3171 t1 = jit_get_reg(jit_class_gpr);
3175 vex(r0, _NOREG, rn(t0), 2, WIDE, r1, 0, 3);
3177 mrm(0x03, r7(r0), r7(rn(t0)));
3178 andi(rn(t1), rn(t1), ~mask);
3179 orr(r0, r0, rn(t1));
3184 mask = (ONE << i1) - 1;
3185 t0 = jit_get_reg(jit_class_gpr);
3186 andi(rn(t0), r1, mask);
3188 lshi(rn(t0), rn(t0), i0);
3191 andi(r0, r0, ~mask);
3192 orr(r0, r0, rn(t0));
3198 _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3204 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3212 _extr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3218 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3220 movcr_u(r0, rn(reg));
3226 _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3228 rex(0, WIDE, r0, _NOREG, r1);
3231 rx(r0, 0, r1, _NOREG, _SCL1);
3235 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3239 jit_word_t rel = i0 - _jit->pc.w;
3240 rel = rel < 0 ? rel - 8 : rel + 8;
3241 if (can_sign_extend_int_p(rel)) {
3242 rex(0, WIDE, r0, _NOREG, _NOREG);
3245 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3249 if (address_p(i0)) {
3250 rex(0, WIDE, r0, _NOREG, _NOREG);
3253 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3256 reg = jit_get_reg(jit_class_gpr);
3264 _ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3266 rex(0, WIDE, r0, _NOREG, r1);
3269 rx(r0, 0, r1, _NOREG, _SCL1);
3273 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3277 jit_word_t rel = i0 - _jit->pc.w;
3278 rel = rel < 0 ? rel - 8 : rel + 8;
3279 if (can_sign_extend_int_p(rel)) {
3280 rex(0, WIDE, r0, _NOREG, _NOREG);
3283 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3287 if (address_p(i0)) {
3288 rex(0, WIDE, r0, _NOREG, _NOREG);
3291 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3294 reg = jit_get_reg(jit_class_gpr);
3296 ldr_uc(r0, rn(reg));
3302 _ldr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3304 rex(0, WIDE, r0, _NOREG, r1);
3307 rx(r0, 0, r1, _NOREG, _SCL1);
3311 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3315 jit_word_t rel = i0 - _jit->pc.w;
3316 rel = rel < 0 ? rel - 8 : rel + 8;
3317 if (can_sign_extend_int_p(rel)) {
3318 rex(0, WIDE, r0, _NOREG, _NOREG);
3321 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3325 if (address_p(i0)) {
3326 rex(0, WIDE, r0, _NOREG, _NOREG);
3329 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3332 reg = jit_get_reg(jit_class_gpr);
3340 _ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3342 rex(0, WIDE, r0, _NOREG, r1);
3345 rx(r0, 0, r1, _NOREG, _SCL1);
3349 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3353 jit_word_t rel = i0 - _jit->pc.w;
3354 rel = rel < 0 ? rel - 8 : rel + 8;
3355 if (can_sign_extend_int_p(rel)) {
3356 rex(0, WIDE, r0, _NOREG, _NOREG);
3359 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3363 if (address_p(i0)) {
3364 rex(0, WIDE, r0, _NOREG, _NOREG);
3367 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3370 reg = jit_get_reg(jit_class_gpr);
3372 ldr_us(r0, rn(reg));
3377 #if __X32 || !__X64_32
3379 _ldr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3382 rex(0, WIDE, r0, _NOREG, r1);
3387 rx(r0, 0, r1, _NOREG, _SCL1);
3391 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3395 jit_word_t rel = i0 - _jit->pc.w;
3396 rel = rel < 0 ? rel - 8 : rel + 8;
3397 if (can_sign_extend_int_p(rel)) {
3398 rex(0, WIDE, r0, _NOREG, _NOREG);
3400 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3404 if (address_p(i0)) {
3406 rex(0, WIDE, r0, _NOREG, _NOREG);
3411 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3414 reg = jit_get_reg(jit_class_gpr);
3424 _ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3426 rex(0, 0, r0, _NOREG, r1);
3428 rx(r0, 0, r1, _NOREG, _SCL1);
3432 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3436 jit_word_t rel = i0 - _jit->pc.w;
3437 rel = rel < 0 ? rel - 8 : rel + 8;
3438 if (can_sign_extend_int_p(rel)) {
3439 rex(0, 0, r0, _NOREG, _NOREG);
3441 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3445 if (address_p(i0)) {
3446 rex(0, 0, r0, _NOREG, _NOREG);
3448 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3451 reg = jit_get_reg(jit_class_gpr);
3456 ldr_ui(r0, rn(reg));
3464 _ldr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3466 rex(0, 1, r0, _NOREG, r1);
3468 rx(r0, 0, r1, _NOREG, _SCL1);
3472 _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
3475 jit_word_t rel = i0 - _jit->pc.w;
3476 rel = rel < 0 ? rel - 8 : rel + 8;
3477 if (can_sign_extend_int_p(rel)) {
3478 rex(0, WIDE, r0, _NOREG, _NOREG);
3480 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3482 else if (can_sign_extend_int_p(i0)) {
3483 rex(0, WIDE, r0, _NOREG, _NOREG);
3485 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3488 reg = jit_get_reg(jit_class_gpr);
3498 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3504 rex(0, WIDE, r0, r1, r2);
3507 rx(r0, 0, r2, r1, _SCL1);
3512 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3515 if (can_sign_extend_int_p(i0)) {
3516 rex(0, WIDE, r0, _NOREG, r1);
3519 rx(r0, i0, r1, _NOREG, _SCL1);
3522 reg = jit_get_reg(jit_class_gpr);
3524 ldxr_c(r0, r1, rn(reg));
3530 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3536 rex(0, WIDE, r0, r1, r2);
3539 rx(r0, 0, r2, r1, _SCL1);
3544 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3547 if (can_sign_extend_int_p(i0)) {
3548 rex(0, WIDE, r0, _NOREG, r1);
3551 rx(r0, i0, r1, _NOREG, _SCL1);
3554 reg = jit_get_reg(jit_class_gpr);
3556 ldxr_uc(r0, r1, rn(reg));
3562 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3568 rex(0, WIDE, r0, r1, r2);
3571 rx(r0, 0, r2, r1, _SCL1);
3576 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3579 if (can_sign_extend_int_p(i0)) {
3580 rex(0, WIDE, r0, _NOREG, r1);
3583 rx(r0, i0, r1, _NOREG, _SCL1);
3586 reg = jit_get_reg(jit_class_gpr);
3588 ldxr_s(r0, r1, rn(reg));
3594 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3600 rex(0, WIDE, r0, r1, r2);
3603 rx(r0, 0, r2, r1, _SCL1);
3608 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3611 if (can_sign_extend_int_p(i0)) {
3612 rex(0, WIDE, r0, _NOREG, r1);
3615 rx(r0, i0, r1, _NOREG, _SCL1);
3618 reg = jit_get_reg(jit_class_gpr);
3620 ldxr_us(r0, r1, rn(reg));
3625 #if __X64 || !__X64_32
3627 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3630 rex(0, WIDE, r0, r1, r2);
3635 rx(r0, 0, r2, r1, _SCL1);
3639 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3642 if (can_sign_extend_int_p(i0)) {
3644 rex(0, WIDE, r0, _NOREG, r1);
3649 rx(r0, i0, r1, _NOREG, _SCL1);
3652 reg = jit_get_reg(jit_class_gpr);
3654 ldxr_i(r0, r1, rn(reg));
3662 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3666 /* to avoid confusion with macro renames */
3667 _ldr_ui(_jit, r0, r0);
3669 rex(0, 0, r0, r1, r2);
3671 rx(r0, 0, r2, r1, _SCL1);
3676 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3679 if (can_sign_extend_int_p(i0)) {
3680 rex(0, 0, r0, _NOREG, r1);
3682 rx(r0, i0, r1, _NOREG, _SCL1);
3685 reg = jit_get_reg(jit_class_gpr);
3688 ldxr_i(r0, r1, rn(reg));
3690 ldxr_ui(r0, r1, rn(reg));
3698 _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3700 rex(0, 1, r0, r1, r2);
3702 rx(r0, 0, r2, r1, _SCL1);
3706 _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
3709 if (can_sign_extend_int_p(i0)) {
3710 rex(0, 1, r0, _NOREG, r1);
3712 rx(r0, i0, r1, _NOREG, _SCL1);
3715 reg = jit_get_reg(jit_class_gpr);
3717 ldxr_l(r0, r1, rn(reg));
3725 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3729 rex(0, 0, r1, _NOREG, r0);
3731 rx(r1, 0, r0, _NOREG, _SCL1);
3734 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3736 rex(0, 0, rn(reg), _NOREG, r0);
3738 rx(rn(reg), 0, r0, _NOREG, _SCL1);
3744 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3748 jit_word_t rel = i0 - _jit->pc.w;
3749 rel = rel < 0 ? rel - 16 : rel + 16;
3750 if (can_sign_extend_int_p(rel)) {
3752 rex(0, 0, r0, _NOREG, _NOREG);
3754 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3757 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3759 rex(0, 0, rn(reg), _NOREG, _NOREG);
3761 rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3767 if (address_p(i0)) {
3769 rex(0, 0, r0, _NOREG, _NOREG);
3771 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3774 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3776 rex(0, 0, rn(reg), _NOREG, _NOREG);
3778 rx(rn(reg), i0, _NOREG, _NOREG, _SCL1);
3783 reg = jit_get_reg(jit_class_gpr);
3791 _str_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3794 rex(0, 0, r1, _NOREG, r0);
3796 rx(r1, 0, r0, _NOREG, _SCL1);
3800 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3804 jit_word_t rel = i0 - _jit->pc.w;
3805 rel = rel < 0 ? rel - 8 : rel + 8;
3806 if (can_sign_extend_int_p(rel)) {
3808 rex(0, 0, r0, _NOREG, _NOREG);
3810 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3814 if (address_p(i0)) {
3816 rex(0, 0, r0, _NOREG, _NOREG);
3818 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3821 reg = jit_get_reg(jit_class_gpr);
3829 _str_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3831 rex(0, 0, r1, _NOREG, r0);
3833 rx(r1, 0, r0, _NOREG, _SCL1);
3837 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3841 jit_word_t rel = i0 - _jit->pc.w;
3842 rel = rel < 0 ? rel - 8 : rel + 8;
3843 if (can_sign_extend_int_p(rel)) {
3844 rex(0, 0, r0, _NOREG, _NOREG);
3846 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3850 if (address_p(i0)) {
3851 rex(0, 0, r0, _NOREG, _NOREG);
3853 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3856 reg = jit_get_reg(jit_class_gpr);
3863 #if __X64 && !__X64_32
3865 _str_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
3867 rex(0, 1, r1, _NOREG, r0);
3869 rx(r1, 0, r0, _NOREG, _SCL1);
3873 _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
3877 jit_word_t rel = i0 - _jit->pc.w;
3878 rel = rel < 0 ? rel - 8 : rel + 8;
3879 if (can_sign_extend_int_p(rel)) {
3880 rex(0, WIDE, r0, _NOREG, _NOREG);
3882 rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
3886 if (can_sign_extend_int_p(i0)) {
3887 rex(0, WIDE, r0, _NOREG, _NOREG);
3889 rx(r0, i0, _NOREG, _NOREG, _SCL1);
3892 reg = jit_get_reg(jit_class_gpr);
3901 _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3905 reg = jit_get_reg(jit_class_gpr);
3906 addr(rn(reg), r0, r1);
3911 rex(0, 0, r2, r1, r0);
3913 rx(r2, 0, r0, r1, _SCL1);
3916 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3918 rex(0, 0, rn(reg), r1, r0);
3920 rx(rn(reg), 0, r0, r1, _SCL1);
3927 _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3930 if (can_sign_extend_int_p(i0)) {
3932 rex(0, 0, r1, _NOREG, r0);
3934 rx(r1, i0, r0, _NOREG, _SCL1);
3937 reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
3939 rex(0, 0, rn(reg), _NOREG, r0);
3941 rx(rn(reg), i0, r0, _NOREG, _SCL1);
3946 reg = jit_get_reg(jit_class_gpr);
3948 stxr_c(rn(reg), r0, r1);
3954 _stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3958 reg = jit_get_reg(jit_class_gpr);
3959 addr(rn(reg), r0, r1);
3964 rex(0, 0, r2, r1, r0);
3966 rx(r2, 0, r0, r1, _SCL1);
3971 _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
3974 if (can_sign_extend_int_p(i0)) {
3976 rex(0, 0, r1, _NOREG, r0);
3978 rx(r1, i0, r0, _NOREG, _SCL1);
3981 reg = jit_get_reg(jit_class_gpr);
3983 stxr_s(rn(reg), r0, r1);
3989 _stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
3993 reg = jit_get_reg(jit_class_gpr);
3994 addr(rn(reg), r0, r1);
3998 rex(0, 0, r2, r1, r0);
4000 rx(r2, 0, r0, r1, _SCL1);
4005 _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4008 if (can_sign_extend_int_p(i0)) {
4009 rex(0, 0, r1, _NOREG, r0);
4011 rx(r1, i0, r0, _NOREG, _SCL1);
4014 reg = jit_get_reg(jit_class_gpr);
4016 stxr_i(rn(reg), r0, r1);
4021 #if __X64 && !__X64_32
4023 _stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
4025 rex(0, 1, r2, r1, r0);
4027 rx(r2, 0, r0, r1, _SCL1);
4031 _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4034 if (can_sign_extend_int_p(i0)) {
4035 rex(0, 1, r1, _NOREG, r0);
4037 rx(r1, i0, r0, _NOREG, _SCL1);
4040 reg = jit_get_reg(jit_class_gpr);
4042 stxr_l(rn(reg), r0, r1);
4049 _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
4061 _jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
4074 _jcr(jit_state_t *_jit,
4075 jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4077 alur(X86_CMP, r0, r1);
4078 return (jcc(code, i0));
4082 _jci(jit_state_t *_jit,
4083 jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4085 alui(X86_CMP, r0, i1);
4086 return (jcc(code, i0));
4090 _jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
4093 return (jcc(code, i0));
4097 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4099 return (jcr(X86_CC_L, i0, r0, r1));
4103 _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4106 if (i1) w = jci (X86_CC_L, i0, r0, i1);
4107 else w = jci0(X86_CC_S, i0, r0);
4112 _bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4114 return (jcr(X86_CC_B, i0, r0, r1));
4118 _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4121 if (i1) w = jci (X86_CC_B, i0, r0, i1);
4122 else w = jci0(X86_CC_B, i0, r0);
4127 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4130 if (r0 == r1) w = jmpi(i0);
4131 else w = jcr (X86_CC_LE, i0, r0, r1);
4136 _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4139 if (i1) w = jci (X86_CC_LE, i0, r0, i1);
4140 else w = jci0(X86_CC_LE, i0, r0);
4145 _bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4148 if (r0 == r1) w = jmpi(i0);
4149 else w = jcr (X86_CC_BE, i0, r0, r1);
4154 _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4157 if (i1) w = jci (X86_CC_BE, i0, r0, i1);
4158 else w = jci0(X86_CC_BE, i0, r0);
4163 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4166 if (r0 == r1) w = jmpi(i0);
4167 else w = jcr (X86_CC_E, i0, r0, r1);
4172 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4175 if (i1) w = jci (X86_CC_E, i0, r0, i1);
4176 else w = jci0(X86_CC_E, i0, r0);
4181 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4184 if (r0 == r1) w = jmpi(i0);
4185 else w = jcr (X86_CC_GE, i0, r0, r1);
4190 _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4193 if (i1) w = jci (X86_CC_GE, i0, r0, i1);
4194 else w = jci0(X86_CC_NS, i0, r0);
4199 _bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4202 if (r0 == r1) w = jmpi(i0);
4203 else w = jcr (X86_CC_AE, i0, r0, r1);
4208 _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4211 if (i1) w = jci (X86_CC_AE, i0, r0, i1);
4217 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4219 return (jcr(X86_CC_G, i0, r0, r1));
4223 _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4225 return (jci(X86_CC_G, i0, r0, i1));
4229 _bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4231 return (jcr(X86_CC_A, i0, r0, r1));
4235 _bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4238 if (i1) w = jci (X86_CC_A, i0, r0, i1);
4239 else w = jci0(X86_CC_NE, i0, r0);
4244 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4246 return (jcr(X86_CC_NE, i0, r0, r1));
4250 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4253 if (i1) w = jci (X86_CC_NE, i0, r0, i1);
4254 else w = jci0(X86_CC_NE, i0, r0);
4259 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4266 _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4269 if (can_zero_extend_int_p(i1))
4272 reg = jit_get_reg(jit_class_gpr);
4281 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4288 _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4291 if (can_zero_extend_int_p(i1))
4294 reg = jit_get_reg(jit_class_gpr);
4303 _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4310 _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4313 if (can_sign_extend_int_p(i1)) {
4317 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4320 return (boaddr(i0, r0, rn(reg)));
4324 _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4331 _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4334 if (can_sign_extend_int_p(i1)) {
4338 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4341 return (boaddr_u(i0, r0, rn(reg)));
4345 _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4352 _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4355 if (can_sign_extend_int_p(i1)) {
4359 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4362 return (bxaddr(i0, r0, rn(reg)));
4366 _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4373 _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4376 if (can_sign_extend_int_p(i1)) {
4380 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4383 return (bxaddr_u(i0, r0, rn(reg)));
4387 _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4394 _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4397 if (can_sign_extend_int_p(i1)) {
4401 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4404 return (bosubr(i0, r0, rn(reg)));
4408 _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4415 _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4418 if (can_sign_extend_int_p(i1)) {
4422 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4425 return (bosubr_u(i0, r0, rn(reg)));
4429 _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4436 _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4439 if (can_sign_extend_int_p(i1)) {
4443 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4446 return (bxsubr(i0, r0, rn(reg)));
4450 _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
4457 _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
4460 if (can_sign_extend_int_p(i1)) {
4464 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4467 return (bxsubr_u(i0, r0, rn(reg)));
4471 _callr(jit_state_t *_jit, jit_int32_t r0)
4473 rex(0, 0, _NOREG, _NOREG, r0);
4475 mrm(0x03, 0x02, r7(r0));
4479 _calli(jit_state_t *_jit, jit_word_t i0)
4483 jit_word_t l = _jit->pc.w + 5;
4488 !((d < 0) ^ (l < 0)) &&
4490 (jit_int32_t)d == d) {
4505 _calli_p(jit_state_t *_jit, jit_word_t i0)
4509 reg = jit_get_reg(jit_class_gpr);
4510 w = movi_p(rn(reg), i0);
4518 _jmpr(jit_state_t *_jit, jit_int32_t r0)
4520 rex(0, 0, _NOREG, _NOREG, r0);
4522 mrm(0x03, 0x04, r7(r0));
4526 _jmpi(jit_state_t *_jit, jit_word_t i0)
4530 jit_word_t l = _jit->pc.w + 5;
4535 !((d < 0) ^ (l < 0)) &&
4537 (jit_int32_t)d == d) {
4552 _jmpi_p(jit_state_t *_jit, jit_word_t i0)
4556 reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
4557 w = movi_p(rn(reg), i0);
4565 _jmpsi(jit_state_t *_jit, jit_uint8_t i0)
4567 jit_word_t w = _jit->pc.w;
4577 _prolog(jit_state_t *_jit, jit_node_t *node)
4579 jit_int32_t reg, offs;
4580 if (_jitc->function->define_frame || _jitc->function->assume_frame) {
4581 jit_int32_t frame = -_jitc->function->frame;
4583 assert(_jitc->function->self.aoff >= frame);
4584 if (_jitc->function->assume_frame)
4586 _jitc->function->self.aoff = frame;
4588 if (_jitc->function->allocar)
4589 _jitc->function->self.aoff &= -16;
4590 #if __X64 && (__CYGWIN__ || _WIN32)
4591 _jitc->function->stack = (((/* first 32 bytes must be allocated */
4592 (_jitc->function->self.alen > 32 ?
4593 _jitc->function->self.alen : 32) -
4594 /* align stack at 16 bytes */
4595 _jitc->function->self.aoff) + 15) & -16);
4597 _jitc->function->stack = (((_jitc->function->self.alen -
4598 _jitc->function->self.aoff) + 15) & -16);
4601 if (_jitc->function->stack)
4602 _jitc->function->need_stack = 1;
4604 if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
4605 /* check if any callee save register needs to be saved */
4606 for (reg = 0; reg < _jitc->reglen; ++reg)
4607 if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
4608 (_rvs[reg].spec & jit_class_sav)) {
4609 _jitc->function->need_stack = 1;
4614 if (_jitc->function->need_frame || _jitc->function->need_stack)
4615 subi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4616 /* callee save registers */
4617 for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
4618 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4619 stxi(offs, _RSP_REGNO, rn(iregs[reg]));
4620 offs += REAL_WORDSIZE;
4623 #if __X64 && (__CYGWIN__ || _WIN32)
4624 for (reg = 0; reg < jit_size(fregs); reg++) {
4625 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
4626 sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg]));
4627 offs += sizeof(jit_float64_t);
4632 if (_jitc->function->need_frame) {
4633 stxi(0, _RSP_REGNO, _RBP_REGNO);
4634 movr(_RBP_REGNO, _RSP_REGNO);
4638 if (_jitc->function->stack)
4639 subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
4640 if (_jitc->function->allocar) {
4641 reg = jit_get_reg(jit_class_gpr);
4642 movi(rn(reg), _jitc->function->self.aoff);
4643 stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg));
4647 #if __X64 && !(__CYGWIN__ || _WIN32)
4648 if (_jitc->function->self.call & jit_call_varargs) {
4649 jit_word_t nofp_code;
4651 /* Save gp registers in the save area, if any is a vararg */
4652 for (reg = first_gp_from_offset(_jitc->function->vagp);
4653 jit_arg_reg_p(reg); ++reg)
4654 stxi(_jitc->function->vaoff + first_gp_offset +
4655 reg * 8, _RBP_REGNO, rn(JIT_RA0 - reg));
4657 reg = first_fp_from_offset(_jitc->function->vafp);
4658 if (jit_arg_f_reg_p(reg)) {
4659 /* Skip over if no float registers were passed as argument */
4665 /* Save fp registers in the save area, if any is a vararg */
4666 /* Note that the full 16 byte xmm is not saved, because
4667 * lightning only handles float and double, and, while
4668 * attempting to provide a va_list compatible pointer as
4669 * jit_va_start return, does not guarantee it (on all ports). */
4670 for (; jit_arg_f_reg_p(reg); ++reg)
4671 sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
4672 reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
4674 patch_at(nofp_code, _jit->pc.w);
/* Emit the function epilogue: reload the callee-save registers that are
 * marked in the current function's regset, reload the frame pointer and
 * release the frame.
 *   _jit: jit state the code is emitted for
 *   node: not referenced by any of the lines shown here
 * NOTE(review): this listing omits some original lines; in particular the
 * statement controlled by the assume_frame test is not visible --
 * presumably an early return, confirm against the full file. */
4681 _epilog(jit_state_t *_jit, jit_node_t *node)
4683 jit_int32_t reg, offs;
4684 if (_jitc->function->assume_frame)
/* The reloads below are all addressed relative to the stack pointer;
 * NOTE(review): movr presumably copies its second operand into the first,
 * i.e. sets the stack pointer from the frame pointer -- confirm. */
4686 if (_jitc->function->need_frame)
4687 movr(_RSP_REGNO, _RBP_REGNO);
/* Slots are consumed in iregs[] order starting at offset REAL_WORDSIZE;
 * a register that is not set in the regset takes no slot. */
4689 /* callee save registers */
4690 for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
4691 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
4692 ldxi(rn(iregs[reg]), _RSP_REGNO, offs);
4693 offs += REAL_WORDSIZE;
/* Only when the condition below holds: the registers listed in fregs[]
 * that are set in the regset are reloaded as doubles, continuing at the
 * same running offset and taking sizeof(jit_float64_t) bytes each. */
4696 #if __X64 && (__CYGWIN__ || _WIN32)
4697 for (reg = 0; reg < jit_size(fregs); reg++) {
4698 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
4699 sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs);
4700 offs += sizeof(jit_float64_t);
/* With a frame, the frame pointer is reloaded from offset 0 and the stack
 * pointer is then advanced by jit_framesize(); with need_stack only, just
 * the stack pointer adjustment is emitted. */
4705 if (_jitc->function->need_frame) {
4706 ldxi(_RBP_REGNO, _RSP_REGNO, 0);
4707 addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
4709 /* This condition does not happen as much as expected because
4710 * it is not safe to not create a frame pointer if any function
4711 * is called, even jit functions, as those might call external
4713 else if (_jitc->function->need_stack)
4714 addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
/* Emit the code for va_start, leaving the va_list value in r0.
 *   _jit: jit state the code is emitted for
 *   r0:   register that receives the result
 * Both variants assert that the current function was declared with
 * jit_call_varargs.
 * NOTE(review): this listing omits some original lines; the preprocessor
 * lines separating the two variants, the declaration of reg and the
 * release of the temporary register are not visible here. */
4720 _vastart(jit_state_t *_jit, jit_int32_t r0)
4722 #if __X32 || __CYGWIN__ || _WIN32
/* On these targets r0 is simply the frame pointer plus jit_selfsize(). */
4723 assert(_jitc->function->self.call & jit_call_varargs);
4724 addi(r0, _RBP_REGNO, jit_selfsize());
/* Second variant: r0 is the address of a jit_va_list_t located at the
 * frame pointer plus vaoff; its gpoff, fpoff, over and save fields are
 * filled in below through one temporary register. */
4728 assert(_jitc->function->self.call & jit_call_varargs);
4730 /* Return jit_va_list_t in the register argument */
4731 addi(r0, _RBP_REGNO, _jitc->function->vaoff);
4732 reg = jit_get_reg(jit_class_gpr);
/* gpoff and fpoff are written with the int store (stxi_i), over and save
 * with the word-sized store (stxi). */
4734 /* Initialize gp offset in the save area. */
4735 movi(rn(reg), _jitc->function->vagp);
4736 stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
4738 /* Initialize fp offset in the save area. */
4739 movi(rn(reg), _jitc->function->vafp);
4740 stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
4742 /* Initialize overflow pointer to the first stack argument. */
4743 addi(rn(reg), _RBP_REGNO, jit_selfsize());
4744 stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
/* The save pointer is computed from r0 itself: r0 + first_gp_offset. */
4746 /* Initialize register save area pointer. */
4747 addi(rn(reg), r0, first_gp_offset);
4748 stxi(offsetof(jit_va_list_t, save), r0, rn(reg));
/* Emit the code that fetches the next word-sized variadic argument.
 *   _jit: jit state the code is emitted for
 *   r0:   register that receives the argument
 *   r1:   register holding the va_list value
 * Both variants assert that the current function was declared with
 * jit_call_varargs.
 * NOTE(review): this listing omits some original lines: the preprocessor
 * lines separating the two variants, the declarations of rg0, rg1,
 * ge_code and lt_code, the instructions that record ge_code and lt_code,
 * the load from the overflow area and the release of the temporaries are
 * not visible here. */
4755 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4757 #if __X32 || __CYGWIN__ || _WIN32
/* On these targets r1 is advanced by va_gp_increment; the load into r0 is
 * not among the visible lines. */
4758 assert(_jitc->function->self.call & jit_call_varargs);
4760 addi(r1, r1, va_gp_increment);
/* Second variant: r1 is the address of a jit_va_list_t.  The argument is
 * taken from the register save area while gpoff is below
 * va_gp_max_offset, from the overflow area otherwise. */
4767 assert(_jitc->function->self.call & jit_call_varargs);
4769 rg0 = jit_get_reg(jit_class_gpr);
4770 rg1 = jit_get_reg(jit_class_gpr);
/* gpoff is read and written with the int accessors (ldxi_i / stxi_i). */
4772 /* Load the gp offset in save area in the first temporary. */
4773 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
4775 /* Jump over if there are no remaining arguments in the save area. */
4776 icmpi(rn(rg0), va_gp_max_offset);
4779 /* Load the save area pointer in the second temporary. */
4780 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
/* The argument is at save + gpoff. */
4782 /* Load the vararg argument in the first argument. */
4783 ldxr(r0, rn(rg1), rn(rg0));
/* Each argument taken from the save area advances gpoff by 8. */
4785 /* Update the gp offset. */
4786 addi(rn(rg0), rn(rg0), 8);
4787 stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
4789 /* Will only need one temporary register below. */
4792 /* Jump over overflow code. */
4795 /* Where to land if argument is in overflow area. */
4796 patch_at(ge_code, _jit->pc.w);
4798 /* Load overflow pointer. */
4799 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
4801 /* Load argument. */
/* The overflow pointer advances by va_gp_increment and is stored back. */
4804 /* Update overflow pointer. */
4805 addi(rn(rg0), rn(rg0), va_gp_increment);
4806 stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
4808 /* Where to land if argument is in save area. */
4809 patch_at(lt_code, _jit->pc.w);
4815 /* The x87 boolean argument selects where the result is placed: in an
4816 * x87 register if non false, in an SSE register otherwise. */
/* Emit the code that fetches the next double variadic argument.
 *   _jit: jit state the code is emitted for
 *   r0:   register that receives the argument (x87 or sse, see above)
 *   r1:   register holding the va_list value
 *   x87:  selects the kind of load used for r0
 * Both variants assert that the current function was declared with
 * jit_call_varargs.
 * NOTE(review): this listing omits some original lines: the body of the
 * first variant after its assert, the preprocessor lines separating the
 * variants, the tests on x87 that choose between each pair of loads, the
 * declarations of rg0, rg1, ge_code and lt_code, the instructions that
 * record ge_code and lt_code and the release of the temporaries are not
 * visible here. */
4818 _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
4820 #if __X32 || __CYGWIN__ || _WIN32
4821 assert(_jitc->function->self.call & jit_call_varargs);
/* Second variant: r1 is the address of a jit_va_list_t.  The argument is
 * taken from the register save area while fpoff is below
 * va_fp_max_offset, from the overflow area otherwise. */
4833 assert(_jitc->function->self.call & jit_call_varargs);
4835 rg0 = jit_get_reg(jit_class_gpr);
4836 rg1 = jit_get_reg(jit_class_gpr);
/* fpoff is read and written with the int accessors (ldxi_i / stxi_i). */
4838 /* Load the fp offset in save area in the first temporary. */
4839 ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
4841 /* Jump over if there are no remaining arguments in the save area. */
4842 icmpi(rn(rg0), va_fp_max_offset);
4845 /* Load the save area pointer in the second temporary. */
4846 ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
/* The argument is at save + fpoff; one of the two loads below is used. */
4848 /* Load the vararg argument in the first argument. */
4850 x87_ldxr_d(r0, rn(rg1), rn(rg0));
4852 sse_ldxr_d(r0, rn(rg1), rn(rg0));
/* Each argument taken from the save area advances fpoff by
 * va_fp_increment. */
4854 /* Update the fp offset. */
4855 addi(rn(rg0), rn(rg0), va_fp_increment);
4856 stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
4858 /* Will only need one temporary register below. */
4861 /* Jump over overflow code. */
4864 /* Where to land if argument is in overflow area. */
4865 patch_at(ge_code, _jit->pc.w);
4867 /* Load overflow pointer. */
4868 ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
/* The argument is read directly at the overflow pointer. */
4870 /* Load argument. */
4872 x87_ldr_d(r0, rn(rg0));
4874 sse_ldr_d(r0, rn(rg0));
/* A double taken from the overflow area advances the pointer by 8. */
4876 /* Update overflow pointer. */
4877 addi(rn(rg0), rn(rg0), 8);
4878 stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
4880 /* Where to land if argument is in save area. */
4881 patch_at(lt_code, _jit->pc.w);
/* Patch already emitted code so that it refers to label.
 *   _jit:  jit state
 *   instr: address of the code to patch; it is also viewed bytewise
 *          through code
 *   label: target address
 * Three patch forms are visible below: a full word holding label, a 32 bit
 * displacement and an 8 bit displacement.
 * NOTE(review): this listing omits some original lines; the declaration of
 * disp, any adjustment of instr, the dispatch that selects among the three
 * forms and the statement controlled by the code[1] test are not visible
 * here, and the end of the function may lie beyond the visible lines. */
4888 _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
4891 jit_uint8_t *code = (jit_uint8_t *)instr;
/* Absolute form: label itself is stored as a full word. */
4896 *(jit_word_t *)instr = label;
4898 /* forward pc relative address known to be in range */
/* NOTE(review): 0x80..0x8f presumably is the second opcode byte of a two
 * byte conditional jump; anything outside that range is rejected by the
 * statement that follows in the full file -- confirm. */
4908 if (code[1] < 0x80 || code[1] > 0x8f)
/* 32 bit form: the displacement is relative to the end of the 4 byte
 * field and must fit in a jit_int32_t. */
4917 disp = label - (instr + 4);
4918 assert((jit_int32_t)disp == disp);
4919 *(jit_int32_t *)instr = disp;
/* 8 bit form: the displacement is relative to the end of the 1 byte field
 * and must fit in a jit_int8_t. */
4925 disp = label - (instr + 1);
4926 assert((jit_int8_t)disp == disp);
4927 *(jit_int8_t *)instr = disp;