2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4 * This file is part of GNU lightning.
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
17 * Paulo Cesar Pereira de Andrade
20 #include <lightning.h>
21 #include <lightning/jit_private.h>
/* ABI configuration macros: RIP-relative addressing switch, address and
 * argument-register predicates (address_p, jit_arg_reg_p, jit_arg_f_reg_p),
 * stack_framesize, REAL_WORDSIZE and the va_list offsets/increments.
 * The same names are defined several times below with different values;
 * the preprocessor conditionals selecting between the alternatives are
 * only partly visible in this excerpt. */
24 # define CAN_RIP_ADDRESS 0
25 # define address_p(i0) 1
26 # define jit_arg_reg_p(i) 0
27 # define jit_arg_f_reg_p(i) 0
28 /* callee save + 16 byte align
29 * align16(%ebp + %rbx + %rsi + %rdi) + (16 - 4) */
30 # define stack_framesize 28
31 # define REAL_WORDSIZE 4
32 # define va_gp_increment 4
33 # define va_fp_increment 8
35 # if _WIN32 || __X64_32
36 # define CAN_RIP_ADDRESS 0
38 # define CAN_RIP_ADDRESS 1
41 # define address_p(i0) ((jit_word_t)(i0) >= 0)
43 # define address_p(i0) can_sign_extend_int_p(i0)
45 # if __CYGWIN__ || _WIN32
46 # define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4)
47 # define jit_arg_f_reg_p(i) jit_arg_reg_p(i)
48 /* callee save + 16 byte align
49 * align16(%rbp+%rbx+%rdi+%rsi+%r1[2-5]+%xmm[6-9]+%xmm1[0-5]) + (16 - 8) */
50 # define stack_framesize 152
51 # define va_fp_increment 8
53 # define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6)
54 # define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8)
55 /* callee save + 16 byte align
56 * align16(%rbp + %r15 + %r14 + %r13 + %r12 + %rbx) + (16 - 8) */
57 # define stack_framesize 56
58 # define first_gp_argument rdi
59 # define first_gp_offset offsetof(jit_va_list_t, rdi)
60 # define first_gp_from_offset(gp) ((gp) / 8)
61 # define last_gp_argument r9
62 # define va_gp_max_offset \
63 (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
64 # define first_fp_argument xmm0
65 # define first_fp_offset offsetof(jit_va_list_t, xmm0)
66 # define last_fp_argument xmm7
67 # define va_fp_max_offset \
68 (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
69 # define va_fp_increment 16
70 # define first_fp_from_offset(fp) (((fp) - va_gp_max_offset) / 16)
72 # define va_gp_increment 8
73 # define REAL_WORDSIZE 8
75 #define CVT_OFFSET _jitc->function->cvt_offset
/* CHECK_CVT_OFFSET: when the current function's cvt_offset is still zero,
 * reserve a jit_float64_t sized slot with jit_allocai() and remember its
 * offset there. */
77 #define CHECK_CVT_OFFSET() \
79 if (!_jitc->function->cvt_offset) { \
81 _jitc->function->cvt_offset = \
82 jit_allocai(sizeof(jit_float64_t)); \
/* va_list representation: a plain jit_pointer_t when __X32, __CYGWIN__ or
 * _WIN32 is set; otherwise a structure.  The structure's members are not
 * visible in this excerpt (the offsetof() macros earlier in the file
 * reference rdi, r9, xmm0 and xmm7). */
89 #if __X32 || __CYGWIN__ || _WIN32
90 typedef jit_pointer_t jit_va_list_t;
92 typedef struct jit_va_list {
97 /* Declared explicitly as int64 for the x32 abi */
/* File-local helpers.  Each wrapper macro forwards to the matching static
 * function, passing the implicit _jit state as first argument.  The
 * definitions of these functions are outside this excerpt. */
126 #define compute_framesize() _compute_framesize(_jit)
127 static void _compute_framesize(jit_state_t*);
128 #define patch(instr, node) _patch(_jit, instr, node)
129 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
/* Conversions between SSE and x87 registers, in float (_f) and double (_d)
 * flavours -- meaning inferred from the names; bodies not visible here. */
130 #define sse_from_x87_f(r0, r1) _sse_from_x87_f(_jit, r0, r1)
131 static void _sse_from_x87_f(jit_state_t*,jit_int32_t,jit_int32_t);
132 #define sse_from_x87_d(r0, r1) _sse_from_x87_d(_jit, r0, r1)
133 static void _sse_from_x87_d(jit_state_t*,jit_int32_t,jit_int32_t);
134 #define x87_from_sse_f(r0, r1) _x87_from_sse_f(_jit, r0, r1)
135 static void _x87_from_sse_f(jit_state_t*,jit_int32_t,jit_int32_t);
136 #define x87_from_sse_d(r0, r1) _x87_from_sse_d(_jit, r0, r1)
137 static void _x87_from_sse_d(jit_state_t*,jit_int32_t,jit_int32_t);
140 # include "jit_x86-cpu.c"
141 # include "jit_x86-sse.c"
142 # include "jit_x86-x87.c"
/* Register description table.  Each entry pairs a spec word -- class flags
 * built with rc(...) OR'ed with the hardware register number -- with a
 * printable name.  Three alternative register sets appear below: one with
 * 32-bit names (%eax ...), one introduced by `__CYGWIN__ || _WIN32`, and a
 * further 64-bit set in which %rax is also an argument register.  The
 * conditionals choosing between them are only partly visible in this
 * excerpt.  The table is terminated by a _NOREG entry. */
149 jit_register_t _rvs[] = {
151 { rc(gpr) | rc(rg8) | 0, "%eax" },
152 { rc(gpr) | rc(rg8) | 1, "%ecx" },
153 { rc(gpr) | rc(rg8) | 2, "%edx" },
154 { rc(sav) | rc(rg8) | rc(gpr) | 3, "%ebx" },
155 { rc(sav) | rc(gpr) | 6, "%esi" },
156 { rc(sav) | rc(gpr) | 7, "%edi" },
157 { rc(sav) | 4, "%esp" },
158 { rc(sav) | 5, "%ebp" },
159 { rc(xpr) | rc(fpr) | 0, "%xmm0" },
160 { rc(xpr) | rc(fpr) | 1, "%xmm1" },
161 { rc(xpr) | rc(fpr) | 2, "%xmm2" },
162 { rc(xpr) | rc(fpr) | 3, "%xmm3" },
163 { rc(xpr) | rc(fpr) | 4, "%xmm4" },
164 { rc(xpr) | rc(fpr) | 5, "%xmm5" },
165 { rc(xpr) | rc(fpr) | 6, "%xmm6" },
166 { rc(xpr) | rc(fpr) | 7, "%xmm7" },
167 { rc(fpr) | 0, "st(0)" },
168 { rc(fpr) | 1, "st(1)" },
169 { rc(fpr) | 2, "st(2)" },
170 { rc(fpr) | 3, "st(3)" },
171 { rc(fpr) | 4, "st(4)" },
172 { rc(fpr) | 5, "st(5)" },
173 { rc(fpr) | 6, "st(6)" },
174 { rc(fpr) | 7, "st(7)" },
176 # if __CYGWIN__ || _WIN32
177 { rc(gpr) | rc(rg8) | 0, "%rax" },
/* NOTE(review): rc(rg8) is OR'ed twice in the next two entries.  Presumably
 * harmless (OR-ing the same flag again should not change the value), but
 * the duplicate looks unintended -- confirm against rc()'s definition. */
178 { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" },
179 { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" },
180 { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
181 { rc(sav) | rc(gpr) | 7, "%rdi" },
182 { rc(sav) | rc(gpr) | 6, "%rsi" },
183 { rc(sav) | rc(gpr) | 12, "%r12" },
184 { rc(sav) | rc(gpr) | 13, "%r13" },
185 { rc(sav) | rc(gpr) | 14, "%r14" },
186 { rc(sav) | rc(gpr) | 15, "%r15" },
187 { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
188 { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
189 { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
190 { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
191 { rc(sav) | 4, "%rsp" },
192 { rc(sav) | 5, "%rbp" },
193 { rc(xpr) | rc(fpr) | 4, "%xmm4" },
194 { rc(xpr) | rc(fpr) | 5, "%xmm5" },
195 { rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" },
196 { rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" },
197 { rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" },
198 { rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" },
199 { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" },
200 { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" },
201 { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" },
202 { rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" },
203 { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" },
204 { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" },
205 { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
206 { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
207 { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
208 { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
210 /* %rax is a pseudo flag argument for varargs functions */
211 { rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" },
212 { rc(gpr) | rc(rg8) | 10, "%r10" },
213 { rc(gpr) | rc(rg8) | 11, "%r11" },
214 { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
215 { rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" },
216 { rc(sav) | rc(rg8) | rc(gpr) | 14, "%r14" },
217 { rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" },
218 { rc(sav) | rc(gpr) | rc(rg8) | 12, "%r12" },
219 { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
220 { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
221 { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
222 { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
223 { rc(arg) | rc(rg8) | rc(gpr) | 6, "%rsi" },
224 { rc(arg) | rc(rg8) | rc(gpr) | 7, "%rdi" },
225 { rc(sav) | 4, "%rsp" },
226 { rc(sav) | 5, "%rbp" },
227 { rc(xpr) | rc(fpr) | 8, "%xmm8" },
228 { rc(xpr) | rc(fpr) | 9, "%xmm9" },
229 { rc(xpr) | rc(fpr) | 10, "%xmm10" },
230 { rc(xpr) | rc(fpr) | 11, "%xmm11" },
231 { rc(xpr) | rc(fpr) | 12, "%xmm12" },
232 { rc(xpr) | rc(fpr) | 13, "%xmm13" },
233 { rc(xpr) | rc(fpr) | 14, "%xmm14" },
234 { rc(xpr) | rc(fpr) | 15, "%xmm15" },
235 { rc(xpr) | rc(arg) | rc(fpr) | 7, "%xmm7" },
236 { rc(xpr) | rc(arg) | rc(fpr) | 6, "%xmm6" },
237 { rc(xpr) | rc(arg) | rc(fpr) | 5, "%xmm5" },
238 { rc(xpr) | rc(arg) | rc(fpr) | 4, "%xmm4" },
239 { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
240 { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
241 { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
242 { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
244 { rc(fpr) | 0, "st(0)" },
245 { rc(fpr) | 1, "st(1)" },
246 { rc(fpr) | 2, "st(2)" },
247 { rc(fpr) | 3, "st(3)" },
248 { rc(fpr) | 4, "st(4)" },
249 { rc(fpr) | 5, "st(5)" },
250 { rc(fpr) | 6, "st(6)" },
251 { rc(fpr) | 7, "st(7)" },
253 { _NOREG, "<none>" },
/* List of general purpose registers.  Both visible alternatives name
 * registers that carry rc(sav) in the _rvs table; how the list is used is
 * outside this excerpt. */
256 static jit_int32_t iregs[] = {
259 #elif (__CYGWIN__ || _WIN32)
260 _RBX, _RDI, _RSI, _R12, _R13, _R14, _R15,
262 _R15, _R14, _R13, _R12, _RBX,
/* List of xmm registers, only defined for 64-bit Cygwin/Windows builds.
 * These are the xmm registers marked rc(sav) in the matching _rvs set. */
266 #if __X64 && (__CYGWIN__ || _WIN32)
267 static jit_int32_t fregs[] = {
268 _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
/* CPU feature detection.  The function header and the declarations of the
 * eax/ebx/ecx/edx locals are not visible in this excerpt.  What is visible:
 * three runs of one-bit fields naming cpuid result bits, followed by the
 * cpuid queries that copy selected bits into the global jit_cpu record. */
279 /* eax=7 and ecx=0 */
281 jit_uword_t fsgsbase : 1;
282 jit_uword_t IA32_TSC_ADJUST : 1;
284 jit_uword_t bmi1 : 1;
286 jit_uword_t avx2 : 1;
287 jit_uword_t FDP_EXCPTN_ONLY : 1;
288 jit_uword_t smep : 1;
289 jit_uword_t bmi2 : 1;
290 jit_uword_t erms : 1;
291 jit_uword_t invpcid : 1;
293 jit_uword_t rdt_m_pqm : 1;
294 jit_uword_t dep_FPU_CS_DS : 1;
296 jit_uword_t rdt_a_pqe : 1;
297 jit_uword_t avx512_f : 1;
298 jit_uword_t avx512_dq : 1;
299 jit_uword_t rdseed : 1;
301 jit_uword_t smap : 1;
302 jit_uword_t avx512_ifma : 1;
303 jit_uword_t __reserved0 : 1;
304 jit_uword_t clflushopt : 1;
305 jit_uword_t clwb : 1;
307 jit_uword_t avx512_pf : 1;
308 jit_uword_t avx512_er : 1;
309 jit_uword_t avx512_cd : 1;
311 jit_uword_t avx512_bw : 1;
312 jit_uword_t avx512_vl : 1;
/* Bit names read later through ecx.bits after the %eax = 1 query
 * (sse3, pclmulqdq, ssse3, fma, ...). */
319 jit_uint32_t sse3 : 1;
320 jit_uint32_t pclmulqdq : 1;
321 jit_uint32_t dtes64 : 1; /* amd reserved */
322 jit_uint32_t monitor : 1;
323 jit_uint32_t ds_cpl : 1; /* amd reserved */
324 jit_uint32_t vmx : 1; /* amd reserved */
325 jit_uint32_t smx : 1; /* amd reserved */
326 jit_uint32_t est : 1; /* amd reserved */
327 jit_uint32_t tm2 : 1; /* amd reserved */
328 jit_uint32_t ssse3 : 1;
329 jit_uint32_t cntx_id : 1; /* amd reserved */
330 jit_uint32_t __reserved0 : 1;
331 jit_uint32_t fma : 1;
332 jit_uint32_t cmpxchg16b : 1;
333 jit_uint32_t xtpr : 1; /* amd reserved */
334 jit_uint32_t pdcm : 1; /* amd reserved */
335 jit_uint32_t __reserved1 : 1;
336 jit_uint32_t pcid : 1; /* amd reserved */
337 jit_uint32_t dca : 1; /* amd reserved */
338 jit_uint32_t sse4_1 : 1;
339 jit_uint32_t sse4_2 : 1;
340 jit_uint32_t x2apic : 1; /* amd reserved */
341 jit_uint32_t movbe : 1; /* amd reserved */
342 jit_uint32_t popcnt : 1;
343 jit_uint32_t tsc : 1; /* amd reserved */
344 jit_uint32_t aes : 1;
345 jit_uint32_t xsave : 1;
346 jit_uint32_t osxsave : 1;
347 jit_uint32_t avx : 1;
348 jit_uint32_t __reserved2 : 1; /* amd F16C */
349 jit_uint32_t __reserved3 : 1;
350 jit_uint32_t __alwayszero : 1; /* amd RAZ */
/* Bit names read later through edx.bits after the %eax = 1 query
 * (fpu, cmpxchg8b, cmov, mmx, sse, sse2). */
357 jit_uint32_t fpu : 1;
358 jit_uint32_t vme : 1;
360 jit_uint32_t pse : 1;
361 jit_uint32_t tsc : 1;
362 jit_uint32_t msr : 1;
363 jit_uint32_t pae : 1;
364 jit_uint32_t mce : 1;
365 jit_uint32_t cmpxchg8b : 1;
366 jit_uint32_t apic : 1;
367 jit_uint32_t __reserved0 : 1;
368 jit_uint32_t sep : 1;
369 jit_uint32_t mtrr : 1;
370 jit_uint32_t pge : 1;
371 jit_uint32_t mca : 1;
372 jit_uint32_t cmov : 1;
373 jit_uint32_t pat : 1;
374 jit_uint32_t pse36 : 1;
375 jit_uint32_t psn : 1; /* amd reserved */
376 jit_uint32_t clfsh : 1;
377 jit_uint32_t __reserved1 : 1;
378 jit_uint32_t ds : 1; /* amd reserved */
379 jit_uint32_t acpi : 1; /* amd reserved */
380 jit_uint32_t mmx : 1;
381 jit_uint32_t fxsr : 1;
382 jit_uint32_t sse : 1;
383 jit_uint32_t sse2 : 1;
384 jit_uint32_t ss : 1; /* amd reserved */
385 jit_uint32_t htt : 1;
386 jit_uint32_t tm : 1; /* amd reserved */
387 jit_uint32_t __reserved2 : 1;
388 jit_uint32_t pbe : 1; /* amd reserved */
/* Probe for cpuid support through the flags register; bit 21 of `ac` is
 * tested below and a clear bit is treated as "no cpuid".  Most of the asm
 * sequence is not visible in this excerpt. */
398 /* adapted from glibc __sysconf */
399 __asm__ volatile ("pushfl;\n\t"
401 "movl $0x240000, %1;\n\t"
410 : "=r" (flags), "=r" (ac));
412 /* i386 or i486 without cpuid */
413 if ((ac & (1 << 21)) == 0)
414 /* probably without x87 as well */
/* The xchg around cpuid swaps %ebx/%rbx with operand %1 before and after
 * the instruction, so the ebx result is delivered through a generic "=r"
 * operand instead of an explicit "=b" constraint. */
418 /* query %eax = 1 function */
419 #if __X32 || __X64_32
420 __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
422 __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
424 : "=a" (eax), "=r" (ebx.cpuid),
425 "=c" (ecx.cpuid), "=d" (edx.cpuid)
428 jit_cpu.fpu = edx.bits.fpu;
429 jit_cpu.cmpxchg8b = edx.bits.cmpxchg8b;
430 jit_cpu.cmov = edx.bits.cmov;
431 jit_cpu.mmx = edx.bits.mmx;
432 jit_cpu.sse = edx.bits.sse;
433 jit_cpu.sse2 = edx.bits.sse2;
434 jit_cpu.sse3 = ecx.bits.sse3;
435 jit_cpu.pclmulqdq = ecx.bits.pclmulqdq;
436 jit_cpu.ssse3 = ecx.bits.ssse3;
437 jit_cpu.fma = ecx.bits.fma;
438 jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b;
439 jit_cpu.sse4_1 = ecx.bits.sse4_1;
440 jit_cpu.sse4_2 = ecx.bits.sse4_2;
441 jit_cpu.movbe = ecx.bits.movbe;
442 jit_cpu.popcnt = ecx.bits.popcnt;
443 jit_cpu.aes = ecx.bits.aes;
444 jit_cpu.avx = ecx.bits.avx;
446 /* query %eax = 7 and ecx = 0 function */
448 __asm__ volatile ("cpuid"
449 : "=a" (eax), "=b" (ebx.cpuid), "=c" (ecx), "=d" (edx)
452 jit_cpu.adx = ebx.bits.adx;
453 jit_cpu.bmi2 = ebx.bits.bmi2;
456 /* query %eax = 0x80000001 function */
459 __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
461 __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
463 : "=a" (eax), "=r" (ebx.cpuid),
464 "=c" (ecx.cpuid), "=d" (edx.cpuid)
/* Extended-leaf flags are taken straight from raw ecx bits 0, 5 and 16. */
466 jit_cpu.lahf = !!(ecx.cpuid & 1);
467 jit_cpu.abm = !!(ecx.cpuid & 32);
468 jit_cpu.fma4 = !!(ecx.cpuid & (1 << 16));
/* Per-state initialisation.  Sets reglen to the number of _rvs entries minus
 * one.  The visible loop walks _rvs from index reglen down to 0 and clears
 * the spec of every entry that has jit_class_xpr set; the condition under
 * which that loop runs (and the use of `first`) is not visible here. */
473 _jit_init(jit_state_t *_jit)
477 static jit_bool_t first = 1;
480 _jitc->reglen = jit_size(_rvs) - 1;
484 for (regno = _jitc->reglen; regno >= 0; regno--) {
485 if (_rvs[regno].spec & jit_class_xpr)
486 _rvs[regno].spec = 0;
/* Starts the definition of a new jit function.
 * - Requires that no argument registers are currently marked (assert) and
 *   resets the saved-register set.
 * - Grows the functions vector by 16 entries when it is full, then takes
 *   the next slot as the current function.
 * - Initialises the frame bookkeeping: self.size starts at stack_framesize
 *   plus one word for the return address; argi/argf/aoff/alen and
 *   cvt_offset start at zero; the call type is jit_call_default.
 * - Allocates the per-register regoff array (reglen entries).
 * - Creates and links the prolog node, creates the (not yet linked) epilog
 *   node, and stores the function's index in both nodes' w.w field. */
495 _jit_prolog(jit_state_t *_jit)
501 assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
502 jit_regset_set_ui(&_jitc->regsav, 0);
503 offset = _jitc->functions.offset;
504 if (offset >= _jitc->functions.length) {
505 jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
506 _jitc->functions.length * sizeof(jit_function_t),
507 (_jitc->functions.length + 16) * sizeof(jit_function_t));
508 _jitc->functions.length += 16;
510 _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
511 /* One extra stack slot for implicit saved returned address */
512 _jitc->function->self.size = stack_framesize + REAL_WORDSIZE;
513 _jitc->function->self.argi = _jitc->function->self.argf =
514 _jitc->function->self.aoff = _jitc->function->self.alen = 0;
515 _jitc->function->cvt_offset = 0;
516 #if __X64 && (__CYGWIN__ || _WIN32)
517 /* force framepointer */
520 _jitc->function->self.call = jit_call_default;
521 jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
522 _jitc->reglen * sizeof(jit_int32_t));
524 /* _no_link here does not mean the jit_link() call can be removed
526 * _jitc->function->prolog = jit_new_node(jit_code_prolog);
528 _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
529 jit_link(_jitc->function->prolog);
530 _jitc->function->prolog->w.w = offset;
531 _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
533 * v: offset in blocks vector
534 * w: offset in functions vector
536 _jitc->function->epilog->w.w = offset;
538 jit_regset_new(&_jitc->function->regset);
542 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
544 assert(_jitc->function);
547 /* Stack is 4 bytes aligned but jit functions keep it 8 bytes aligned.
548 * Called functions have 16 byte aligned stack. */
/* Overview of _jit_allocai: reserves `length` bytes in the current
 * function's frame and returns the resulting offset (self.aoff).
 * Visible steps: seed aoff with -4 while it is still zero, round aoff down
 * to a 2, 4 or 8 byte boundary picked by the case labels below (the switch
 * operand itself is not visible in this excerpt), subtract `length`, and
 * record an allocai synth node unless code is already being realized. */
549 if (!_jitc->function->self.aoff)
550 _jitc->function->self.aoff = -4;
553 case 0: case 1: break;
554 case 2: _jitc->function->self.aoff &= -2; break;
555 case 3: case 4: _jitc->function->self.aoff &= -4; break;
556 default: _jitc->function->self.aoff &= -8; break;
558 _jitc->function->self.aoff -= length;
560 /* jit_allocai() may be called from jit_x86-cpu.c, and force a function
561 * generation restart on some conditions: div/rem and qmul/qdiv, due
562 * to registers constraints.
563 * The check is to prevent an assertion of a jit_xyz() being called
564 * during code generation, and attempting to add a node to the tail
565 * of the current IR generation. */
566 if (!_jitc->realize) {
567 jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
571 return (_jitc->function->self.aoff);
/* Runtime-sized stack allocation involving registers u and v.
 * On first use in a function it reserves an int32 slot (aoffoff) through
 * jit_allocai() and sets the function's allocar flag.  The visible code
 * then masks a scratch register with -16, loads the value stored at
 * JIT_FP + aoffoff into u, adds the scratch register to JIT_SP and stores
 * u back into the slot.  The lines that fill the scratch register from v
 * and adjust u are not visible in this excerpt. */
575 _jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
578 assert(_jitc->function);
579 jit_inc_synth_ww(allocar, u, v);
580 if (!_jitc->function->allocar) {
581 _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
582 _jitc->function->allocar = 1;
584 reg = jit_get_reg(jit_class_gpr);
586 jit_andi(reg, reg, -16);
587 jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
589 jit_addr(JIT_SP, JIT_SP, reg);
590 jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
/* Return from the current function: `instr` is patched to target the
 * function's epilog node.  The creation of `instr` is not visible in this
 * excerpt. */
596 _jit_ret(jit_state_t *_jit)
599 assert(_jitc->function);
603 jit_patch_at(instr, _jitc->function->epilog);
/* Return the integer held in register u: opens a synth node for `code` and
 * moves u into JIT_RET.  Remaining steps are not visible in this excerpt. */
608 _jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
610 jit_code_inc_synth_w(code, u);
611 jit_movr(JIT_RET, u);
/* Return the immediate integer u: opens a synth node for `code` and loads
 * u into JIT_RET.  Remaining steps are not visible in this excerpt. */
617 _jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
619 jit_code_inc_synth_w(code, u);
620 jit_movi(JIT_RET, u);
/* Return the float held in register u by moving it into JIT_FRET.  A line
 * between the synth marker and the move is not visible in this excerpt. */
626 _jit_retr_f(jit_state_t *_jit, jit_int32_t u)
628 jit_inc_synth_w(retr_f, u);
630 jit_movr_f(JIT_FRET, u);
/* Return the float constant u by loading it into JIT_FRET. */
638 _jit_reti_f(jit_state_t *_jit, jit_float32_t u)
640 jit_inc_synth_f(reti_f, u);
641 jit_movi_f(JIT_FRET, u);
/* Return the double held in register u by moving it into JIT_FRET.  A line
 * between the synth marker and the move is not visible in this excerpt. */
647 _jit_retr_d(jit_state_t *_jit, jit_int32_t u)
649 jit_inc_synth_w(retr_d, u);
651 jit_movr_d(JIT_FRET, u);
/* Return the double constant u by loading it into JIT_FRET. */
659 _jit_reti_d(jit_state_t *_jit, jit_float64_t u)
661 jit_inc_synth_d(reti_d, u);
662 jit_movi_d(JIT_FRET, u);
/* Ends the definition of the current function: the epilog node created by
 * the prolog must still be unlinked (next == NULL); it is appended to the
 * node list and the current-function pointer is cleared. */
668 _jit_epilog(jit_state_t *_jit)
670 assert(_jitc->function);
671 assert(_jitc->function->epilog->next == NULL);
672 jit_link(_jitc->function->epilog);
673 _jitc->function = NULL;
/* Tells whether argument node u was assigned to a register.  Integer
 * argument codes (jit_code_arg_c .. jit_code_arg) are checked with
 * jit_arg_reg_p(); any other node must be a float or double argument
 * (asserted) and is checked with jit_arg_f_reg_p(). */
677 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
679 if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
680 return (jit_arg_reg_p(u->u.w));
681 assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
682 return (jit_arg_f_reg_p(u->u.w));
/* Marks the start of the variable part of an argument list.
 * While a call is being prepared (_jitc->prepare) the call is flagged as
 * varargs; otherwise the function being defined is flagged.  Each flag may
 * only be set once (asserted).
 * On 64-bit non-Windows targets, the visible code also reserves a
 * jit_va_list_t in the frame and records where the variable arguments
 * start in the register save area: vagp/vafp point just past the already
 * declared register arguments, or at the va_*_max_offset value when no
 * argument register of that class is left. */
686 _jit_ellipsis(jit_state_t *_jit)
688 jit_inc_synth(ellipsis);
690 if (_jitc->prepare) {
692 /* Remember that a varargs function call is being constructed. */
693 assert(!(_jitc->function->call.call & jit_call_varargs));
694 _jitc->function->call.call |= jit_call_varargs;
698 /* Remember the current function is varargs. */
699 assert(!(_jitc->function->self.call & jit_call_varargs));
700 _jitc->function->self.call |= jit_call_varargs;
702 #if __X64 && !(__CYGWIN__ || _WIN32)
703 /* Allocate va_list like object in the stack.
704 * If applicable, with enough space to save all argument
705 * registers, and use fixed offsets for them. */
706 _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
708 /* Initialize gp offset in save area. */
709 if (jit_arg_reg_p(_jitc->function->self.argi))
710 _jitc->function->vagp = _jitc->function->self.argi * 8;
712 _jitc->function->vagp = va_gp_max_offset;
714 /* Initialize fp offset in save area. */
715 if (jit_arg_f_reg_p(_jitc->function->self.argf))
716 _jitc->function->vafp = _jitc->function->self.argf * 16 +
719 _jitc->function->vafp = va_fp_max_offset;
/* Pushes a va_list held in register u as a call argument.  Only the synth
 * marker is visible in this excerpt; the rest of the body is elided. */
726 _jit_va_push(jit_state_t *_jit, jit_int32_t u)
728 jit_inc_synth_w(va_push, u);
/* Declares the next integer-class argument of the function being defined.
 * Not allowed once the function has been flagged varargs (asserted).
 * If an argument register is still free, the node's offset is the register
 * index and argi advances; on Cygwin/Windows self.size also grows by one
 * word.  Otherwise the offset is the current self.size, which then grows by
 * REAL_WORDSIZE.  The created node carries (offset, ++self.argn). */
734 _jit_arg(jit_state_t *_jit, jit_code_t code)
738 assert(_jitc->function);
739 assert(!(_jitc->function->self.call & jit_call_varargs));
740 #if STRONG_TYPE_CHECKING
741 assert(code >= jit_code_arg_c && code <= jit_code_arg);
744 if (jit_arg_reg_p(_jitc->function->self.argi)) {
745 offset = _jitc->function->self.argi++;
746 # if __CYGWIN__ || _WIN32
747 _jitc->function->self.size += sizeof(jit_word_t);
753 offset = _jitc->function->self.size;
754 _jitc->function->self.size += REAL_WORDSIZE;
757 node = jit_new_node_ww(code, offset,
758 ++_jitc->function->self.argn);
/* Declares the next float argument of the function being defined.
 * On Cygwin/Windows the slot is shared with the integer arguments (argi
 * is tested and advanced, self.size grows by one word); on the other
 * visible register path argf is tested and advanced.  Without a free
 * register the offset is the current self.size, which then grows by
 * REAL_WORDSIZE.  The created node carries (offset, ++self.argn). */
764 _jit_arg_f(jit_state_t *_jit)
768 assert(_jitc->function);
769 assert(!(_jitc->function->self.call & jit_call_varargs));
771 # if __CYGWIN__ || _WIN32
772 if (jit_arg_reg_p(_jitc->function->self.argi)) {
773 offset = _jitc->function->self.argi++;
774 _jitc->function->self.size += sizeof(jit_word_t);
777 if (jit_arg_f_reg_p(_jitc->function->self.argf))
778 offset = _jitc->function->self.argf++;
783 offset = _jitc->function->self.size;
784 _jitc->function->self.size += REAL_WORDSIZE;
787 node = jit_new_node_ww(jit_code_arg_f, offset,
788 ++_jitc->function->self.argn);
/* Declares the next double argument of the function being defined.
 * Same structure as the float variant, except that a stack-passed double
 * advances self.size by sizeof(jit_float64_t).  The created node carries
 * (offset, ++self.argn). */
794 _jit_arg_d(jit_state_t *_jit)
798 assert(_jitc->function);
799 assert(!(_jitc->function->self.call & jit_call_varargs));
801 # if __CYGWIN__ || _WIN32
802 if (jit_arg_reg_p(_jitc->function->self.argi)) {
803 offset = _jitc->function->self.argi++;
804 _jitc->function->self.size += sizeof(jit_word_t);
807 if (jit_arg_f_reg_p(_jitc->function->self.argf))
808 offset = _jitc->function->self.argf++;
813 offset = _jitc->function->self.size;
814 _jitc->function->self.size += sizeof(jit_float64_t);
817 node = jit_new_node_ww(jit_code_arg_d, offset,
818 ++_jitc->function->self.argn);
/* Fetches argument v into register u as a char.  Register arguments are
 * converted with jit_extr_c from JIT_RA0 - v->u.w; stack arguments are
 * loaded with jit_ldxi_c from _RBP + v->u.w and the load node is added to
 * the argument list via jit_link_alist(). */
824 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
826 assert_arg_type(v->code, jit_code_arg_c);
827 jit_inc_synth_wp(getarg_c, u, v);
829 if (jit_arg_reg_p(v->u.w))
830 jit_extr_c(u, JIT_RA0 - v->u.w);
834 jit_node_t *node = jit_ldxi_c(u, _RBP, v->u.w);
835 jit_link_alist(node);
/* Fetches argument v into register u as an unsigned char: jit_extr_uc
 * from the argument register, or jit_ldxi_uc from _RBP + v->u.w with the
 * load node added to the argument list. */
841 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
843 assert_arg_type(v->code, jit_code_arg_c);
844 jit_inc_synth_wp(getarg_uc, u, v);
846 if (jit_arg_reg_p(v->u.w))
847 jit_extr_uc(u, JIT_RA0 - v->u.w);
851 jit_node_t *node = jit_ldxi_uc(u, _RBP, v->u.w);
852 jit_link_alist(node);
/* Fetches argument v into register u as a short: jit_extr_s from the
 * argument register, or jit_ldxi_s from _RBP + v->u.w with the load node
 * added to the argument list. */
858 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
860 assert_arg_type(v->code, jit_code_arg_s);
861 jit_inc_synth_wp(getarg_s, u, v);
863 if (jit_arg_reg_p(v->u.w))
864 jit_extr_s(u, JIT_RA0 - v->u.w);
868 jit_node_t *node = jit_ldxi_s(u, _RBP, v->u.w);
869 jit_link_alist(node);
/* Fetches argument v into register u as an unsigned short: jit_extr_us
 * from the argument register, or jit_ldxi_us from _RBP + v->u.w with the
 * load node added to the argument list. */
875 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
877 assert_arg_type(v->code, jit_code_arg_s);
878 jit_inc_synth_wp(getarg_us, u, v);
880 if (jit_arg_reg_p(v->u.w))
881 jit_extr_us(u, JIT_RA0 - v->u.w);
885 jit_node_t *node = jit_ldxi_us(u, _RBP, v->u.w);
886 jit_link_alist(node);
/* Fetches argument v into register u as an int.  For register arguments
 * two alternatives are visible -- a plain jit_movr and a jit_extr_i -- and
 * the conditional choosing between them is not visible in this excerpt.
 * Stack arguments are loaded with jit_ldxi_i from _RBP + v->u.w and the
 * load node is added to the argument list. */
892 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
894 assert_arg_type(v->code, jit_code_arg_i);
895 jit_inc_synth_wp(getarg_i, u, v);
897 if (jit_arg_reg_p(v->u.w)) {
899 jit_movr(u, JIT_RA0 - v->u.w);
901 jit_extr_i(u, JIT_RA0 - v->u.w);
907 jit_node_t *node = jit_ldxi_i(u, _RBP, v->u.w);
908 jit_link_alist(node);
/* Only built for __X64 without __X64_32.  Fetches argument v into register
 * u as an unsigned int: jit_extr_ui from the argument register, or
 * jit_ldxi_ui from _RBP + v->u.w with the load node added to the argument
 * list. */
913 #if __X64 && !__X64_32
915 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
917 assert_arg_type(v->code, jit_code_arg_i);
918 jit_inc_synth_wp(getarg_ui, u, v);
919 if (jit_arg_reg_p(v->u.w))
920 jit_extr_ui(u, JIT_RA0 - v->u.w);
922 jit_node_t *node = jit_ldxi_ui(u, _RBP, v->u.w);
923 jit_link_alist(node);
/* Fetches argument v into register u as a long: a plain jit_movr from the
 * argument register, or jit_ldxi_l from _RBP + v->u.w with the load node
 * added to the argument list. */
929 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
931 assert_arg_type(v->code, jit_code_arg_l);
932 jit_inc_synth_wp(getarg_l, u, v);
933 if (jit_arg_reg_p(v->u.w))
934 jit_movr(u, JIT_RA0 - v->u.w);
936 jit_node_t *node = jit_stxi == 0 ? 0 : jit_ldxi_l(u, _RBP, v->u.w);
937 jit_link_alist(node);
/* Overwrites argument v with the value of register u: moved into the
 * argument register when v lives in one, otherwise stored at _RBP + v->u.w
 * with the store node added to the argument list. */
944 _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
946 assert_putarg_type(code, v->code);
947 jit_code_inc_synth_wp(code, u, v);
949 if (jit_arg_reg_p(v->u.w))
950 jit_movr(JIT_RA0 - v->u.w, u);
954 jit_node_t *node = jit_stxi(v->u.w, _RBP, u);
955 jit_link_alist(node);
/* Overwrites argument v with the immediate u: loaded straight into the
 * argument register when v lives in one; otherwise a scratch gpr is
 * obtained, stored at _RBP + v->u.w (store node added to the argument
 * list) and released.  The line loading u into the scratch register is
 * not visible in this excerpt. */
961 _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
964 assert_putarg_type(code, v->code);
965 jit_code_inc_synth_wp(code, u, v);
967 if (jit_arg_reg_p(v->u.w))
968 jit_movi(JIT_RA0 - v->u.w, u);
973 regno = jit_get_reg(jit_class_gpr);
975 node = jit_stxi(v->u.w, _RBP, regno);
976 jit_link_alist(node);
977 jit_unget_reg(regno);
/* Fetches float argument v into register u: moved from _XMM0 - v->u.w when
 * v lives in a register, otherwise loaded with jit_ldxi_f from
 * _RBP + v->u.w with the load node added to the argument list. */
983 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
985 assert(v->code == jit_code_arg_f);
986 jit_inc_synth_wp(getarg_f, u, v);
988 if (jit_arg_f_reg_p(v->u.w))
989 jit_movr_f(u, _XMM0 - v->u.w);
993 jit_node_t *node = jit_ldxi_f(u, _RBP, v->u.w);
994 jit_link_alist(node);
/* Overwrites float argument v with register u: moved into _XMM0 - v->u.w
 * when v lives in a register, otherwise stored at _RBP + v->u.w with the
 * store node added to the argument list. */
1000 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
1002 assert(v->code == jit_code_arg_f);
1003 jit_inc_synth_wp(putargr_f, u, v);
1005 if (jit_arg_f_reg_p(v->u.w))
1006 jit_movr_f(_XMM0 - v->u.w, u);
1010 jit_node_t *node = jit_stxi_f(v->u.w, _RBP, u);
1011 jit_link_alist(node);
/* Overwrites float argument v with the constant u: loaded straight into
 * _XMM0 - v->u.w when v lives in a register; otherwise the constant goes
 * through a scratch fpr that is stored at _RBP + v->u.w (store node added
 * to the argument list) and then released. */
1017 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
1020 assert(v->code == jit_code_arg_f);
1021 jit_inc_synth_fp(putargi_f, u, v);
1023 if (jit_arg_f_reg_p(v->u.w))
1024 jit_movi_f(_XMM0 - v->u.w, u);
1029 regno = jit_get_reg(jit_class_fpr);
1030 jit_movi_f(regno, u);
1031 node = jit_stxi_f(v->u.w, _RBP, regno);
1032 jit_link_alist(node);
1033 jit_unget_reg(regno);
/* Fetches double argument v into register u: moved from _XMM0 - v->u.w
 * when v lives in a register, otherwise loaded with jit_ldxi_d from
 * _RBP + v->u.w with the load node added to the argument list. */
1039 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
1041 assert(v->code == jit_code_arg_d);
1042 jit_inc_synth_wp(getarg_d, u, v);
1044 if (jit_arg_f_reg_p(v->u.w))
1045 jit_movr_d(u, _XMM0 - v->u.w);
1049 jit_node_t *node = jit_ldxi_d(u, _RBP, v->u.w);
1050 jit_link_alist(node);
/* Overwrites double argument v with register u: moved into
 * _XMM0 - v->u.w when v lives in a register, otherwise stored at
 * _RBP + v->u.w with the store node added to the argument list. */
1056 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
1058 assert(v->code == jit_code_arg_d);
1059 jit_inc_synth_wp(putargr_d, u, v);
1061 if (jit_arg_f_reg_p(v->u.w))
1062 jit_movr_d(_XMM0 - v->u.w, u);
1066 jit_node_t *node = jit_stxi_d(v->u.w, _RBP, u);
1067 jit_link_alist(node);
/* Overwrites double argument v with the constant u: loaded straight into
 * _XMM0 - v->u.w when v lives in a register; otherwise the constant goes
 * through a scratch fpr that is stored at _RBP + v->u.w (store node added
 * to the argument list) and then released. */
1073 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
1076 assert(v->code == jit_code_arg_d);
1077 jit_inc_synth_dp(putargi_d, u, v);
1079 if (jit_arg_f_reg_p(v->u.w))
1080 jit_movi_d(_XMM0 - v->u.w, u);
1085 regno = jit_get_reg(jit_class_fpr);
1086 jit_movi_d(regno, u);
1087 node = jit_stxi_d(v->u.w, _RBP, regno);
1088 jit_link_alist(node);
1089 jit_unget_reg(regno);
/* Passes register u as the next integer argument of the call being
 * prepared.  While an argument register is free, u is moved into
 * JIT_RA0 - call.argi and argi advances; on Cygwin/Windows call.size also
 * grows by one word and, for varargs calls, u is additionally stored at
 * _RSP + call.size.  Otherwise u is stored at _RSP + call.size, which then
 * grows by REAL_WORDSIZE. */
1095 _jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
1097 assert(_jitc->function);
1098 jit_code_inc_synth_w(code, u);
1101 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1102 jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
1103 ++_jitc->function->call.argi;
1104 # if __CYGWIN__ || _WIN32
1105 if (_jitc->function->call.call & jit_call_varargs)
1106 jit_stxi(_jitc->function->call.size, _RSP, u);
1107 _jitc->function->call.size += sizeof(jit_word_t);
1113 jit_stxi(_jitc->function->call.size, _RSP, u);
1114 _jitc->function->call.size += REAL_WORDSIZE;
/* Passes the immediate u as the next integer argument of the call being
 * prepared.  While an argument register is free, u is loaded into
 * JIT_RA0 - call.argi; on Cygwin/Windows call.size grows by one word and,
 * for varargs calls, that register is also stored at _RSP + call.size
 * (before argi is advanced).  Otherwise a scratch gpr is stored at
 * _RSP + call.size, which then grows by REAL_WORDSIZE; the line loading u
 * into the scratch register is not visible in this excerpt. */
1121 _jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
1124 assert(_jitc->function);
1125 jit_code_inc_synth_w(code, u);
1128 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1129 jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
1130 # if __CYGWIN__ || _WIN32
1131 if (_jitc->function->call.call & jit_call_varargs)
1132 jit_stxi(_jitc->function->call.size, _RSP,
1133 JIT_RA0 - _jitc->function->call.argi);
1134 _jitc->function->call.size += sizeof(jit_word_t);
1136 ++_jitc->function->call.argi;
1141 regno = jit_get_reg(jit_class_gpr);
1143 jit_stxi(_jitc->function->call.size, _RSP, regno);
1144 _jitc->function->call.size += REAL_WORDSIZE;
1145 jit_unget_reg(regno);
/* Passes float register u as the next argument of the call being prepared.
 * Cygwin/Windows: float and integer arguments share slots, so call.argi
 * selects the xmm register; for varargs calls the value is also spilled to
 * _RSP + call.size and reloaded into the matching integer argument
 * register.  call.size grows by one word.
 * Other 64-bit targets: while an xmm argument register is free for THIS
 * call, u is moved into _XMM0 - call.argf and call.argf advances.
 * Otherwise u is stored at _RSP + call.size, which then grows by
 * REAL_WORDSIZE. */
1152 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
1154 assert(_jitc->function);
1155 jit_inc_synth_w(pushargr_f, u);
1158 # if __CYGWIN__ || _WIN32
1159 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1160 jit_movr_f(_XMM0 - _jitc->function->call.argi, u);
1161 if (_jitc->function->call.call & jit_call_varargs) {
1162 jit_stxi_f(_jitc->function->call.size, _RSP,
1163 _XMM0 - _jitc->function->call.argi);
1164 jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1165 _jitc->function->call.size);
1167 ++_jitc->function->call.argi;
1168 _jitc->function->call.size += sizeof(jit_word_t);
/* The availability test must look at the call under construction
 * (call.argf), the same counter used to pick and advance the register just
 * below.  Testing self.argf (the enclosing function's own parameter count)
 * could select a register past the last argument register, or spill to the
 * stack while registers are still free. */
1171 if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1172 jit_movr_f(_XMM0 - _jitc->function->call.argf, u);
1173 ++_jitc->function->call.argf;
1179 jit_stxi_f(_jitc->function->call.size, _RSP, u);
1180 _jitc->function->call.size += REAL_WORDSIZE;
/* Passes the float constant u as the next argument of the call being
 * prepared.  Cygwin/Windows: call.argi selects the xmm register; for
 * varargs calls the value is also spilled to _RSP + call.size and reloaded
 * into the matching integer argument register; call.size grows by one
 * word.  Other visible register path: u is loaded into _XMM0 - call.argf
 * and call.argf advances.  Otherwise the constant goes through a scratch
 * fpr that is stored at _RSP + call.size (which grows by REAL_WORDSIZE)
 * and then released. */
1187 _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
1190 assert(_jitc->function);
1191 jit_inc_synth_f(pushargi_f, u);
1194 # if __CYGWIN__ || _WIN32
1195 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1196 jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
1197 if (_jitc->function->call.call & jit_call_varargs) {
1198 jit_stxi_f(_jitc->function->call.size, _RSP,
1199 _XMM0 - _jitc->function->call.argi);
1200 jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1201 _jitc->function->call.size);
1203 ++_jitc->function->call.argi;
1204 _jitc->function->call.size += sizeof(jit_word_t);
1207 if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1208 jit_movi_f(_XMM0 - _jitc->function->call.argf, u);
1209 ++_jitc->function->call.argf;
1215 regno = jit_get_reg(jit_class_fpr);
1216 jit_movi_f(regno, u);
1217 jit_stxi_f(_jitc->function->call.size, _RSP, regno);
1218 _jitc->function->call.size += REAL_WORDSIZE;
1219 jit_unget_reg(regno);
/* Passes double register u as the next argument of the call being
 * prepared.  Cygwin/Windows: call.argi selects the xmm register; for
 * varargs calls the value is also spilled to _RSP + call.size and reloaded
 * (jit_ldxi_l) into the matching integer argument register; call.size
 * grows by one word.  Other visible register path: u is moved into
 * _XMM0 - call.argf and call.argf advances.  Otherwise u is stored at
 * _RSP + call.size, which then grows by sizeof(jit_float64_t). */
1226 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
1228 assert(_jitc->function);
1229 jit_inc_synth_w(pushargr_d, u);
1232 # if __CYGWIN__ || _WIN32
1233 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1234 jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
1235 if (_jitc->function->call.call & jit_call_varargs) {
1236 jit_stxi_d(_jitc->function->call.size, _RSP,
1237 _XMM0 - _jitc->function->call.argi);
1238 jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1239 _jitc->function->call.size);
1241 ++_jitc->function->call.argi;
1242 _jitc->function->call.size += sizeof(jit_word_t);
1245 if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1246 jit_movr_d(_XMM0 - _jitc->function->call.argf, u);
1247 ++_jitc->function->call.argf;
1253 jit_stxi_d(_jitc->function->call.size, _RSP, u);
1254 _jitc->function->call.size += sizeof(jit_float64_t);
/* Passes the double constant u as the next argument of the call being
 * prepared.  Cygwin/Windows: call.argi selects the xmm register; for
 * varargs calls the value is also spilled to _RSP + call.size and reloaded
 * (jit_ldxi_l) into the matching integer argument register; call.size
 * grows by one word.  Other visible register path: u is loaded into
 * _XMM0 - call.argf and call.argf advances.  Otherwise the constant goes
 * through a scratch fpr that is stored at _RSP + call.size (which grows by
 * sizeof(jit_float64_t)) and then released. */
1261 _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
1264 assert(_jitc->function);
1265 jit_inc_synth_d(pushargi_d, u);
1268 # if __CYGWIN__ || _WIN32
1269 if (jit_arg_reg_p(_jitc->function->call.argi)) {
1270 jit_movi_d(_XMM0 - _jitc->function->call.argi, u);
1271 if (_jitc->function->call.call & jit_call_varargs) {
1272 jit_stxi_d(_jitc->function->call.size, _RSP,
1273 _XMM0 - _jitc->function->call.argi);
1274 jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1275 _jitc->function->call.size);
1277 ++_jitc->function->call.argi;
1278 _jitc->function->call.size += sizeof(jit_word_t);
1281 if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1282 jit_movi_d(_XMM0 - _jitc->function->call.argf, u);
1283 ++_jitc->function->call.argf;
1289 regno = jit_get_reg(jit_class_fpr);
1290 jit_movi_d(regno, u);
1291 jit_stxi_d(_jitc->function->call.size, _RSP, regno);
1292 _jitc->function->call.size += sizeof(jit_float64_t);
1293 jit_unget_reg(regno);
/* Tests whether register `regno` is an argument register used by call node
 * `node`.  Only registers whose _rvs spec has jit_class_arg are considered:
 * for gpr registers the index JIT_RA0 - regno is compared against
 * node->v.w, for fpr registers _XMM0 - regno against node->w.w.  The
 * values returned are not visible in this excerpt. */
1300 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
1305 spec = jit_class(_rvs[regno].spec);
1306 if (spec & jit_class_arg) {
1307 if (spec & jit_class_gpr) {
1308 regno = JIT_RA0 - regno;
1309 if (regno >= 0 && regno < node->v.w)
1312 else if (spec & jit_class_fpr) {
1313 regno = _XMM0 - regno;
1314 if (regno >= 0 && regno < node->w.w)
/* Completes a prepared call whose target address is in register r0.
 * self.alen is raised to call.size when the call needs more outgoing stack
 * than seen so far.  For varargs calls on non-Cygwin/Windows targets the
 * visible code copies the target out of _RAX into a scratch register when
 * it lives there, and loads call.argf into _RAX when it is non-zero.  The
 * call node records call.argi in v.w and call.argf in w.w, after which the
 * call counters are reset to zero.  The initial assignment of `reg` is not
 * visible in this excerpt. */
1323 _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
1327 assert(_jitc->function);
1330 jit_inc_synth_w(finishr, r0);
1331 if (_jitc->function->self.alen < _jitc->function->call.size)
1332 _jitc->function->self.alen = _jitc->function->call.size;
1334 # if !(__CYGWIN__ || _WIN32)
1335 if (_jitc->function->call.call & jit_call_varargs) {
1336 if (jit_regno(reg) == _RAX) {
1337 reg = jit_get_reg(jit_class_gpr);
1338 jit_movr(reg, _RAX);
1340 if (_jitc->function->call.argf)
1341 jit_movi(_RAX, _jitc->function->call.argf);
1349 call = jit_callr(reg);
1350 call->v.w = _jitc->function->call.argi;
1351 call->w.w = _jitc->function->call.argf;
1352 _jitc->function->call.argi = _jitc->function->call.argf =
1353 _jitc->function->call.size = 0;
/* Completes a prepared call to the fixed address i0.  self.alen is raised
 * to call.size when needed.  For varargs calls on non-Cygwin/Windows
 * targets call.argf is loaded into _RAX when it is non-zero.  The call node
 * records call.argi in v.w and call.argf in w.w, after which the call
 * counters are reset to zero. */
1359 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
1362 assert(_jitc->function);
1364 jit_inc_synth_w(finishi, (jit_word_t)i0);
1365 if (_jitc->function->self.alen < _jitc->function->call.size)
1366 _jitc->function->self.alen = _jitc->function->call.size;
1368 # if !(__CYGWIN__ || _WIN32)
1369 if (_jitc->function->call.call & jit_call_varargs) {
1370 if (_jitc->function->call.argf)
1371 jit_movi(_RAX, _jitc->function->call.argf);
1378 node = jit_calli(i0);
1379 node->v.w = _jitc->function->call.argi;
1380 node->w.w = _jitc->function->call.argf;
1381 _jitc->function->call.argi = _jitc->function->call.argf =
1382 _jitc->function->call.size = 0;
/* Copies the call's return value into r0 as a char (jit_extr_c from
 * JIT_RET). */
1389 _jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
1391 jit_inc_synth_w(retval_c, r0);
1392 jit_extr_c(r0, JIT_RET);
/* Copies the call's return value into r0 as an unsigned char (jit_extr_uc
 * from JIT_RET). */
1397 _jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
1399 jit_inc_synth_w(retval_uc, r0);
1400 jit_extr_uc(r0, JIT_RET);
/* Copies the call's return value into r0 as a short (jit_extr_s from
 * JIT_RET). */
1405 _jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
1407 jit_inc_synth_w(retval_s, r0);
1408 jit_extr_s(r0, JIT_RET);
/* Copies the call's return value into r0 as an unsigned short (jit_extr_us
 * from JIT_RET). */
1413 _jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
1415 jit_inc_synth_w(retval_us, r0);
1416 jit_extr_us(r0, JIT_RET);
/* Fetch a call's return value into r0 as an int.
 * Under __X32 or __X64_32 JIT_RET is copied with jit_movr(); the other
 * configuration goes through jit_extr_i(). */
1421 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
1423 jit_inc_synth_w(retval_i, r0);
1424 #if __X32 || __X64_32
/* plain register copy on these targets */
1426 jit_movr(r0, JIT_RET);
/* otherwise jit_extr_i() is applied to the returned value */
1428 jit_extr_i(r0, JIT_RET);
1433 #if __X64 && !__X64_32
/* Fetch a call's return value into r0 as an unsigned int: opens the
 * synthesized retval_ui node and applies jit_extr_ui() to JIT_RET. */
1435 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
1437 jit_inc_synth_w(retval_ui, r0);
1438 jit_extr_ui(r0, JIT_RET);
/* Fetch a call's return value into r0 as a long: opens the synthesized
 * retval_l node and copies JIT_RET with jit_movr(). */
1443 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
1445 jit_inc_synth_w(retval_l, r0);
1447 jit_movr(r0, JIT_RET);
/* Fetch a call's single precision return value into r0: opens the
 * synthesized retval_f node and copies JIT_FRET with jit_movr_f(). */
1453 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
1455 jit_inc_synth_w(retval_f, r0);
1458 jit_movr_f(r0, JIT_FRET);
/* Fetch a call's double precision return value into r0: opens the
 * synthesized retval_d node and copies JIT_FRET with jit_movr_d(). */
1464 _jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
1466 jit_inc_synth_w(retval_d, r0);
1469 jit_movr_d(r0, JIT_FRET);
/* Walk the node list starting at _jitc->head and emit x86 machine code
 * for every node.  Branches, calls and movi nodes that reference a label
 * without a known address are recorded through patch() and resolved in
 * the loop after the main pass.  The generated range is handed to
 * jit_flush() and _jit->code.ptr is returned. */
1475 _emit_code(jit_state_t *_jit)
/* func and patch_offset are used below as undo.func / undo.patch_offset:
 * a snapshot taken at each prolog so a function can be emitted again */
1485 jit_function_t func;
1486 #if DEVEL_DISASSEMBLER
1489 jit_int32_t patch_offset;
1491 #if DEVEL_DISASSEMBLER
1495 _jitc->function = NULL;
1497 jit_reglive_setup();
1501 undo.patch_offset = 0;
/* Helper macros expanding to one `case` of the opcode switch below.
 * Reading the suffix letters against the u, v, w operands of the node:
 *   r - register operand, passed through rn()
 *   w - word operand, passed as stored in the node
 *   f - float register; jit_x87_reg_p() on it selects between the
 *       x87_ and sse_ prefixed emitters
 *   q - register pair taken from the .q.l / .q.h halves of an operand
 *   b - branch whose target is another node (label or epilog) */
/* u and v are registers */
1502 #define case_rr(name, type) \
1503 case jit_code_##name##r##type: \
1504 name##r##type(rn(node->u.w), rn(node->v.w)); \
/* u is a register, v is a word */
1506 #define case_rw(name, type) \
1507 case jit_code_##name##i##type: \
1508 name##i##type(rn(node->u.w), node->v.w); \
/* u and v are registers; x87/sse choice is made on v */
1510 #define case_rf(name, type) \
1511 case jit_code_##name##r##type: \
1512 if (jit_x87_reg_p(node->v.w)) \
1513 x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \
1515 sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \
/* u and v are registers; x87/sse choice is made on u */
1517 #define case_fr(name, type) \
1518 case jit_code_##name##r##type: \
1519 if (jit_x87_reg_p(node->u.w)) \
1520 x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \
1522 sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \
/* u is a float register (x87/sse choice), v is a word */
1524 #define case_fw(name, type) \
1525 case jit_code_##name##i##type: \
1526 if (jit_x87_reg_p(node->u.w)) \
1527 x87_##name##i##type(rn(node->u.w), node->v.w); \
1529 sse_##name##i##type(rn(node->u.w), node->v.w); \
/* u is a word, v is a register */
1531 #define case_wr(name, type) \
1532 case jit_code_##name##i##type: \
1533 name##i##type(node->u.w, rn(node->v.w)); \
/* u is a word, v is a float register (x87/sse choice) */
1535 #define case_wf(name, type) \
1536 case jit_code_##name##i##type: \
1537 if (jit_x87_reg_p(node->v.w)) \
1538 x87_##name##i##type(node->u.w, rn(node->v.w)); \
1540 sse_##name##i##type(node->u.w, rn(node->v.w)); \
/* u and v are float registers; the x87 emitter requires both to be x87 */
1542 #define case_ff(name, type) \
1543 case jit_code_##name##r##type: \
1544 if (jit_x87_reg_p(node->u.w) && \
1545 jit_x87_reg_p(node->v.w)) \
1546 x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \
1548 sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \
/* u, v and w are registers */
1550 #define case_rrr(name, type) \
1551 case jit_code_##name##r##type: \
1552 name##r##type(rn(node->u.w), \
1553 rn(node->v.w), rn(node->w.w)); \
/* register pair u.q.l/u.q.h plus registers v and w */
1555 #define case_rrrr(name, type) \
1556 case jit_code_##name##r##type: \
1557 name##r##type(rn(node->u.q.l), rn(node->u.q.h), \
1558 rn(node->v.w), rn(node->w.w)); \
/* u, the pair v.q.l/v.q.h and w: the x87 emitter requires all four to be
 * x87 registers; the sse path asserts that all four are sse registers */
1560 #define case_rqr(name, type) \
1561 case jit_code_##name##r##type: \
1562 if (jit_x87_reg_p(node->u.w) && \
1563 jit_x87_reg_p(node->v.q.l) && \
1564 jit_x87_reg_p(node->v.q.h) && \
1565 jit_x87_reg_p(node->w.w)) \
1566 x87_##name##r##type(rn(node->u.w), \
1571 assert(jit_sse_reg_p(node->u.w) && \
1572 jit_sse_reg_p(node->v.q.l) && \
1573 jit_sse_reg_p(node->v.q.h) && \
1574 jit_sse_reg_p(node->w.w)); \
1575 sse_##name##r##type(rn(node->u.w), \
/* u, v and w are registers; x87/sse choice is made on u */
1581 #define case_frr(name, type) \
1582 case jit_code_##name##r##type: \
1583 if (jit_x87_reg_p(node->u.w)) \
1584 x87_##name##r##type(rn(node->u.w), \
1585 rn(node->v.w), rn(node->w.w)); \
1587 sse_##name##r##type(rn(node->u.w), \
1588 rn(node->v.w), rn(node->w.w)); \
/* u, v and w are registers; x87/sse choice is made on w */
1590 #define case_rrf(name, type) \
1591 case jit_code_##name##r##type: \
1592 if (jit_x87_reg_p(node->w.w)) \
1593 x87_##name##r##type(rn(node->u.w), \
1594 rn(node->v.w), rn(node->w.w)); \
1596 sse_##name##r##type(rn(node->u.w), \
1597 rn(node->v.w), rn(node->w.w)); \
/* u and v are registers, w is a word */
1599 #define case_rrw(name, type) \
1600 case jit_code_##name##i##type: \
1601 name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
/* register pair u.q.l/u.q.h, register v and word w */
1603 #define case_rrrw(name, type) \
1604 case jit_code_##name##i##type: \
1605 name##i##type(rn(node->u.q.l), rn(node->u.q.h), \
1606 rn(node->v.w), node->w.w); \
/* u and v are registers (x87/sse choice on u), w is a word */
1608 #define case_frw(name, type) \
1609 case jit_code_##name##i##type: \
1610 if (jit_x87_reg_p(node->u.w)) \
1611 x87_##name##i##type(rn(node->u.w), \
1612 rn(node->v.w), node->w.w); \
1614 sse_##name##i##type(rn(node->u.w), \
1615 rn(node->v.w), node->w.w); \
/* u is a word, v and w are registers */
1617 #define case_wrr(name, type) \
1618 case jit_code_##name##i##type: \
1619 name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
/* u is a word, v and w are registers; x87/sse choice is made on w */
1621 #define case_wrf(name, type) \
1622 case jit_code_##name##i##type: \
1623 if (jit_x87_reg_p(node->w.w)) \
1624 x87_##name##i##type(node->u.w, \
1625 rn(node->v.w), rn(node->w.w)); \
1627 sse_##name##i##type(node->u.w, \
1628 rn(node->v.w), rn(node->w.w)); \
/* Conditional branch comparing registers v and w.  The target node (temp)
 * must be a label or an epilog.  When it already carries jit_flag_patch
 * its address temp->u.w is used directly; otherwise the branch is emitted
 * against the current pc and recorded with patch() for later fix up. */
1630 #define case_brr(name, type) \
1631 case jit_code_##name##r##type: \
1633 assert(temp->code == jit_code_label || \
1634 temp->code == jit_code_epilog); \
1635 if (temp->flag & jit_flag_patch) \
1636 name##r##type(temp->u.w, rn(node->v.w), \
1639 word = name##r##type(_jit->pc.w, \
1640 rn(node->v.w), rn(node->w.w)); \
1641 patch(word, node); \
/* Same as case_brr, comparing register v against the word w */
1644 #define case_brw(name, type) \
1645 case jit_code_##name##i##type: \
1647 assert(temp->code == jit_code_label || \
1648 temp->code == jit_code_epilog); \
1649 if (temp->flag & jit_flag_patch) \
1650 name##i##type(temp->u.w, \
1651 rn(node->v.w), node->w.w); \
1653 word = name##i##type(_jit->pc.w, \
1654 rn(node->v.w), node->w.w); \
1655 patch(word, node); \
/* u is the result register, v and w are float registers; the x87 emitter
 * requires both v and w to be x87 registers */
1658 #define case_rff(name, type) \
1659 case jit_code_##name##r##type: \
1660 if (jit_x87_reg_p(node->v.w) && \
1661 jit_x87_reg_p(node->w.w)) \
1662 x87_##name##r##type(rn(node->u.w), rn(node->v.w), \
1665 sse_##name##r##type(rn(node->u.w), rn(node->v.w), \
/* u and v are registers (x87/sse choice on v); the node must carry
 * jit_flag_data and the third argument is node->w.n->u.w cast to a
 * pointer to jit_float<size>_t */
1668 #define case_rfw(name, type, size) \
1669 case jit_code_##name##i##type: \
1670 assert(node->flag & jit_flag_data); \
1671 if (jit_x87_reg_p(node->v.w)) \
1672 x87_##name##i##type(rn(node->u.w), rn(node->v.w), \
1673 (jit_float##size##_t *)node->w.n->u.w); \
1675 sse_##name##i##type(rn(node->u.w), rn(node->v.w), \
1676 (jit_float##size##_t *)node->w.n->u.w); \
/* u, v and w are float registers; the x87 emitter requires all three to
 * be x87 registers */
1678 #define case_fff(name, type) \
1679 case jit_code_##name##r##type: \
1680 if (jit_x87_reg_p(node->u.w) && \
1681 jit_x87_reg_p(node->v.w) && \
1682 jit_x87_reg_p(node->w.w)) \
1683 x87_##name##r##type(rn(node->u.w), \
1684 rn(node->v.w), rn(node->w.w)); \
1686 sse_##name##r##type(rn(node->u.w), \
1687 rn(node->v.w), rn(node->w.w)); \
/* u and v are float registers (x87 only when both are x87); the node must
 * carry jit_flag_data and the third argument is node->w.n->u.w cast to a
 * pointer to jit_float<size>_t */
1689 #define case_ffw(name, type, size) \
1690 case jit_code_##name##i##type: \
1691 assert(node->flag & jit_flag_data); \
1692 if (jit_x87_reg_p(node->u.w) && \
1693 jit_x87_reg_p(node->v.w)) \
1694 x87_##name##i##type(rn(node->u.w), rn(node->v.w), \
1695 (jit_float##size##_t *)node->w.n->u.w); \
1697 sse_##name##i##type(rn(node->u.w), rn(node->v.w), \
1698 (jit_float##size##_t *)node->w.n->u.w); \
/* Float conditional branch on registers v and w (note the `b` inserted in
 * both the opcode and emitter names).  Target handling matches case_brr:
 * direct when the target has jit_flag_patch, else emitted against the
 * current pc and recorded with patch(). */
1700 #define case_bff(name, type) \
1701 case jit_code_b##name##r##type: \
1703 assert(temp->code == jit_code_label || \
1704 temp->code == jit_code_epilog); \
1705 if (temp->flag & jit_flag_patch) { \
1706 if (jit_x87_reg_p(node->v.w) && \
1707 jit_x87_reg_p(node->w.w)) \
1708 x87_b##name##r##type(temp->u.w, \
1709 rn(node->v.w), rn(node->w.w)); \
1711 sse_b##name##r##type(temp->u.w, \
1712 rn(node->v.w), rn(node->w.w)); \
1715 if (jit_x87_reg_p(node->v.w) && \
1716 jit_x87_reg_p(node->w.w)) \
1717 word = x87_b##name##r##type(_jit->pc.w, \
1718 rn(node->v.w), rn(node->w.w)); \
1720 word = sse_b##name##r##type(_jit->pc.w, \
1721 rn(node->v.w), rn(node->w.w)); \
1722 patch(word, node); \
/* Float conditional branch comparing register v against the constant
 * reached through node->w.n->u.w; x87/sse choice is made on v */
1725 #define case_bfw(name, type, size) \
1726 case jit_code_b##name##i##type: \
1728 assert(temp->code == jit_code_label || \
1729 temp->code == jit_code_epilog); \
1730 if (temp->flag & jit_flag_patch) { \
1731 if (jit_x87_reg_p(node->v.w)) \
1732 x87_b##name##i##type(temp->u.w, \
1734 (jit_float##size##_t *)node->w.n->u.w); \
1736 sse_b##name##i##type(temp->u.w, \
1738 (jit_float##size##_t *)node->w.n->u.w); \
1741 if (jit_x87_reg_p(node->v.w)) \
1742 word = x87_b##name##i##type(_jit->pc.w, \
1744 (jit_float##size##_t *)node->w.n->u.w); \
1746 word = sse_b##name##i##type(_jit->pc.w, \
1748 (jit_float##size##_t *)node->w.n->u.w); \
1749 patch(word, node); \
1752 #if DEVEL_DISASSEMBLER
/* main pass: one iteration per node of the list */
1755 for (node = _jitc->head; node; node = node->next) {
/* the write pointer has reached the end of the code buffer */
1756 if (_jit->pc.uc >= _jitc->code.end)
1759 #if DEVEL_DISASSEMBLER
1760 node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
/* flag the node's register arguments for the duration of its emission;
 * cleared again by jit_regarg_clr() after the switch */
1763 value = jit_classify(node->code);
1764 jit_regarg_set(node, value);
1765 switch (node->code) {
1766 case jit_code_align:
1767 /* Must align to a power of two */
1768 assert(!(node->u.w & (node->u.w - 1)));
/* pad with nop() up to the next multiple of node->u.w */
1769 if ((word = _jit->pc.w & (node->u.w - 1)))
1770 nop(node->u.w - word);
/* note/name nodes only record the current code address */
1775 case jit_code_note: case jit_code_name:
1776 node->u.w = _jit->pc.w;
1778 case jit_code_label:
/* labels that are linked or flagged as used are padded to a
 * sizeof(jit_word_t) boundary */
1779 if ((node->link || (node->flag & jit_flag_use)) &&
1780 (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
1781 nop(sizeof(jit_word_t) - word);
1782 /* remember label is defined */
1783 node->flag |= jit_flag_patch;
1784 node->u.w = _jit->pc.w;
1807 case_rrrr(qmul, _u);
1808 case_rrrw(qmul, _u);
1815 case_rrrr(qdiv, _u);
1816 case_rrrw(qdiv, _u);
1831 case_rrrr(qlsh, _u);
1832 case_rrrw(qlsh, _u);
1839 case_rrrr(qrsh, _u);
1840 case_rrrw(qrsh, _u);
/* casr/casi take the register pair w.q.l/w.q.h as their last operands */
1874 casr(rn(node->u.w), rn(node->v.w),
1875 rn(node->w.q.l), rn(node->w.q.h));
1878 casi(rn(node->u.w), node->v.w,
1879 rn(node->w.q.l), rn(node->w.q.h));
/* movi whose value is another node: a data node or an already defined
 * label supplies temp->u.w directly; otherwise a placeholder load is
 * emitted, using movi() when the remaining code length fits in 32 bits
 * and movi_p() when it does not */
1885 if (node->flag & jit_flag_node) {
1887 if (temp->code == jit_code_data ||
1888 (temp->code == jit_code_label &&
1889 (temp->flag & jit_flag_patch)))
1890 movi(rn(node->u.w), temp->u.w);
1892 assert(temp->code == jit_code_label ||
1893 temp->code == jit_code_epilog);
1895 word = _jit->code.length -
1896 (_jit->pc.uc - _jit->code.ptr);
1897 if ((jit_int32_t)word == word)
1898 word = movi(rn(node->u.w), _jit->pc.w);
1901 word = movi_p(rn(node->u.w), node->v.w);
/* movi of a plain word */
1906 movi(rn(node->u.w), node->v.w);
1910 #if __X64 && !__X64_32
1913 case_rr(bswap, _us);
1914 case_rr(bswap, _ui);
1915 #if __X64 && !__X64_32
1916 case_rr(bswap, _ul);
/* bit field extract/deposit: w.q.l and w.q.h are passed unconverted */
1919 extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1921 case jit_code_extr_u:
1922 extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1925 depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1928 depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
1934 #if __X64 && !__X64_32
1938 case_rf(trunc, _f_i);
1939 case_rf(trunc, _d_i);
1941 case_rf(trunc, _f_l);
1942 case_rf(trunc, _d_l);
1954 #if __X64 && !__X64_32
1970 #if __X64 && !__X64_32
/* unld* / unst* forward node->w.w as a word to the emitter */
1976 case jit_code_unldr:
1977 unldr(rn(node->u.w), rn(node->v.w), node->w.w);
1979 case jit_code_unldi:
1980 unldi(rn(node->u.w), node->v.w, node->w.w);
1982 case jit_code_unldr_u:
1983 unldr_u(rn(node->u.w), rn(node->v.w), node->w.w);
1985 case jit_code_unldi_u:
1986 unldi_u(rn(node->u.w), node->v.w, node->w.w);
1994 #if __X64 && !__X64_32
2004 #if __X64 && !__X64_32
2008 case jit_code_unstr:
2009 unstr(rn(node->u.w), rn(node->v.w), node->w.w);
2011 case jit_code_unsti:
2012 unsti(node->u.w, rn(node->v.w), node->w.w);
2040 case_brr(boadd, _u);
2041 case_brw(boadd, _u);
2044 case_brr(bxadd, _u);
2045 case_brw(bxadd, _u);
2048 case_brr(bosub, _u);
2049 case_brw(bosub, _u);
2052 case_brr(bxsub, _u);
2053 case_brw(bxsub, _u);
/* single precision arithmetic and comparisons against a constant */
2055 case_ffw(add, _f, 32);
2057 case_ffw(sub, _f, 32);
2058 case_ffw(rsb, _f, 32);
2060 case_ffw(mul, _f, 32);
2062 case_ffw(div, _f, 32);
2073 case_rfw(lt, _f, 32);
2075 case_rfw(le, _f, 32);
2077 case_rfw(eq, _f, 32);
2079 case_rfw(ge, _f, 32);
2081 case_rfw(gt, _f, 32);
2083 case_rfw(ne, _f, 32);
2085 case_rfw(unlt, _f, 32);
2087 case_rfw(unle, _f, 32);
2089 case_rfw(uneq, _f, 32);
2091 case_rfw(unge, _f, 32);
2093 case_rfw(ungt, _f, 32);
2095 case_rfw(ltgt, _f, 32);
2097 case_rfw(ord, _f, 32);
2098 case_rff(unord, _f);
2099 case_rfw(unord, _f, 32);
/* float register move: four combinations of x87/sse source and
 * destination, using the *_from_* helpers when the register files differ */
2100 case jit_code_movr_f:
2101 if (jit_x87_reg_p(node->u.w)) {
2102 if (jit_x87_reg_p(node->v.w))
2103 x87_movr_f(rn(node->u.w), rn(node->v.w));
2105 x87_from_sse_f(rn(node->u.w), rn(node->v.w));
2108 if (jit_sse_reg_p(node->v.w))
2109 sse_movr_f(rn(node->u.w), rn(node->v.w));
2111 sse_from_x87_f(rn(node->u.w), rn(node->v.w));
/* float constant load: the value is reached through the data node v.n */
2114 case jit_code_movi_f:
2115 assert(node->flag & jit_flag_data);
2116 if (jit_x87_reg_p(node->u.w))
2117 x87_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
2119 sse_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
/* unld*_x: x87/sse choice is made on the destination register u */
2125 case jit_code_unldr_x:
2126 if (jit_x87_reg_p(node->u.w))
2127 x87_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
2129 sse_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
2131 case jit_code_unldi_x:
2132 if (jit_x87_reg_p(node->u.w))
2133 x87_unldi_x(rn(node->u.w), node->v.w, node->w.w);
2135 sse_unldi_x(rn(node->u.w), node->v.w, node->w.w);
/* unst*_x: x87/sse choice is made on the register v */
2141 case jit_code_unstr_x:
2142 if (jit_x87_reg_p(node->v.w))
2143 x87_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
2145 sse_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
2147 case jit_code_unsti_x:
2148 if (jit_x87_reg_p(node->v.w))
2149 x87_unsti_x(node->u.w, rn(node->v.w), node->w.w);
2151 sse_unsti_x(node->u.w, rn(node->v.w), node->w.w);
/* single precision branches against a constant */
2154 case_bfw(lt, _f, 32);
2156 case_bfw(le, _f, 32);
2158 case_bfw(eq, _f, 32);
2160 case_bfw(ge, _f, 32);
2162 case_bfw(gt, _f, 32);
2164 case_bfw(ne, _f, 32);
2166 case_bfw(unlt, _f, 32);
2168 case_bfw(unle, _f, 32);
2170 case_bfw(uneq, _f, 32);
2172 case_bfw(unge, _f, 32);
2174 case_bfw(ungt, _f, 32);
2176 case_bfw(ltgt, _f, 32);
2178 case_bfw(ord, _f, 32);
2179 case_bff(unord, _f);
2180 case_bfw(unord, _f, 32);
/* double precision arithmetic and comparisons against a constant */
2182 case_ffw(add, _d, 64);
2184 case_ffw(sub, _d, 64);
2185 case_ffw(rsb, _d, 64);
2187 case_ffw(mul, _d, 64);
2189 case_ffw(div, _d, 64);
2200 case_rfw(lt, _d, 64);
2202 case_rfw(le, _d, 64);
2204 case_rfw(eq, _d, 64);
2206 case_rfw(ge, _d, 64);
2208 case_rfw(gt, _d, 64);
2210 case_rfw(ne, _d, 64);
2212 case_rfw(unlt, _d, 64);
2214 case_rfw(unle, _d, 64);
2216 case_rfw(uneq, _d, 64);
2218 case_rfw(unge, _d, 64);
2220 case_rfw(ungt, _d, 64);
2222 case_rfw(ltgt, _d, 64);
2224 case_rfw(ord, _d, 64);
2225 case_rff(unord, _d);
2226 case_rfw(unord, _d, 64);
/* double register move: same four x87/sse combinations as movr_f */
2227 case jit_code_movr_d:
2228 if (jit_x87_reg_p(node->u.w)) {
2229 if (jit_x87_reg_p(node->v.w))
2230 x87_movr_d(rn(node->u.w), rn(node->v.w));
2232 x87_from_sse_d(rn(node->u.w), rn(node->v.w));
2235 if (jit_sse_reg_p(node->v.w))
2236 sse_movr_d(rn(node->u.w), rn(node->v.w));
2238 sse_from_x87_d(rn(node->u.w), rn(node->v.w));
/* double constant load: the value is reached through the data node v.n */
2241 case jit_code_movi_d:
2242 assert(node->flag & jit_flag_data);
2243 if (jit_x87_reg_p(node->u.w))
2244 x87_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
2246 sse_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
/* double precision branches against a constant */
2257 case_bfw(lt, _d, 64);
2259 case_bfw(le, _d, 64);
2261 case_bfw(eq, _d, 64);
2263 case_bfw(ge, _d, 64);
2265 case_bfw(gt, _d, 64);
2267 case_bfw(ne, _d, 64);
2269 case_bfw(unlt, _d, 64);
2271 case_bfw(unle, _d, 64);
2273 case_bfw(uneq, _d, 64);
2275 case_bfw(unge, _d, 64);
2277 case_bfw(ungt, _d, 64);
2279 case_bfw(ltgt, _d, 64);
2281 case_bfw(ord, _d, 64);
2282 case_bff(unord, _d);
2283 case_bfw(unord, _d, 64);
/* jump through a register */
2286 jmpr(rn(node->u.w));
/* jump to a node: the target must be a label or epilog; when its address
 * is not yet known a placeholder jump is emitted, jmpi() if the remaining
 * code length fits in 32 bits and jmpi_p() otherwise */
2289 if (node->flag & jit_flag_node) {
2291 assert(temp->code == jit_code_label ||
2292 temp->code == jit_code_epilog);
2293 if (temp->flag & jit_flag_patch)
2297 word = _jit->code.length -
2298 (_jit->pc.uc - _jit->code.ptr);
2299 if ((jit_int32_t)word == word)
2300 word = jmpi(_jit->pc.w);
2303 word = jmpi_p(_jit->pc.w);
2312 case jit_code_callr:
2314 callr(rn(node->u.w));
/* call to a node: same placeholder scheme as the jump above, with
 * calli()/calli_p() */
2316 case jit_code_calli:
2317 if (node->flag & jit_flag_node) {
2319 assert(temp->code == jit_code_label ||
2320 temp->code == jit_code_epilog);
2321 if (temp->flag & jit_flag_patch)
2325 word = _jit->code.length -
2326 (_jit->pc.uc - _jit->code.ptr);
2327 if ((jit_int32_t)word == word)
2328 word = calli(_jit->pc.w);
2331 word = calli_p(_jit->pc.w);
/* function entry: select the function indexed by node->w.w and snapshot
 * the state (code address, function record, patch count) needed to emit
 * it again from scratch */
2340 case jit_code_prolog:
2341 _jitc->function = _jitc->functions.ptr + node->w.w;
2343 undo.word = _jit->pc.w;
2344 memcpy(&undo.func, _jitc->function, sizeof(undo.func));
2345 #if DEVEL_DISASSEMBLER
2348 undo.patch_offset = _jitc->patches.offset;
2350 compute_framesize();
2355 case jit_code_epilog:
2356 assert(_jitc->function == _jitc->functions.ptr + node->w.w);
/* undo path: forget the addresses of every label/epilog emitted since
 * the prolog, including this epilog node itself */
2358 for (temp = undo.node->next;
2359 temp != node; temp = temp->next) {
2360 if (temp->code == jit_code_label ||
2361 temp->code == jit_code_epilog)
2362 temp->flag &= ~jit_flag_patch;
2364 temp->flag &= ~jit_flag_patch;
/* rewind the write pointer to where the prolog started */
2366 _jit->pc.w = undo.word;
2367 /* undo.func.self.aoff and undo.func.regset should not
2368 * be undone, as they will be further updated, and are
2369 * the reason of the undo. */
2370 undo.func.self.aoff = _jitc->function->frame +
2371 _jitc->function->self.aoff;
2372 undo.func.need_frame = _jitc->function->need_frame;
2373 jit_regset_set(&undo.func.regset, &_jitc->function->regset);
2374 /* allocar information also does not need to be undone */
2375 undo.func.aoffoff = _jitc->function->aoffoff;
2376 undo.func.allocar = _jitc->function->allocar;
2377 /* real stack framesize is not in the jit_function_t,
2378 * if it were, would need to not be undone */
2379 /* cvt_offset must also not be undone */
2380 undo.func.cvt_offset = _jitc->function->cvt_offset;
2381 /* this will be recomputed but undo anyway to have it
2382 * better self documented.*/
2383 undo.func.need_stack = _jitc->function->need_stack;
/* restore the snapshot, now carrying the preserved fields copied above */
2384 memcpy(_jitc->function, &undo.func, sizeof(undo.func));
2385 #if DEVEL_DISASSEMBLER
/* drop the patches recorded since the prolog and emit the function again */
2388 _jitc->patches.offset = undo.patch_offset;
2390 goto restart_function;
/* normal path: pad to a word boundary and define the epilog address */
2393 (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
2394 nop(sizeof(jit_word_t) - word);
2395 /* remember label is defined */
2396 node->flag |= jit_flag_patch;
2397 node->u.w = _jit->pc.w;
2399 _jitc->function = NULL;
/* moves between integer words and float registers: the sse_ emitter is
 * used when the float side is an sse register, the x87_ one otherwise */
2401 case jit_code_movr_w_f:
2402 if (jit_sse_reg_p(node->u.w))
2403 sse_movr_w_f(rn(node->u.w), rn(node->v.w));
2405 x87_movr_w_f(rn(node->u.w), rn(node->v.w));
2407 case jit_code_movr_f_w:
2408 if (jit_sse_reg_p(node->v.w))
2409 sse_movr_f_w(rn(node->u.w), rn(node->v.w));
2411 x87_movr_f_w(rn(node->u.w), rn(node->v.w));
2413 case jit_code_movi_f_w:
2414 assert(node->flag & jit_flag_data);
2415 movi_f_w(rn(node->u.w), *(jit_float32_t *)node->v.n->u.w);
2417 case jit_code_movi_w_f:
2418 if (jit_sse_reg_p(node->u.w))
2419 sse_movi_w_f(rn(node->u.w), node->v.w);
2421 x87_movi_w_f(rn(node->u.w), node->v.w);
2423 # if __X32 || __X64_32
/* on these targets a double is moved as two words */
2424 case jit_code_movr_ww_d:
2425 if (jit_sse_reg_p(node->u.w))
2426 sse_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
2428 x87_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
2430 case jit_code_movr_d_ww:
2431 if (jit_sse_reg_p(node->w.w))
2432 sse_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
2434 x87_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
2436 case jit_code_movi_d_ww:
2437 assert(node->flag & jit_flag_data);
2438 movi_d_ww(rn(node->u.w), rn(node->v.w),
2439 *(jit_float64_t *)node->w.n->u.w);
2441 case jit_code_movi_ww_d:
2442 if (jit_sse_reg_p(node->u.w))
2443 sse_movi_ww_d(rn(node->u.w), node->v.w, node->w.w);
2445 x87_movi_ww_d(rn(node->u.w), node->v.w, node->w.w);
2448 case jit_code_movr_w_d:
2449 if (jit_sse_reg_p(node->u.w))
2450 sse_movr_w_d(rn(node->u.w), rn(node->v.w));
2452 x87_movr_w_d(rn(node->u.w), rn(node->v.w));
2454 case jit_code_movr_d_w:
2455 if (jit_sse_reg_p(node->v.w))
2456 sse_movr_d_w(rn(node->u.w), rn(node->v.w));
2458 x87_movr_d_w(rn(node->u.w), rn(node->v.w));
2460 case jit_code_movi_d_w:
2461 assert(node->flag & jit_flag_data);
2462 movi_d_w(rn(node->u.w), *(jit_float64_t *)node->v.n->u.w);
2464 case jit_code_movi_w_d:
2465 if (jit_sse_reg_p(node->u.w))
2466 sse_movi_w_d(rn(node->u.w), node->v.w);
2468 x87_movi_w_d(rn(node->u.w), node->v.w);
2471 case jit_code_va_start:
2472 vastart(rn(node->u.w));
2474 case jit_code_va_arg:
2475 vaarg(rn(node->u.w), rn(node->v.w));
/* the last argument tells vaarg_d whether the destination is x87 */
2477 case jit_code_va_arg_d:
2478 vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w));
/* NOTE(review): the long run of case labels below has no emitter call of
 * its own between the labels; presumably these are synthesized nodes
 * that need no machine code here -- confirm. */
2480 case jit_code_live: case jit_code_ellipsis:
2481 case jit_code_va_push:
2482 case jit_code_allocai: case jit_code_allocar:
2483 case jit_code_arg_c: case jit_code_arg_s:
2484 case jit_code_arg_i:
2485 # if __WORDSIZE == 64
2486 case jit_code_arg_l:
2488 case jit_code_arg_f: case jit_code_arg_d:
2489 case jit_code_va_end:
2491 case jit_code_retr_c: case jit_code_reti_c:
2492 case jit_code_retr_uc: case jit_code_reti_uc:
2493 case jit_code_retr_s: case jit_code_reti_s:
2494 case jit_code_retr_us: case jit_code_reti_us:
2495 case jit_code_retr_i: case jit_code_reti_i:
2496 #if __WORDSIZE == 64
2497 case jit_code_retr_ui: case jit_code_reti_ui:
2498 case jit_code_retr_l: case jit_code_reti_l:
2500 case jit_code_retr_f: case jit_code_reti_f:
2501 case jit_code_retr_d: case jit_code_reti_d:
2502 case jit_code_getarg_c: case jit_code_getarg_uc:
2503 case jit_code_getarg_s: case jit_code_getarg_us:
2504 case jit_code_getarg_i:
2505 #if __X64 && !__X64_32
2506 case jit_code_getarg_ui: case jit_code_getarg_l:
2508 case jit_code_getarg_f: case jit_code_getarg_d:
2509 case jit_code_putargr_c: case jit_code_putargi_c:
2510 case jit_code_putargr_uc: case jit_code_putargi_uc:
2511 case jit_code_putargr_s: case jit_code_putargi_s:
2512 case jit_code_putargr_us: case jit_code_putargi_us:
2513 case jit_code_putargr_i: case jit_code_putargi_i:
2514 #if __WORDSIZE == 64
2515 case jit_code_putargr_ui: case jit_code_putargi_ui:
2516 case jit_code_putargr_l: case jit_code_putargi_l:
2518 case jit_code_putargr_f: case jit_code_putargi_f:
2519 case jit_code_putargr_d: case jit_code_putargi_d:
2520 case jit_code_pushargr_c: case jit_code_pushargi_c:
2521 case jit_code_pushargr_uc: case jit_code_pushargi_uc:
2522 case jit_code_pushargr_s: case jit_code_pushargi_s:
2523 case jit_code_pushargr_us: case jit_code_pushargi_us:
2524 case jit_code_pushargr_i: case jit_code_pushargi_i:
2525 #if __WORDSIZE == 64
2526 case jit_code_pushargr_ui: case jit_code_pushargi_ui:
2527 case jit_code_pushargr_l: case jit_code_pushargi_l:
2529 case jit_code_pushargr_f: case jit_code_pushargi_f:
2530 case jit_code_pushargr_d: case jit_code_pushargi_d:
2531 case jit_code_retval_c: case jit_code_retval_uc:
2532 case jit_code_retval_s: case jit_code_retval_us:
2533 case jit_code_retval_i:
2535 case jit_code_retval_ui: case jit_code_retval_l:
2537 case jit_code_prepare:
2538 case jit_code_finishr: case jit_code_finishi:
2539 case jit_code_negi_f: case jit_code_absi_f:
2540 case jit_code_sqrti_f: case jit_code_negi_d:
2541 case jit_code_absi_d: case jit_code_sqrti_d:
2542 case jit_code_fmai_f: case jit_code_fmsi_f:
2543 case jit_code_fmai_d: case jit_code_fmsi_d:
2544 case jit_code_fnmai_f: case jit_code_fnmsi_f:
2545 case jit_code_fnmai_d: case jit_code_fnmsi_d:
/* float return value: an sse destination receives the value from x87
 * ST(0) through sse_from_x87_f(); fstpr() handles the other visible path.
 * NOTE(review): presumably only for targets returning floats in ST(0) --
 * confirm. */
2547 case jit_code_retval_f:
2549 if (jit_sse_reg_p(node->u.w)) {
2551 sse_from_x87_f(rn(node->u.w), _ST0_REGNO);
2554 fstpr(rn(node->u.w) + 1);
/* double return value: same scheme as retval_f */
2557 case jit_code_retval_d:
2559 if (jit_sse_reg_p(node->u.w)) {
2561 sse_from_x87_d(rn(node->u.w), _ST0_REGNO);
2564 fstpr(rn(node->u.w) + 1);
/* unary operations applied to a word operand (register u, word v) */
2568 negi(rn(node->u.w), node->v.w);
2571 comi(rn(node->u.w), node->v.w);
2573 case jit_code_exti_c:
2574 exti_c(rn(node->u.w), node->v.w);
2576 case jit_code_exti_uc:
2577 exti_uc(rn(node->u.w), node->v.w);
2579 case jit_code_exti_s:
2580 exti_s(rn(node->u.w), node->v.w);
2582 case jit_code_exti_us:
2583 exti_us(rn(node->u.w), node->v.w);
2585 case jit_code_bswapi_us:
2586 bswapi_us(rn(node->u.w), node->v.w);
2588 case jit_code_bswapi_ui:
2589 bswapi_ui(rn(node->u.w), node->v.w);
2591 case jit_code_htoni_us:
2592 htoni_us(rn(node->u.w), node->v.w);
2594 case jit_code_htoni_ui:
2595 htoni_ui(rn(node->u.w), node->v.w);
2597 #if __X64 && !__X64_32
2598 case jit_code_exti_i:
2599 exti_i(rn(node->u.w), node->v.w);
2601 case jit_code_exti_ui:
2602 exti_ui(rn(node->u.w), node->v.w);
2604 case jit_code_bswapi_ul:
2605 bswapi_ul(rn(node->u.w), node->v.w);
2607 case jit_code_htoni_ul:
2608 htoni_ul(rn(node->u.w), node->v.w);
2612 cloi(rn(node->u.w), node->v.w);
2615 clzi(rn(node->u.w), node->v.w);
2618 ctoi(rn(node->u.w), node->v.w);
2621 ctzi(rn(node->u.w), node->v.w);
2623 case jit_code_rbiti:
2624 rbiti(rn(node->u.w), node->v.w);
2626 case jit_code_popcnti:
2627 popcnti(rn(node->u.w), node->v.w);
2630 exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
2632 case jit_code_exti_u:
2633 exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
/* undo jit_regarg_set() from the top of the iteration; no register
 * argument or synthesized node may remain open at this point */
2638 jit_regarg_clr(node, value);
2639 assert(_jitc->regarg == 0 && _jitc->synth == 0);
2640 /* update register live state */
/* resolve every recorded patch: the target address comes from the v
 * operand for movi nodes and from the u operand for all other nodes */
2662 for (offset = 0; offset < _jitc->patches.offset; offset++) {
2663 node = _jitc->patches.ptr[offset].node;
2664 word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
2665 patch_at(_jitc->patches.ptr[offset].inst, word);
/* hand the generated range [code.ptr, pc) to jit_flush() */
2668 jit_flush(_jit->code.ptr, _jit->pc.uc);
2670 return (_jit->code.ptr);
2674 # include "jit_x86-cpu.c"
2675 # include "jit_x86-sse.c"
2676 # include "jit_x86-x87.c"
/* Flush hook for a range of generated code delimited by fptr and tptr;
 * _emit_code() calls it with (_jit->code.ptr, _jit->pc.uc) once all code
 * has been emitted. */
2680 jit_flush(void *fptr, void *tptr)
/* Wrapper around ldxi(): converts the jit register identifiers r0 and r1
 * with rn() and forwards the offset i0 unchanged. */
2685 _emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
2687 ldxi(rn(r0), rn(r1), i0);
/* Wrapper around stxi(): forwards the offset i0 unchanged and converts
 * the jit register identifiers r0 and r1 with rn(). */
2691 _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
2693 stxi(i0, rn(r0), rn(r1));
/* Double precision counterpart of _emit_ldxi(): picks the x87_ or sse_
 * ldxi_d emitter according to whether the destination r0 is an x87
 * register. */
2697 _emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
2699 if (jit_x87_reg_p(r0))
2700 x87_ldxi_d(rn(r0), rn(r1), i0);
2702 sse_ldxi_d(rn(r0), rn(r1), i0);
/* Double precision counterpart of _emit_stxi(): picks the x87_ or sse_
 * stxi_d emitter according to whether the stored register r1 is an x87
 * register. */
2706 _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
2708 if (jit_x87_reg_p(r1))
2709 x87_stxi_d(i0, rn(r0), rn(r1));
2711 sse_stxi_d(i0, rn(r0), rn(r1));
/* Compute _jitc->framesize for the current function: one word for the
 * first slot, one word per register of iregs[] set in the function's
 * regset, plus (64-bit Windows/Cygwin builds) one jit_float64_t per
 * register of fregs[] set in it; the total is then rounded and biased
 * for 16 byte stack alignment. */
2715 _compute_framesize(jit_state_t *_jit)
2718 /* Save stack pointer in first slot */
2719 _jitc->framesize = REAL_WORDSIZE;
/* one word for each iregs[] entry set in the function's regset */
2720 for (reg = 0; reg < jit_size(iregs); reg++)
2721 if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
2722 _jitc->framesize += REAL_WORDSIZE;
2724 #if __X64 && (__CYGWIN__ || _WIN32)
/* this configuration also reserves room for the fregs[] entries in use */
2725 for (reg = 0; reg < jit_size(fregs); reg++)
2726 if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
2727 _jitc->framesize += sizeof(jit_float64_t);
2729 /* Make sure functions called have a 16 byte aligned stack */
/* round up to a multiple of 16, then add 16 - REAL_WORDSIZE
 * (presumably to account for the return address pushed by the caller --
 * TODO confirm) */
2730 _jitc->framesize = (_jitc->framesize + 15) & -16;
2731 _jitc->framesize += 16 - REAL_WORDSIZE;
/* Record that the instruction at address instr refers to a node whose
 * address is not known yet, so that it can be fixed up later.
 * node must carry jit_flag_node, and the node it references (v.n for
 * movi, u.n otherwise) must not be flagged jit_flag_patch yet. */
2735 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
2739 assert(node->flag & jit_flag_node);
/* movi keeps its target in the v operand, everything else in u */
2740 if (node->code == jit_code_movi)
2741 flag = node->v.n->flag;
2743 flag = node->u.n->flag;
2744 assert(!(flag & jit_flag_patch));
/* grow the patch table by 1024 entries when it is full */
2745 if (_jitc->patches.offset >= _jitc->patches.length) {
2746 jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
2747 _jitc->patches.length * sizeof(jit_patch_t),
2748 (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
2749 _jitc->patches.length += 1024;
/* append the (instruction address, node) pair */
2751 _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
2752 _jitc->patches.ptr[_jitc->patches.offset].node = node;
2753 ++_jitc->patches.offset;
/* Copy a single precision value from x87 register r1 to sse register r0
 * by storing it at CVT_OFFSET relative to %rbp and loading it back. */
2757 _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2760 x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2761 sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
/* Copy a double precision value from x87 register r1 to sse register r0
 * by storing it at CVT_OFFSET relative to %rbp and loading it back. */
2765 _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2768 x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2769 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
/* Copy a single precision value from sse register r1 to x87 register r0
 * by storing it at CVT_OFFSET relative to %rbp and loading it back. */
2773 _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2776 sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2777 x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
/* Copy a double precision value from sse register r1 to x87 register r0
 * by storing it at CVT_OFFSET relative to %rbp and loading it back. */
2781 _x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2784 sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2785 x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);