add a thp-based huge page alloc fallback
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86.c
1 /*
2  * Copyright (C) 2012-2023  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #include <lightning.h>
21 #include <lightning/jit_private.h>
22
/*
 * Per-ABI configuration.  Three targets are distinguished: 32-bit
 * (__X32), 64-bit Windows/Cygwin, and the remaining 64-bit targets
 * (including __X64_32).
 *
 *   jit_arg_reg_p(i)    true if integer argument index i lives in a register
 *   jit_arg_f_reg_p(i)  true if float argument index i lives in a register
 *   stack_framesize     bytes reserved for callee-save registers + padding
 *   REAL_WORDSIZE       bytes used by one word-sized stack argument slot
 *   va_gp_increment /
 *   va_fp_increment     per-argument step sizes used by the varargs code
 */
#if __X32
#  define REAL_WORDSIZE                 4
#  define va_gp_increment               4
#  define va_fp_increment               8
#  define CAN_RIP_ADDRESS               0
#  define address_p(i0)                 1
/* No argument is passed in a register. */
#  define jit_arg_reg_p(i)              0
#  define jit_arg_f_reg_p(i)            0
/* callee save                        + 16 byte align
 * align16(%ebp + %rbx + %rsi + %rdi) + (16 - 4)  */
#  define stack_framesize               28
#else
#  define REAL_WORDSIZE                 8
#  define va_gp_increment               8
#  if _WIN32 || __X64_32
#    define CAN_RIP_ADDRESS             0
#  else
#    define CAN_RIP_ADDRESS             1
#  endif
#  if __X64_32
#    define address_p(i0)               ((jit_word_t)(i0) >= 0)
#  else
#    define address_p(i0)               can_sign_extend_int_p(i0)
#  endif
#  if __CYGWIN__ || _WIN32
/* Four argument slots, shared between integer and float arguments. */
#    define jit_arg_reg_p(i)            ((i) >= 0 && (i) < 4)
#    define jit_arg_f_reg_p(i)          jit_arg_reg_p(i)
/* callee save                                                + 16 byte align
 * align16(%rbp+%rbx+%rdi+%rsi+%r1[2-5]+%xmm[6-9]+%xmm1[0-5]) + (16 - 8) */
#    define stack_framesize             152
#    define va_fp_increment             8
#  else
/* Six integer and eight float argument registers. */
#    define jit_arg_reg_p(i)            ((i) >= 0 && (i) < 6)
#    define jit_arg_f_reg_p(i)          ((i) >= 0 && (i) < 8)
/* callee save                                      + 16 byte align
 * align16(%rbp + %r15 + %r14 + %r13 + %r12 + %rbx) + (16 - 8) */
#    define stack_framesize             56
/* Integer register save area of jit_va_list_t: rdi .. r9, 8 bytes each. */
#    define first_gp_argument           rdi
#    define last_gp_argument            r9
#    define first_gp_offset             offsetof(jit_va_list_t, rdi)
#    define first_gp_from_offset(gp)    ((gp) / 8)
#    define va_gp_max_offset                                            \
        (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
/* Float register save area of jit_va_list_t: xmm0 .. xmm7, 16 bytes each. */
#    define first_fp_argument           xmm0
#    define last_fp_argument            xmm7
#    define first_fp_offset             offsetof(jit_va_list_t, xmm0)
#    define first_fp_from_offset(fp)    (((fp) - va_gp_max_offset) / 16)
#    define va_fp_max_offset                                            \
        (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
#    define va_fp_increment             16
#  endif
#endif
/* Frame offset of the per-function conversion scratch slot (0 = not
 * allocated yet). */
#define CVT_OFFSET                      _jitc->function->cvt_offset

/* Lazily allocate the jit_float64_t sized scratch slot.  When the slot
 * has to be created, _jitc->again is raised first (presumably so the
 * function is generated again with the final frame layout). */
#define CHECK_CVT_OFFSET()                                              \
    do {                                                                \
        if (CVT_OFFSET == 0) {                                          \
            _jitc->again = 1;                                           \
            CVT_OFFSET = jit_allocai(sizeof(jit_float64_t));            \
        }                                                               \
    } while (0)
85
86 /*
87  * Types
88  */
89 #if __X32 || __CYGWIN__ || _WIN32
90 typedef jit_pointer_t jit_va_list_t;
91 #else
92 typedef struct jit_va_list {
93     jit_int32_t         gpoff;
94     jit_int32_t         fpoff;
95     jit_pointer_t       over;
96     jit_pointer_t       save;
97     /* Declared explicitly as int64 for the x32 abi */
98     jit_int64_t         rdi;
99     jit_int64_t         rsi;
100     jit_int64_t         rdx;
101     jit_int64_t         rcx;
102     jit_int64_t         r8;
103     jit_int64_t         r9;
104     jit_float64_t       xmm0;
105     jit_float64_t       _up0;
106     jit_float64_t       xmm1;
107     jit_float64_t       _up1;
108     jit_float64_t       xmm2;
109     jit_float64_t       _up2;
110     jit_float64_t       xmm3;
111     jit_float64_t       _up3;
112     jit_float64_t       xmm4;
113     jit_float64_t       _up4;
114     jit_float64_t       xmm5;
115     jit_float64_t       _up5;
116     jit_float64_t       xmm6;
117     jit_float64_t       _up6;
118     jit_float64_t       xmm7;
119     jit_float64_t       _up7;
120 } jit_va_list_t;
121 #endif
122
123 /*
124  * Prototypes
125  */
126 #define compute_framesize()             _compute_framesize(_jit)
127 static void _compute_framesize(jit_state_t*);
128 #define patch(instr, node)              _patch(_jit, instr, node)
129 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
130 #define sse_from_x87_f(r0, r1)          _sse_from_x87_f(_jit, r0, r1)
131 static void _sse_from_x87_f(jit_state_t*,jit_int32_t,jit_int32_t);
132 #define sse_from_x87_d(r0, r1)          _sse_from_x87_d(_jit, r0, r1)
133 static void _sse_from_x87_d(jit_state_t*,jit_int32_t,jit_int32_t);
134 #define x87_from_sse_f(r0, r1)          _x87_from_sse_f(_jit, r0, r1)
135 static void _x87_from_sse_f(jit_state_t*,jit_int32_t,jit_int32_t);
136 #define x87_from_sse_d(r0, r1)          _x87_from_sse_d(_jit, r0, r1)
137 static void _x87_from_sse_d(jit_state_t*,jit_int32_t,jit_int32_t);
138
139 #define PROTO                           1
140 #  include "jit_x86-cpu.c"
141 #  include "jit_x86-sse.c"
142 #  include "jit_x86-x87.c"
143 #undef PROTO
144
145 /*
146  * Initialization
147  */
148 jit_cpu_t               jit_cpu;
149 jit_register_t          _rvs[] = {
150 #if __X32
151     { rc(gpr) | rc(rg8) | 0,            "%eax" },
152     { rc(gpr) | rc(rg8) | 1,            "%ecx" },
153     { rc(gpr) | rc(rg8) | 2,            "%edx" },
154     { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%ebx" },
155     { rc(sav) | rc(gpr) | 6,            "%esi" },
156     { rc(sav) | rc(gpr) | 7,            "%edi" },
157     { rc(sav) | 4,                      "%esp" },
158     { rc(sav) | 5,                      "%ebp" },
159     { rc(xpr) | rc(fpr) | 0,            "%xmm0" },
160     { rc(xpr) | rc(fpr) | 1,            "%xmm1" },
161     { rc(xpr) | rc(fpr) | 2,            "%xmm2" },
162     { rc(xpr) | rc(fpr) | 3,            "%xmm3" },
163     { rc(xpr) | rc(fpr) | 4,            "%xmm4" },
164     { rc(xpr) | rc(fpr) | 5,            "%xmm5" },
165     { rc(xpr) | rc(fpr) | 6,            "%xmm6" },
166     { rc(xpr) | rc(fpr) | 7,            "%xmm7" },
167     { rc(fpr) | 0,                      "st(0)" },
168     { rc(fpr) | 1,                      "st(1)" },
169     { rc(fpr) | 2,                      "st(2)" },
170     { rc(fpr) | 3,                      "st(3)" },
171     { rc(fpr) | 4,                      "st(4)" },
172     { rc(fpr) | 5,                      "st(5)" },
173     { rc(fpr) | 6,                      "st(6)" },
174     { rc(fpr) | 7,                      "st(7)" },
175 #else
176 #  if __CYGWIN__ || _WIN32
177     { rc(gpr) | rc(rg8) | 0,            "%rax" },
178     { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" },
179     { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" },
180     { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%rbx" },
181     { rc(sav) | rc(gpr) | 7,            "%rdi" },
182     { rc(sav) | rc(gpr) | 6,            "%rsi" },
183     { rc(sav) | rc(gpr) | 12,           "%r12" },
184     { rc(sav) | rc(gpr) | 13,           "%r13" },
185     { rc(sav) | rc(gpr) | 14,           "%r14" },
186     { rc(sav) | rc(gpr) | 15,           "%r15" },
187     { rc(arg) | rc(rg8) | rc(gpr) | 9,  "%r9" },
188     { rc(arg) | rc(rg8) | rc(gpr) | 8,  "%r8" },
189     { rc(arg) | rc(rg8) | rc(gpr) | 2,  "%rdx" },
190     { rc(arg) | rc(rg8) | rc(gpr) | 1,  "%rcx" },
191     { rc(sav) | 4,                      "%rsp" },
192     { rc(sav) | 5,                      "%rbp" },
193     { rc(xpr) | rc(fpr) | 4,            "%xmm4" },
194     { rc(xpr) | rc(fpr) | 5,            "%xmm5" },
195     { rc(sav) | rc(xpr) | rc(fpr) | 6,  "%xmm6" },
196     { rc(sav) | rc(xpr) | rc(fpr) | 7,  "%xmm7" },
197     { rc(sav) | rc(xpr) | rc(fpr) | 8,  "%xmm8" },
198     { rc(sav) | rc(xpr) | rc(fpr) | 9,  "%xmm9" },
199     { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" },
200     { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" },
201     { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" },
202     { rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" },
203     { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" },
204     { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" },
205     { rc(xpr) | rc(arg) | rc(fpr) | 3,  "%xmm3" },
206     { rc(xpr) | rc(arg) | rc(fpr) | 2,  "%xmm2" },
207     { rc(xpr) | rc(arg) | rc(fpr) | 1,  "%xmm1" },
208     { rc(xpr) | rc(arg) | rc(fpr) | 0,  "%xmm0" },
209 #else
210     /* %rax is a pseudo flag argument for varargs functions */
211     { rc(arg) | rc(gpr) | rc(rg8) | 0,  "%rax" },
212     { rc(gpr) | rc(rg8) | 10,           "%r10" },
213     { rc(gpr) | rc(rg8) | 11,           "%r11" },
214     { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%rbx" },
215     { rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" },
216     { rc(sav) | rc(rg8) | rc(gpr) | 14, "%r14" },
217     { rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" },
218     { rc(sav) | rc(gpr) | rc(rg8) | 12, "%r12" },
219     { rc(arg) | rc(rg8) | rc(gpr) | 9,  "%r9" },
220     { rc(arg) | rc(rg8) | rc(gpr) | 8,  "%r8" },
221     { rc(arg) | rc(rg8) | rc(gpr) | 1,  "%rcx" },
222     { rc(arg) | rc(rg8) | rc(gpr) | 2,  "%rdx" },
223     { rc(arg) | rc(rg8) | rc(gpr) | 6,  "%rsi" },
224     { rc(arg) | rc(rg8) | rc(gpr) | 7,  "%rdi" },
225     { rc(sav) | 4,                      "%rsp" },
226     { rc(sav) | 5,                      "%rbp" },
227     { rc(xpr) | rc(fpr) | 8,            "%xmm8" },
228     { rc(xpr) | rc(fpr) | 9,            "%xmm9" },
229     { rc(xpr) | rc(fpr) | 10,           "%xmm10" },
230     { rc(xpr) | rc(fpr) | 11,           "%xmm11" },
231     { rc(xpr) | rc(fpr) | 12,           "%xmm12" },
232     { rc(xpr) | rc(fpr) | 13,           "%xmm13" },
233     { rc(xpr) | rc(fpr) | 14,           "%xmm14" },
234     { rc(xpr) | rc(fpr) | 15,           "%xmm15" },
235     { rc(xpr) | rc(arg) | rc(fpr) | 7,  "%xmm7" },
236     { rc(xpr) | rc(arg) | rc(fpr) | 6,  "%xmm6" },
237     { rc(xpr) | rc(arg) | rc(fpr) | 5,  "%xmm5" },
238     { rc(xpr) | rc(arg) | rc(fpr) | 4,  "%xmm4" },
239     { rc(xpr) | rc(arg) | rc(fpr) | 3,  "%xmm3" },
240     { rc(xpr) | rc(arg) | rc(fpr) | 2,  "%xmm2" },
241     { rc(xpr) | rc(arg) | rc(fpr) | 1,  "%xmm1" },
242     { rc(xpr) | rc(arg) | rc(fpr) | 0,  "%xmm0" },
243 #  endif
244     { rc(fpr) | 0,                      "st(0)" },
245     { rc(fpr) | 1,                      "st(1)" },
246     { rc(fpr) | 2,                      "st(2)" },
247     { rc(fpr) | 3,                      "st(3)" },
248     { rc(fpr) | 4,                      "st(4)" },
249     { rc(fpr) | 5,                      "st(5)" },
250     { rc(fpr) | 6,                      "st(6)" },
251     { rc(fpr) | 7,                      "st(7)" },
252 #endif
253     { _NOREG,                           "<none>" },
254 };
255
256 static jit_int32_t iregs[] = {
257 #if __X32
258     _RBX, _RSI, _RDI,
259 #elif (__CYGWIN__ || _WIN32)
260     _RBX, _RDI, _RSI, _R12, _R13, _R14, _R15,
261 #else
262     _R15, _R14, _R13, _R12, _RBX,
263 #endif
264 };
265
#if __X64 && (__CYGWIN__ || _WIN32)
/* Callee-save SSE registers, 64-bit Windows/Cygwin only. */
static jit_int32_t fregs[] = {
    _XMM6,  _XMM7,  _XMM8,  _XMM9,  _XMM10,
    _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
};
#endif
271
272 /*
273  * Implementation
274  */
275 void
276 jit_get_cpu(void)
277 {
278     union {
279         /* eax=7 and ecx=0 */
280         struct {
281             jit_uword_t fsgsbase        : 1;
282             jit_uword_t IA32_TSC_ADJUST : 1;
283             jit_uword_t sgx             : 1;
284             jit_uword_t bmi1            : 1;
285             jit_uword_t hle             : 1;
286             jit_uword_t avx2            : 1;
287             jit_uword_t FDP_EXCPTN_ONLY : 1;
288             jit_uword_t smep            : 1;
289             jit_uword_t bmi2            : 1;
290             jit_uword_t erms            : 1;
291             jit_uword_t invpcid         : 1;
292             jit_uword_t rtm             : 1;
293             jit_uword_t rdt_m_pqm       : 1;
294             jit_uword_t dep_FPU_CS_DS   : 1;
295             jit_uword_t mpx             : 1;
296             jit_uword_t rdt_a_pqe       : 1;
297             jit_uword_t avx512_f        : 1;
298             jit_uword_t avx512_dq       : 1;
299             jit_uword_t rdseed          : 1;
300             jit_uword_t adx             : 1;
301             jit_uword_t smap            : 1;
302             jit_uword_t avx512_ifma     : 1;
303             jit_uword_t __reserved0     : 1;
304             jit_uword_t clflushopt      : 1;
305             jit_uword_t clwb            : 1;
306             jit_uword_t pt              : 1;
307             jit_uword_t avx512_pf       : 1;
308             jit_uword_t avx512_er       : 1;
309             jit_uword_t avx512_cd       : 1;
310             jit_uword_t sha             : 1;
311             jit_uword_t avx512_bw       : 1;
312             jit_uword_t avx512_vl       : 1;
313         } bits;
314         jit_uword_t     cpuid;
315     } ebx;
316     union {
317         /* eax=0 */
318         struct {
319             jit_uint32_t sse3           : 1;
320             jit_uint32_t pclmulqdq      : 1;
321             jit_uint32_t dtes64         : 1;    /* amd reserved */
322             jit_uint32_t monitor        : 1;
323             jit_uint32_t ds_cpl         : 1;    /* amd reserved */
324             jit_uint32_t vmx            : 1;    /* amd reserved */
325             jit_uint32_t smx            : 1;    /* amd reserved */
326             jit_uint32_t est            : 1;    /* amd reserved */
327             jit_uint32_t tm2            : 1;    /* amd reserved */
328             jit_uint32_t ssse3          : 1;
329             jit_uint32_t cntx_id        : 1;    /* amd reserved */
330             jit_uint32_t __reserved0    : 1;
331             jit_uint32_t fma            : 1;
332             jit_uint32_t cmpxchg16b     : 1;
333             jit_uint32_t xtpr           : 1;    /* amd reserved */
334             jit_uint32_t pdcm           : 1;    /* amd reserved */
335             jit_uint32_t __reserved1    : 1;
336             jit_uint32_t pcid           : 1;    /* amd reserved */
337             jit_uint32_t dca            : 1;    /* amd reserved */
338             jit_uint32_t sse4_1         : 1;
339             jit_uint32_t sse4_2         : 1;
340             jit_uint32_t x2apic         : 1;    /* amd reserved */
341             jit_uint32_t movbe          : 1;    /* amd reserved */
342             jit_uint32_t popcnt         : 1;
343             jit_uint32_t tsc            : 1;    /* amd reserved */
344             jit_uint32_t aes            : 1;
345             jit_uint32_t xsave          : 1;
346             jit_uint32_t osxsave        : 1;
347             jit_uint32_t avx            : 1;
348             jit_uint32_t __reserved2    : 1;    /* amd F16C */
349             jit_uint32_t __reserved3    : 1;
350             jit_uint32_t __alwayszero   : 1;    /* amd RAZ */
351         } bits;
352         jit_uword_t     cpuid;
353     } ecx;
354     union {
355         /* eax=0 */
356         struct {
357             jit_uint32_t fpu            : 1;
358             jit_uint32_t vme            : 1;
359             jit_uint32_t de             : 1;
360             jit_uint32_t pse            : 1;
361             jit_uint32_t tsc            : 1;
362             jit_uint32_t msr            : 1;
363             jit_uint32_t pae            : 1;
364             jit_uint32_t mce            : 1;
365             jit_uint32_t cmpxchg8b      : 1;
366             jit_uint32_t apic           : 1;
367             jit_uint32_t __reserved0    : 1;
368             jit_uint32_t sep            : 1;
369             jit_uint32_t mtrr           : 1;
370             jit_uint32_t pge            : 1;
371             jit_uint32_t mca            : 1;
372             jit_uint32_t cmov           : 1;
373             jit_uint32_t pat            : 1;
374             jit_uint32_t pse36          : 1;
375             jit_uint32_t psn            : 1;    /* amd reserved */
376             jit_uint32_t clfsh          : 1;
377             jit_uint32_t __reserved1    : 1;
378             jit_uint32_t ds             : 1;    /* amd reserved */
379             jit_uint32_t acpi           : 1;    /* amd reserved */
380             jit_uint32_t mmx            : 1;
381             jit_uint32_t fxsr           : 1;
382             jit_uint32_t sse            : 1;
383             jit_uint32_t sse2           : 1;
384             jit_uint32_t ss             : 1;    /* amd reserved */
385             jit_uint32_t htt            : 1;
386             jit_uint32_t tm             : 1;    /* amd reserved */
387             jit_uint32_t __reserved2    : 1;
388             jit_uint32_t pbe            : 1;    /* amd reserved */
389         } bits;
390         jit_uword_t     cpuid;
391     } edx;
392 #if __X32
393     int                 ac, flags;
394 #endif
395     jit_uword_t         eax;
396
397 #if __X32
398     /* adapted from glibc __sysconf */
399     __asm__ volatile ("pushfl;\n\t"
400                       "popl %0;\n\t"
401                       "movl $0x240000, %1;\n\t"
402                       "xorl %0, %1;\n\t"
403                       "pushl %1;\n\t"
404                       "popfl;\n\t"
405                       "pushfl;\n\t"
406                       "popl %1;\n\t"
407                       "xorl %0, %1;\n\t"
408                       "pushl %0;\n\t"
409                       "popfl"
410                       : "=r" (flags), "=r" (ac));
411
412     /* i386 or i486 without cpuid */
413     if ((ac & (1 << 21)) == 0)
414         /* probably without x87 as well */
415         return;
416 #endif
417
418     /* query %eax = 1 function */
419 #if __X32 || __X64_32
420     __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
421 #else
422     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
423 #endif
424                       : "=a" (eax), "=r" (ebx.cpuid),
425                       "=c" (ecx.cpuid), "=d" (edx.cpuid)
426                       : "0" (1));
427
428     jit_cpu.fpu         = edx.bits.fpu;
429     jit_cpu.cmpxchg8b   = edx.bits.cmpxchg8b;
430     jit_cpu.cmov        = edx.bits.cmov;
431     jit_cpu.mmx         = edx.bits.mmx;
432     jit_cpu.sse         = edx.bits.sse;
433     jit_cpu.sse2        = edx.bits.sse2;
434     jit_cpu.sse3        = ecx.bits.sse3;
435     jit_cpu.pclmulqdq   = ecx.bits.pclmulqdq;
436     jit_cpu.ssse3       = ecx.bits.ssse3;
437     jit_cpu.fma         = ecx.bits.fma;
438     jit_cpu.cmpxchg16b  = ecx.bits.cmpxchg16b;
439     jit_cpu.sse4_1      = ecx.bits.sse4_1;
440     jit_cpu.sse4_2      = ecx.bits.sse4_2;
441     jit_cpu.movbe       = ecx.bits.movbe;
442     jit_cpu.popcnt      = ecx.bits.popcnt;
443     jit_cpu.aes         = ecx.bits.aes;
444     jit_cpu.avx         = ecx.bits.avx;
445
446     /* query %eax = 7 and ecx = 0 function */
447 #if __X64
448     __asm__ volatile ("cpuid"
449                       : "=a" (eax), "=b" (ebx.cpuid), "=c" (ecx), "=d" (edx)
450                       : "a" (7), "c" (0));
451 #endif
452     jit_cpu.adx         = ebx.bits.adx;
453     jit_cpu.bmi2        = ebx.bits.bmi2;
454
455
456     /* query %eax = 0x80000001 function */
457 #if __X64
458 #  if __X64_32
459     __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
460 #  else
461     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
462 #  endif
463                       : "=a" (eax), "=r" (ebx.cpuid),
464                       "=c" (ecx.cpuid), "=d" (edx.cpuid)
465                       : "0" (0x80000001));
466     jit_cpu.lahf        = !!(ecx.cpuid & 1);
467     jit_cpu.abm         = !!(ecx.cpuid & 32);
468     jit_cpu.fma4        = !!(ecx.cpuid & (1 << 16));
469 #endif
470 }
471
472 void
473 _jit_init(jit_state_t *_jit)
474 {
475 #if __X32
476     jit_int32_t         regno;
477     static jit_bool_t   first = 1;
478 #endif
479
480     _jitc->reglen = jit_size(_rvs) - 1;
481 #if __X32
482     if (first) {
483         if (!jit_cpu.sse2) {
484             for (regno = _jitc->reglen; regno >= 0; regno--) {
485                 if (_rvs[regno].spec & jit_class_xpr)
486                     _rvs[regno].spec = 0;
487             }
488         }
489         first = 0;
490     }
491 #endif
492 }
493
494 void
495 _jit_prolog(jit_state_t *_jit)
496 {
497     jit_int32_t         offset;
498
499     if (_jitc->function)
500         jit_epilog();
501     assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
502     jit_regset_set_ui(&_jitc->regsav, 0);
503     offset = _jitc->functions.offset;
504     if (offset >= _jitc->functions.length) {
505         jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
506                     _jitc->functions.length * sizeof(jit_function_t),
507                     (_jitc->functions.length + 16) * sizeof(jit_function_t));
508         _jitc->functions.length += 16;
509     }
510     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
511     /* One extra stack slot for implicit saved returned address */
512     _jitc->function->self.size = stack_framesize + REAL_WORDSIZE;
513     _jitc->function->self.argi = _jitc->function->self.argf =
514         _jitc->function->self.aoff = _jitc->function->self.alen = 0;
515     _jitc->function->cvt_offset = 0;
516 #if __X64 && (__CYGWIN__ || _WIN32)
517     /* force framepointer */
518     jit_check_frame();
519 #endif
520     _jitc->function->self.call = jit_call_default;
521     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
522               _jitc->reglen * sizeof(jit_int32_t));
523
524     /* _no_link here does not mean the jit_link() call can be removed
525      * by rewriting as:
526      * _jitc->function->prolog = jit_new_node(jit_code_prolog);
527      */
528     _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
529     jit_link(_jitc->function->prolog);
530     _jitc->function->prolog->w.w = offset;
531     _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
532     /*  u:      label value
533      *  v:      offset in blocks vector
534      *  w:      offset in functions vector
535      */
536     _jitc->function->epilog->w.w = offset;
537
538     jit_regset_new(&_jitc->function->regset);
539 }
540
541 jit_int32_t
542 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
543 {
544     assert(_jitc->function);
545     jit_check_frame();
546 #if __X32
547     /* Stack is 4 bytes aligned but jit functions keep it 8 bytes aligned.
548      * Called functions have 16 byte aligned stack. */
549     if (!_jitc->function->self.aoff)
550         _jitc->function->self.aoff = -4;
551 #endif
552     switch (length) {
553         case 0: case 1:                                         break;
554         case 2:         _jitc->function->self.aoff &= -2;       break;
555         case 3: case 4: _jitc->function->self.aoff &= -4;       break;
556         default:        _jitc->function->self.aoff &= -8;       break;
557     }
558     _jitc->function->self.aoff -= length;
559
560     /* jit_allocai() may be called from jit_x86-cpu.c, and force a function
561      * generation restart on some conditions: div/rem and qmul/qdiv, due
562      * to registers constraints.
563      * The check is to prevent an assertion of a jit_xyz() being called
564      * during code generation, and attempting to add a node to the tail
565      * of the current IR generation. */
566     if (!_jitc->realize) {
567         jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
568         jit_dec_synth();
569     }
570
571     return (_jitc->function->self.aoff);
572 }
573
574 void
575 _jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
576 {
577     jit_int32_t          reg;
578     assert(_jitc->function);
579     jit_inc_synth_ww(allocar, u, v);
580     if (!_jitc->function->allocar) {
581         _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
582         _jitc->function->allocar = 1;
583     }
584     reg = jit_get_reg(jit_class_gpr);
585     jit_negr(reg, v);
586     jit_andi(reg, reg, -16);
587     jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
588     jit_addr(u, u, reg);
589     jit_addr(JIT_SP, JIT_SP, reg);
590     jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
591     jit_unget_reg(reg);
592     jit_dec_synth();
593 }
594
595 void
596 _jit_ret(jit_state_t *_jit)
597 {
598     jit_node_t          *instr;
599     assert(_jitc->function);
600     jit_inc_synth(ret);
601     /* jump to epilog */
602     instr = jit_jmpi();
603     jit_patch_at(instr, _jitc->function->epilog);
604     jit_dec_synth();
605 }
606
607 void
608 _jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
609 {
610     jit_code_inc_synth_w(code, u);
611     jit_movr(JIT_RET, u);
612     jit_ret();
613     jit_dec_synth();
614 }
615
616 void
617 _jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
618 {
619     jit_code_inc_synth_w(code, u);
620     jit_movi(JIT_RET, u);
621     jit_ret();
622     jit_dec_synth();
623 }
624
625 void
626 _jit_retr_f(jit_state_t *_jit, jit_int32_t u)
627 {
628     jit_inc_synth_w(retr_f, u);
629     if (JIT_FRET != u)
630         jit_movr_f(JIT_FRET, u);
631     else
632         jit_live(JIT_FRET);
633     jit_ret();
634     jit_dec_synth();
635 }
636
637 void
638 _jit_reti_f(jit_state_t *_jit, jit_float32_t u)
639 {
640     jit_inc_synth_f(reti_f, u);
641     jit_movi_f(JIT_FRET, u);
642     jit_ret();
643     jit_dec_synth();
644 }
645
646 void
647 _jit_retr_d(jit_state_t *_jit, jit_int32_t u)
648 {
649     jit_inc_synth_w(retr_d, u);
650     if (JIT_FRET != u)
651         jit_movr_d(JIT_FRET, u);
652     else
653         jit_live(JIT_FRET);
654     jit_ret();
655     jit_dec_synth();
656 }
657
658 void
659 _jit_reti_d(jit_state_t *_jit, jit_float64_t u)
660 {
661     jit_inc_synth_d(reti_d, u);
662     jit_movi_d(JIT_FRET, u);
663     jit_ret();
664     jit_dec_synth();
665 }
666
667 void
668 _jit_epilog(jit_state_t *_jit)
669 {
670     assert(_jitc->function);
671     assert(_jitc->function->epilog->next == NULL);
672     jit_link(_jitc->function->epilog);
673     _jitc->function = NULL;
674 }
675
676 jit_bool_t
677 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
678 {
679     if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
680         return (jit_arg_reg_p(u->u.w));
681     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
682     return (jit_arg_f_reg_p(u->u.w));
683 }
684
685 void
686 _jit_ellipsis(jit_state_t *_jit)
687 {
688     jit_inc_synth(ellipsis);
689     jit_check_frame();
690     if (_jitc->prepare) {
691         jit_link_prepare();
692         /* Remember that a varargs function call is being constructed. */
693         assert(!(_jitc->function->call.call & jit_call_varargs));
694         _jitc->function->call.call |= jit_call_varargs;
695     }
696     else {
697         jit_link_prolog();
698         /* Remember the current function is varargs. */
699         assert(!(_jitc->function->self.call & jit_call_varargs));
700         _jitc->function->self.call |= jit_call_varargs;
701
702 #if __X64 && !(__CYGWIN__ || _WIN32)
703         /* Allocate va_list like object in the stack.
704          * If applicable, with enough space to save all argument
705          * registers, and use fixed offsets for them. */
706         _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
707
708         /* Initialize gp offset in save area. */
709         if (jit_arg_reg_p(_jitc->function->self.argi))
710             _jitc->function->vagp = _jitc->function->self.argi * 8;
711         else
712             _jitc->function->vagp = va_gp_max_offset;
713
714         /* Initialize fp offset in save area. */
715         if (jit_arg_f_reg_p(_jitc->function->self.argf))
716             _jitc->function->vafp = _jitc->function->self.argf * 16 +
717                                     va_gp_max_offset;
718         else
719             _jitc->function->vafp = va_fp_max_offset;
720 #endif
721     }
722     jit_dec_synth();
723 }
724
725 void
726 _jit_va_push(jit_state_t *_jit, jit_int32_t u)
727 {
728     jit_inc_synth_w(va_push, u);
729     jit_pushargr(u);
730     jit_dec_synth();
731 }
732
733 jit_node_t *
734 _jit_arg(jit_state_t *_jit, jit_code_t code)
735 {
736     jit_node_t          *node;
737     jit_int32_t          offset;
738     assert(_jitc->function);
739     assert(!(_jitc->function->self.call & jit_call_varargs));
740 #if STRONG_TYPE_CHECKING
741     assert(code >= jit_code_arg_c && code <= jit_code_arg);
742 #endif
743 #if __X64
744     if (jit_arg_reg_p(_jitc->function->self.argi)) {
745         offset = _jitc->function->self.argi++;
746 #  if __CYGWIN__ || _WIN32
747         _jitc->function->self.size += sizeof(jit_word_t);
748 #  endif
749     }
750     else
751 #endif
752     {
753         offset = _jitc->function->self.size;
754         _jitc->function->self.size += REAL_WORDSIZE;
755         jit_check_frame();
756     }
757     node = jit_new_node_ww(code, offset,
758                            ++_jitc->function->self.argn);
759     jit_link_prolog();
760     return (node);
761 }
762
763 jit_node_t *
764 _jit_arg_f(jit_state_t *_jit)
765 {
766     jit_node_t          *node;
767     jit_int32_t          offset;
768     assert(_jitc->function);
769     assert(!(_jitc->function->self.call & jit_call_varargs));
770 #if __X64
771 #  if __CYGWIN__ || _WIN32
772     if (jit_arg_reg_p(_jitc->function->self.argi)) {
773         offset = _jitc->function->self.argi++;
774         _jitc->function->self.size += sizeof(jit_word_t);
775     }
776 #  else
777     if (jit_arg_f_reg_p(_jitc->function->self.argf))
778         offset = _jitc->function->self.argf++;
779 #  endif
780     else
781 #endif
782     {
783         offset = _jitc->function->self.size;
784         _jitc->function->self.size += REAL_WORDSIZE;
785         jit_check_frame();
786     }
787     node = jit_new_node_ww(jit_code_arg_f, offset,
788                            ++_jitc->function->self.argn);
789     jit_link_prolog();
790     return (node);
791 }
792
793 jit_node_t *
794 _jit_arg_d(jit_state_t *_jit)
795 {
796     jit_node_t          *node;
797     jit_int32_t          offset;
798     assert(_jitc->function);
799     assert(!(_jitc->function->self.call & jit_call_varargs));
800 #if __X64
801 #  if __CYGWIN__ || _WIN32
802     if (jit_arg_reg_p(_jitc->function->self.argi)) {
803         offset = _jitc->function->self.argi++;
804         _jitc->function->self.size += sizeof(jit_word_t);
805     }
806 #  else
807     if (jit_arg_f_reg_p(_jitc->function->self.argf))
808         offset = _jitc->function->self.argf++;
809 #  endif
810     else
811 #endif
812     {
813         offset = _jitc->function->self.size;
814         _jitc->function->self.size += sizeof(jit_float64_t);
815         jit_check_frame();
816     }
817     node = jit_new_node_ww(jit_code_arg_d, offset,
818                            ++_jitc->function->self.argn);
819     jit_link_prolog();
820     return (node);
821 }
822
823 void
824 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
825 {
826     assert_arg_type(v->code, jit_code_arg_c);
827     jit_inc_synth_wp(getarg_c, u, v);
828 #if __X64
829     if (jit_arg_reg_p(v->u.w))
830         jit_extr_c(u, JIT_RA0 - v->u.w);
831     else
832 #endif
833     {
834         jit_node_t      *node = jit_ldxi_c(u, _RBP, v->u.w);
835         jit_link_alist(node);
836     }
837     jit_dec_synth();
838 }
839
840 void
841 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
842 {
843     assert_arg_type(v->code, jit_code_arg_c);
844     jit_inc_synth_wp(getarg_uc, u, v);
845 #if __X64
846     if (jit_arg_reg_p(v->u.w))
847         jit_extr_uc(u, JIT_RA0 - v->u.w);
848     else
849 #endif
850     {
851         jit_node_t      *node = jit_ldxi_uc(u, _RBP, v->u.w);
852         jit_link_alist(node);
853     }
854     jit_dec_synth();
855 }
856
857 void
858 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
859 {
860     assert_arg_type(v->code, jit_code_arg_s);
861     jit_inc_synth_wp(getarg_s, u, v);
862 #if __X64
863     if (jit_arg_reg_p(v->u.w))
864         jit_extr_s(u, JIT_RA0 - v->u.w);
865     else
866 #endif
867     {
868         jit_node_t      *node = jit_ldxi_s(u, _RBP, v->u.w);
869         jit_link_alist(node);
870     }
871     jit_dec_synth();
872 }
873
874 void
875 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
876 {
877     assert_arg_type(v->code, jit_code_arg_s);
878     jit_inc_synth_wp(getarg_us, u, v);
879 #if __X64
880     if (jit_arg_reg_p(v->u.w))
881         jit_extr_us(u, JIT_RA0 - v->u.w);
882     else
883 #endif
884     {
885         jit_node_t      *node = jit_ldxi_us(u, _RBP, v->u.w);
886         jit_link_alist(node);
887     }
888     jit_dec_synth();
889 }
890
891 void
892 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
893 {
894     assert_arg_type(v->code, jit_code_arg_i);
895     jit_inc_synth_wp(getarg_i, u, v);
896 #if __X64
897     if (jit_arg_reg_p(v->u.w)) {
898 #  if __X64_32
899         jit_movr(u, JIT_RA0 - v->u.w);
900 #  else
901         jit_extr_i(u, JIT_RA0 - v->u.w);
902 #  endif
903      }
904     else
905 #endif
906     {
907         jit_node_t      *node = jit_ldxi_i(u, _RBP, v->u.w);
908         jit_link_alist(node);
909     }
910     jit_dec_synth();
911 }
912
913 #if __X64 && !__X64_32
914 void
915 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
916 {
917     assert_arg_type(v->code, jit_code_arg_i);
918     jit_inc_synth_wp(getarg_ui, u, v);
919     if (jit_arg_reg_p(v->u.w))
920         jit_extr_ui(u, JIT_RA0 - v->u.w);
921     else {
922         jit_node_t      *node = jit_ldxi_ui(u, _RBP, v->u.w);
923         jit_link_alist(node);
924     }
925     jit_dec_synth();
926 }
927
928 void
929 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
930 {
931     assert_arg_type(v->code, jit_code_arg_l);
932     jit_inc_synth_wp(getarg_l, u, v);
933     if (jit_arg_reg_p(v->u.w))
934         jit_movr(u, JIT_RA0 - v->u.w);
935     else {
936         jit_node_t      *node = jit_ldxi_l(u, _RBP, v->u.w);
937         jit_link_alist(node);
938     }
939     jit_dec_synth();
940 }
941 #endif
942
943 void
944 _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
945 {
946     assert_putarg_type(code, v->code);
947     jit_code_inc_synth_wp(code, u, v);
948 #if __X64
949     if (jit_arg_reg_p(v->u.w))
950         jit_movr(JIT_RA0 - v->u.w, u);
951     else
952 #endif
953     {
954         jit_node_t      *node = jit_stxi(v->u.w, _RBP, u);
955         jit_link_alist(node);
956     }
957     jit_dec_synth();
958 }
959
960 void
961 _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
962 {
963     jit_int32_t         regno;
964     assert_putarg_type(code, v->code);
965     jit_code_inc_synth_wp(code, u, v);
966 #if __X64
967     if (jit_arg_reg_p(v->u.w))
968         jit_movi(JIT_RA0 - v->u.w, u);
969     else
970 #endif
971     {
972         jit_node_t      *node;
973         regno = jit_get_reg(jit_class_gpr);
974         jit_movi(regno, u);
975         node = jit_stxi(v->u.w, _RBP, regno);
976         jit_link_alist(node);
977         jit_unget_reg(regno);
978     }
979     jit_dec_synth();
980 }
981
982 void
983 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
984 {
985     assert(v->code == jit_code_arg_f);
986     jit_inc_synth_wp(getarg_f, u, v);
987 #if __X64
988     if (jit_arg_f_reg_p(v->u.w))
989         jit_movr_f(u, _XMM0 - v->u.w);
990     else
991 #endif
992     {
993         jit_node_t      *node = jit_ldxi_f(u, _RBP, v->u.w);
994         jit_link_alist(node);
995     }
996     jit_dec_synth();
997 }
998
999 void
1000 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
1001 {
1002     assert(v->code == jit_code_arg_f);
1003     jit_inc_synth_wp(putargr_f, u, v);
1004 #if __X64
1005     if (jit_arg_f_reg_p(v->u.w))
1006         jit_movr_f(_XMM0 - v->u.w, u);
1007     else
1008 #endif
1009     {
1010         jit_node_t      *node = jit_stxi_f(v->u.w, _RBP, u);
1011         jit_link_alist(node);
1012     }
1013     jit_dec_synth();
1014 }
1015
1016 void
1017 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
1018 {
1019     jit_int32_t         regno;
1020     assert(v->code == jit_code_arg_f);
1021     jit_inc_synth_fp(putargi_f, u, v);
1022 #if __X64
1023     if (jit_arg_f_reg_p(v->u.w))
1024         jit_movi_f(_XMM0 - v->u.w, u);
1025     else
1026 #endif
1027     {
1028         jit_node_t      *node;
1029         regno = jit_get_reg(jit_class_fpr);
1030         jit_movi_f(regno, u);
1031         node = jit_stxi_f(v->u.w, _RBP, regno);
1032         jit_link_alist(node);
1033         jit_unget_reg(regno);
1034     }
1035     jit_dec_synth();
1036 }
1037
1038 void
1039 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
1040 {
1041     assert(v->code == jit_code_arg_d);
1042     jit_inc_synth_wp(getarg_d, u, v);
1043 #if __X64
1044     if (jit_arg_f_reg_p(v->u.w))
1045         jit_movr_d(u, _XMM0 - v->u.w);
1046     else
1047 #endif
1048     {
1049         jit_node_t      *node = jit_ldxi_d(u, _RBP, v->u.w);
1050         jit_link_alist(node);
1051     }
1052     jit_dec_synth();
1053 }
1054
1055 void
1056 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
1057 {
1058     assert(v->code == jit_code_arg_d);
1059     jit_inc_synth_wp(putargr_d, u, v);
1060 #if __X64
1061     if (jit_arg_f_reg_p(v->u.w))
1062         jit_movr_d(_XMM0 - v->u.w, u);
1063     else
1064 #endif
1065     {
1066         jit_node_t      *node = jit_stxi_d(v->u.w, _RBP, u);
1067         jit_link_alist(node);
1068     }
1069     jit_dec_synth();
1070 }
1071
1072 void
1073 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
1074 {
1075     jit_int32_t         regno;
1076     assert(v->code == jit_code_arg_d);
1077     jit_inc_synth_dp(putargi_d, u, v);
1078 #if __X64
1079     if (jit_arg_f_reg_p(v->u.w))
1080         jit_movi_d(_XMM0 - v->u.w, u);
1081     else
1082 #endif
1083     {
1084         jit_node_t      *node;
1085         regno = jit_get_reg(jit_class_fpr);
1086         jit_movi_d(regno, u);
1087         node = jit_stxi_d(v->u.w, _RBP, regno);
1088         jit_link_alist(node);
1089         jit_unget_reg(regno);
1090     }
1091     jit_dec_synth();
1092 }
1093
1094 void
1095 _jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
1096 {
1097     assert(_jitc->function);
1098     jit_code_inc_synth_w(code, u);
1099     jit_link_prepare();
1100 #if __X64
1101     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1102         jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
1103         ++_jitc->function->call.argi;
1104 #  if __CYGWIN__ || _WIN32
1105         if (_jitc->function->call.call & jit_call_varargs)
1106             jit_stxi(_jitc->function->call.size, _RSP, u);
1107         _jitc->function->call.size += sizeof(jit_word_t);
1108 #  endif
1109     }
1110     else
1111 #endif
1112     {
1113         jit_stxi(_jitc->function->call.size, _RSP, u);
1114         _jitc->function->call.size += REAL_WORDSIZE;
1115         jit_check_frame();
1116     }
1117     jit_dec_synth();
1118 }
1119
1120 void
1121 _jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
1122 {
1123     jit_int32_t          regno;
1124     assert(_jitc->function);
1125     jit_code_inc_synth_w(code, u);
1126     jit_link_prepare();
1127 #if __X64
1128     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1129         jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
1130 #  if __CYGWIN__ || _WIN32
1131         if (_jitc->function->call.call & jit_call_varargs)
1132             jit_stxi(_jitc->function->call.size, _RSP,
1133                      JIT_RA0 - _jitc->function->call.argi);
1134         _jitc->function->call.size += sizeof(jit_word_t);
1135 #  endif
1136         ++_jitc->function->call.argi;
1137     }
1138     else
1139 #endif
1140     {
1141         regno = jit_get_reg(jit_class_gpr);
1142         jit_movi(regno, u);
1143         jit_stxi(_jitc->function->call.size, _RSP, regno);
1144         _jitc->function->call.size += REAL_WORDSIZE;
1145         jit_unget_reg(regno);
1146         jit_check_frame();
1147     }
1148     jit_dec_synth();
1149 }
1150
1151 void
1152 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
1153 {
1154     assert(_jitc->function);
1155     jit_inc_synth_w(pushargr_f, u);
1156     jit_link_prepare();
1157 #if __X64
1158 #  if __CYGWIN__ || _WIN32
1159     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1160         jit_movr_f(_XMM0 - _jitc->function->call.argi, u);
1161         if (_jitc->function->call.call & jit_call_varargs) {
1162             jit_stxi_f(_jitc->function->call.size, _RSP,
1163                        _XMM0 - _jitc->function->call.argi);
1164             jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1165                        _jitc->function->call.size);
1166         }
1167         ++_jitc->function->call.argi;
1168         _jitc->function->call.size += sizeof(jit_word_t);
1169     }
1170 #  else
1171     if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
1172         jit_movr_f(_XMM0 - _jitc->function->call.argf, u);
1173         ++_jitc->function->call.argf;
1174     }
1175 #  endif
1176     else
1177 #endif
1178     {
1179         jit_stxi_f(_jitc->function->call.size, _RSP, u);
1180         _jitc->function->call.size += REAL_WORDSIZE;
1181         jit_check_frame();
1182     }
1183     jit_dec_synth();
1184 }
1185
1186 void
1187 _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
1188 {
1189     jit_int32_t         regno;
1190     assert(_jitc->function);
1191     jit_inc_synth_f(pushargi_f, u);
1192     jit_link_prepare();
1193 #if __X64
1194 #  if __CYGWIN__ || _WIN32
1195     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1196         jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
1197         if (_jitc->function->call.call & jit_call_varargs) {
1198             jit_stxi_f(_jitc->function->call.size, _RSP,
1199                        _XMM0 - _jitc->function->call.argi);
1200             jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1201                        _jitc->function->call.size);
1202         }
1203         ++_jitc->function->call.argi;
1204         _jitc->function->call.size += sizeof(jit_word_t);
1205     }
1206 #  else
1207     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1208         jit_movi_f(_XMM0 - _jitc->function->call.argf, u);
1209         ++_jitc->function->call.argf;
1210     }
1211 #  endif
1212     else
1213 #endif
1214     {
1215         regno = jit_get_reg(jit_class_fpr);
1216         jit_movi_f(regno, u);
1217         jit_stxi_f(_jitc->function->call.size, _RSP, regno);
1218         _jitc->function->call.size += REAL_WORDSIZE;
1219         jit_unget_reg(regno);
1220         jit_check_frame();
1221     }
1222     jit_dec_synth();
1223 }
1224
1225 void
1226 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
1227 {
1228     assert(_jitc->function);
1229     jit_inc_synth_w(pushargr_d, u);
1230     jit_link_prepare();
1231 #if __X64
1232 #  if __CYGWIN__ || _WIN32
1233     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1234         jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
1235         if (_jitc->function->call.call & jit_call_varargs) {
1236             jit_stxi_d(_jitc->function->call.size, _RSP,
1237                        _XMM0 - _jitc->function->call.argi);
1238             jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1239                        _jitc->function->call.size);
1240         }
1241         ++_jitc->function->call.argi;
1242         _jitc->function->call.size += sizeof(jit_word_t);
1243     }
1244 #  else
1245     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1246         jit_movr_d(_XMM0 - _jitc->function->call.argf, u);
1247         ++_jitc->function->call.argf;
1248     }
1249 #  endif
1250     else
1251 #endif
1252     {
1253         jit_stxi_d(_jitc->function->call.size, _RSP, u);
1254         _jitc->function->call.size += sizeof(jit_float64_t);
1255         jit_check_frame();
1256     }
1257     jit_dec_synth();
1258 }
1259
1260 void
1261 _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
1262 {
1263     jit_int32_t          regno;
1264     assert(_jitc->function);
1265     jit_inc_synth_d(pushargi_d, u);
1266     jit_link_prepare();
1267 #if __X64
1268 #  if __CYGWIN__ || _WIN32
1269     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1270         jit_movi_d(_XMM0 - _jitc->function->call.argi, u);
1271         if (_jitc->function->call.call & jit_call_varargs) {
1272             jit_stxi_d(_jitc->function->call.size, _RSP,
1273                        _XMM0 - _jitc->function->call.argi);
1274             jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1275                        _jitc->function->call.size);
1276         }
1277         ++_jitc->function->call.argi;
1278         _jitc->function->call.size += sizeof(jit_word_t);
1279     }
1280 #  else
1281     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1282         jit_movi_d(_XMM0 - _jitc->function->call.argf, u);
1283         ++_jitc->function->call.argf;
1284     }
1285 #  endif
1286     else
1287 #endif
1288     {
1289         regno = jit_get_reg(jit_class_fpr);
1290         jit_movi_d(regno, u);
1291         jit_stxi_d(_jitc->function->call.size, _RSP, regno);
1292         _jitc->function->call.size += sizeof(jit_float64_t);
1293         jit_unget_reg(regno);
1294         jit_check_frame();
1295     }
1296     jit_dec_synth();
1297 }
1298
1299 jit_bool_t
1300 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
1301 {
1302 #if __X64
1303     jit_int32_t         spec;
1304
1305     spec = jit_class(_rvs[regno].spec);
1306     if (spec & jit_class_arg) {
1307         if (spec & jit_class_gpr) {
1308             regno = JIT_RA0 - regno;
1309             if (regno >= 0 && regno < node->v.w)
1310                 return (1);
1311         }
1312         else if (spec & jit_class_fpr) {
1313             regno = _XMM0 - regno;
1314             if (regno >= 0 && regno < node->w.w)
1315                 return (1);
1316         }
1317     }
1318 #endif
1319     return (0);
1320 }
1321
1322 void
1323 _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
1324 {
1325     jit_int32_t          reg;
1326     jit_node_t          *call;
1327     assert(_jitc->function);
1328     jit_check_frame();
1329     reg = r0;
1330     jit_inc_synth_w(finishr, r0);
1331     if (_jitc->function->self.alen < _jitc->function->call.size)
1332         _jitc->function->self.alen = _jitc->function->call.size;
1333 #if __X64
1334 #  if !(__CYGWIN__ || _WIN32)
1335     if (_jitc->function->call.call & jit_call_varargs) {
1336         if (jit_regno(reg) == _RAX) {
1337             reg = jit_get_reg(jit_class_gpr);
1338             jit_movr(reg, _RAX);
1339         }
1340         if (_jitc->function->call.argf)
1341             jit_movi(_RAX, _jitc->function->call.argf);
1342         else
1343             jit_movi(_RAX, 0);
1344         if (reg != r0)
1345             jit_unget_reg(reg);
1346     }
1347 #  endif
1348 #endif
1349     call = jit_callr(reg);
1350     call->v.w = _jitc->function->call.argi;
1351     call->w.w = _jitc->function->call.argf;
1352     _jitc->function->call.argi = _jitc->function->call.argf =
1353         _jitc->function->call.size = 0;
1354     _jitc->prepare = 0;
1355     jit_dec_synth();
1356 }
1357
1358 jit_node_t *
1359 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
1360 {
1361     jit_node_t          *node;
1362     assert(_jitc->function);
1363     jit_check_frame();
1364     jit_inc_synth_w(finishi, (jit_word_t)i0);
1365     if (_jitc->function->self.alen < _jitc->function->call.size)
1366         _jitc->function->self.alen = _jitc->function->call.size;
1367 #if __X64
1368 #  if !(__CYGWIN__ || _WIN32)
1369     if (_jitc->function->call.call & jit_call_varargs) {
1370         if (_jitc->function->call.argf)
1371             jit_movi(_RAX, _jitc->function->call.argf);
1372         else
1373             jit_movi(_RAX, 0);
1374         jit_live(_RAX);
1375     }
1376 #  endif
1377 #endif
1378     node = jit_calli(i0);
1379     node->v.w = _jitc->function->call.argi;
1380     node->w.w = _jitc->function->call.argf;
1381     _jitc->function->call.argi = _jitc->function->call.argf =
1382         _jitc->function->call.size = 0;
1383     _jitc->prepare = 0;
1384     jit_dec_synth();
1385     return (node);
1386 }
1387
1388 void
1389 _jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
1390 {
1391     jit_inc_synth_w(retval_c, r0);
1392     jit_extr_c(r0, JIT_RET);
1393     jit_dec_synth();
1394 }
1395
1396 void
1397 _jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
1398 {
1399     jit_inc_synth_w(retval_uc, r0);
1400     jit_extr_uc(r0, JIT_RET);
1401     jit_dec_synth();
1402 }
1403
1404 void
1405 _jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
1406 {
1407     jit_inc_synth_w(retval_s, r0);
1408     jit_extr_s(r0, JIT_RET);
1409     jit_dec_synth();
1410 }
1411
1412 void
1413 _jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
1414 {
1415     jit_inc_synth_w(retval_us, r0);
1416     jit_extr_us(r0, JIT_RET);
1417     jit_dec_synth();
1418 }
1419
1420 void
1421 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
1422 {
1423     jit_inc_synth_w(retval_i, r0);
1424 #if __X32 || __X64_32
1425     if (r0 != JIT_RET)
1426         jit_movr(r0, JIT_RET);
1427 #else
1428     jit_extr_i(r0, JIT_RET);
1429 #endif
1430     jit_dec_synth();
1431 }
1432
1433 #if __X64 && !__X64_32
1434 void
1435 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
1436 {
1437     jit_inc_synth_w(retval_ui, r0);
1438     jit_extr_ui(r0, JIT_RET);
1439     jit_dec_synth();
1440 }
1441
1442 void
1443 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
1444 {
1445     jit_inc_synth_w(retval_l, r0);
1446     if (r0 != JIT_RET)
1447         jit_movr(r0, JIT_RET);
1448     jit_dec_synth();
1449 }
1450 #endif
1451
1452 void
1453 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
1454 {
1455     jit_inc_synth_w(retval_f, r0);
1456 #if __X64
1457     if (r0 != JIT_FRET)
1458         jit_movr_f(r0, JIT_FRET);
1459 #endif
1460     jit_dec_synth();
1461 }
1462
1463 void
1464 _jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
1465 {
1466     jit_inc_synth_w(retval_d, r0);
1467 #if __X64
1468     if (r0 != JIT_FRET)
1469         jit_movr_d(r0, JIT_FRET);
1470 #endif
1471     jit_dec_synth();
1472 }
1473
1474 jit_pointer_t
1475 _emit_code(jit_state_t *_jit)
1476 {
1477     jit_node_t          *node;
1478     jit_node_t          *temp;
1479     jit_word_t           word;
1480     jit_int32_t          value;
1481     jit_int32_t          offset;
1482     struct {
1483         jit_node_t      *node;
1484         jit_word_t       word;
1485         jit_function_t   func;
1486 #if DEVEL_DISASSEMBLER
1487         jit_word_t       prevw;
1488 #endif
1489         jit_int32_t      patch_offset;
1490     } undo;
1491 #if DEVEL_DISASSEMBLER
1492     jit_word_t           prevw;
1493 #endif
1494
1495     _jitc->function = NULL;
1496
1497     jit_reglive_setup();
1498
1499     undo.word = 0;
1500     undo.node = NULL;
1501     undo.patch_offset = 0;
1502 #define case_rr(name, type)                                             \
1503             case jit_code_##name##r##type:                              \
1504                 name##r##type(rn(node->u.w), rn(node->v.w));            \
1505                 break
1506 #define case_rw(name, type)                                             \
1507             case jit_code_##name##i##type:                              \
1508                 name##i##type(rn(node->u.w), node->v.w);                \
1509                 break
1510 #define case_rf(name, type)                                             \
1511             case jit_code_##name##r##type:                              \
1512                 if (jit_x87_reg_p(node->v.w))                           \
1513                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1514                 else                                                    \
1515                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1516                 break
1517 #define case_fr(name, type)                                             \
1518             case jit_code_##name##r##type:                              \
1519                 if (jit_x87_reg_p(node->u.w))                           \
1520                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1521                 else                                                    \
1522                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1523                 break
1524 #define case_fw(name, type)                                             \
1525             case jit_code_##name##i##type:                              \
1526                 if (jit_x87_reg_p(node->u.w))                           \
1527                     x87_##name##i##type(rn(node->u.w), node->v.w);      \
1528                 else                                                    \
1529                     sse_##name##i##type(rn(node->u.w), node->v.w);      \
1530                 break
1531 #define case_wr(name, type)                                             \
1532             case jit_code_##name##i##type:                              \
1533                 name##i##type(node->u.w, rn(node->v.w));                \
1534                 break
1535 #define case_wf(name, type)                                             \
1536             case jit_code_##name##i##type:                              \
1537                 if (jit_x87_reg_p(node->v.w))                           \
1538                     x87_##name##i##type(node->u.w, rn(node->v.w));      \
1539                 else                                                    \
1540                     sse_##name##i##type(node->u.w, rn(node->v.w));      \
1541                 break
1542 #define case_ff(name, type)                                             \
1543             case jit_code_##name##r##type:                              \
1544                 if (jit_x87_reg_p(node->u.w) &&                         \
1545                     jit_x87_reg_p(node->v.w))                           \
1546                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1547                 else                                                    \
1548                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1549                 break;
1550 #define case_rrr(name, type)                                            \
1551             case jit_code_##name##r##type:                              \
1552                 name##r##type(rn(node->u.w),                            \
1553                               rn(node->v.w), rn(node->w.w));            \
1554                 break
1555 #define case_rrrr(name, type)                                           \
1556             case jit_code_##name##r##type:                              \
1557                 name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
1558                               rn(node->v.w), rn(node->w.w));            \
1559                 break
1560 #define case_rqr(name, type)                                            \
1561             case jit_code_##name##r##type:                              \
1562                 if (jit_x87_reg_p(node->u.w) &&                         \
1563                     jit_x87_reg_p(node->v.q.l) &&                       \
1564                     jit_x87_reg_p(node->v.q.h) &&                       \
1565                     jit_x87_reg_p(node->w.w))                           \
1566                     x87_##name##r##type(rn(node->u.w),                  \
1567                                         rn(node->v.q.l),                \
1568                                         rn(node->v.q.h),                \
1569                                         rn(node->w.w));                 \
1570                 else {                                                  \
1571                     assert(jit_sse_reg_p(node->u.w) &&                  \
1572                            jit_sse_reg_p(node->v.q.l) &&                \
1573                            jit_sse_reg_p(node->v.q.h) &&                \
1574                            jit_sse_reg_p(node->w.w));                   \
1575                     sse_##name##r##type(rn(node->u.w),                  \
1576                                         rn(node->v.q.l),                \
1577                                         rn(node->v.q.h),                \
1578                                         rn(node->w.w));                 \
1579                 }                                                       \
1580                 break;
1581 #define case_frr(name, type)                                            \
1582             case jit_code_##name##r##type:                              \
1583                 if (jit_x87_reg_p(node->u.w))                           \
1584                     x87_##name##r##type(rn(node->u.w),                  \
1585                                         rn(node->v.w), rn(node->w.w));  \
1586                 else                                                    \
1587                     sse_##name##r##type(rn(node->u.w),                  \
1588                                         rn(node->v.w), rn(node->w.w));  \
1589                 break
1590 #define case_rrf(name, type)                                            \
1591             case jit_code_##name##r##type:                              \
1592                 if (jit_x87_reg_p(node->w.w))                           \
1593                     x87_##name##r##type(rn(node->u.w),                  \
1594                                         rn(node->v.w), rn(node->w.w));  \
1595                 else                                                    \
1596                     sse_##name##r##type(rn(node->u.w),                  \
1597                                         rn(node->v.w), rn(node->w.w));  \
1598                 break
1599 #define case_rrw(name, type)                                            \
1600             case jit_code_##name##i##type:                              \
1601                 name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
1602                 break
1603 #define case_rrrw(name, type)                                           \
1604             case jit_code_##name##i##type:                              \
1605                 name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
1606                               rn(node->v.w), node->w.w);                \
1607                 break
1608 #define case_frw(name, type)                                            \
1609             case jit_code_##name##i##type:                              \
1610                 if (jit_x87_reg_p(node->u.w))                           \
1611                     x87_##name##i##type(rn(node->u.w),                  \
1612                                         rn(node->v.w), node->w.w);      \
1613                 else                                                    \
1614                     sse_##name##i##type(rn(node->u.w),                  \
1615                                         rn(node->v.w), node->w.w);      \
1616                 break
1617 #define case_wrr(name, type)                                            \
1618             case jit_code_##name##i##type:                              \
1619                 name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
1620                 break
1621 #define case_wrf(name, type)                                            \
1622             case jit_code_##name##i##type:                              \
1623                 if (jit_x87_reg_p(node->w.w))                           \
1624                     x87_##name##i##type(node->u.w,                      \
1625                                         rn(node->v.w), rn(node->w.w));  \
1626                 else                                                    \
1627                     sse_##name##i##type(node->u.w,                      \
1628                                         rn(node->v.w), rn(node->w.w));  \
1629                 break
1630 #define case_brr(name, type)                                            \
1631             case jit_code_##name##r##type:                              \
1632                 temp = node->u.n;                                       \
1633                 assert(temp->code == jit_code_label ||                  \
1634                        temp->code == jit_code_epilog);                  \
1635                 if (temp->flag & jit_flag_patch)                        \
1636                     name##r##type(temp->u.w, rn(node->v.w),             \
1637                                   rn(node->w.w));                       \
1638                 else {                                                  \
1639                     word = name##r##type(_jit->pc.w,                    \
1640                                          rn(node->v.w), rn(node->w.w)); \
1641                     patch(word, node);                                  \
1642                 }                                                       \
1643                 break
1644 #define case_brw(name, type)                                            \
1645             case jit_code_##name##i##type:                              \
1646                 temp = node->u.n;                                       \
1647                 assert(temp->code == jit_code_label ||                  \
1648                        temp->code == jit_code_epilog);                  \
1649                 if (temp->flag & jit_flag_patch)                        \
1650                     name##i##type(temp->u.w,                            \
1651                                   rn(node->v.w), node->w.w);            \
1652                 else {                                                  \
1653                     word = name##i##type(_jit->pc.w,                    \
1654                                          rn(node->v.w), node->w.w);     \
1655                     patch(word, node);                                  \
1656                 }                                                       \
1657                 break
/* Expand to the switch case for jit_code_<name>r<type>, taking the three
 * register operands node->u.w, node->v.w and node->w.w (each mapped
 * through rn()).
 *
 * The x87_ emitter is selected only when both node->v.w and node->w.w
 * satisfy jit_x87_reg_p(); in every other case the sse_ emitter is used.
 *
 * Like the sibling case_* macros, the expansion ends in a bare `break' and
 * the invocation supplies the `;' (a trailing `;' here would expand every
 * use to `break;;'). */
#define case_rff(name, type)                                            \
            case jit_code_##name##r##type:                              \
                if (jit_x87_reg_p(node->v.w) &&                         \
                    jit_x87_reg_p(node->w.w))                           \
                    x87_##name##r##type(rn(node->u.w), rn(node->v.w),   \
                                        rn(node->w.w));                 \
                else                                                    \
                    sse_##name##r##type(rn(node->u.w), rn(node->v.w),   \
                                        rn(node->w.w));                 \
                break
/* Expand to the switch case for jit_code_<name>i<type>, where the third
 * operand is a floating point constant reached through a data node:
 * node->w.n->u.w is cast to a pointer to jit_float<size>_t.  The node must
 * carry jit_flag_data (asserted).
 *
 * The sse_ emitter is used unless node->v.w satisfies jit_x87_reg_p(), in
 * which case the x87_ emitter is used.
 *
 * The expansion ends in a bare `break'; the invocation supplies the `;'. */
#define case_rfw(name, type, size)                                      \
            case jit_code_##name##i##type:                              \
                assert(node->flag & jit_flag_data);                     \
                if (!(jit_x87_reg_p(node->v.w))) {                      \
                    sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
                                (jit_float##size##_t *)node->w.n->u.w); \
                }                                                       \
                else {                                                  \
                    x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
                                (jit_float##size##_t *)node->w.n->u.w); \
                }                                                       \
                break
/* Expand to the switch case for jit_code_<name>r<type>, taking the three
 * register operands node->u.w, node->v.w and node->w.w (each mapped
 * through rn()).
 *
 * The x87_ emitter is used only when all three operands satisfy
 * jit_x87_reg_p(); if any one of them does not, the sse_ emitter is used.
 *
 * The expansion ends in a bare `break'; the invocation supplies the `;'. */
#define case_fff(name, type)                                            \
            case jit_code_##name##r##type:                              \
                if (!(jit_x87_reg_p(node->u.w) &&                       \
                      jit_x87_reg_p(node->v.w) &&                       \
                      jit_x87_reg_p(node->w.w))) {                      \
                    sse_##name##r##type(rn(node->u.w),                  \
                                        rn(node->v.w), rn(node->w.w));  \
                }                                                       \
                else {                                                  \
                    x87_##name##r##type(rn(node->u.w),                  \
                                        rn(node->v.w), rn(node->w.w));  \
                }                                                       \
                break
/* Expand to the switch case for jit_code_<name>i<type>, where the third
 * operand is a floating point constant reached through a data node:
 * node->w.n->u.w is cast to a pointer to jit_float<size>_t.  The node must
 * carry jit_flag_data (asserted).
 *
 * The x87_ emitter is used only when both node->u.w and node->v.w satisfy
 * jit_x87_reg_p(); otherwise the sse_ emitter is used.
 *
 * The expansion ends in a bare `break'; the invocation supplies the `;'. */
#define case_ffw(name, type, size)                                      \
            case jit_code_##name##i##type:                              \
                assert(node->flag & jit_flag_data);                     \
                if (!(jit_x87_reg_p(node->u.w) &&                       \
                      jit_x87_reg_p(node->v.w))) {                      \
                    sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
                                (jit_float##size##_t *)node->w.n->u.w); \
                }                                                       \
                else {                                                  \
                    x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
                                (jit_float##size##_t *)node->w.n->u.w); \
                }                                                       \
                break
/* Expand to the switch case for jit_code_b<name>r<type>: a branch code with
 * the two register operands node->v.w and node->w.w, targeting the label
 * (or epilog) node held in node->u.n.
 *
 * Emitter selection: the x87_b variant is used when both operands satisfy
 * jit_x87_reg_p(), the sse_b variant otherwise.
 *
 * Target handling: when the target does not have jit_flag_patch set yet,
 * the instruction is emitted against the current pc and the returned word
 * is handed to patch(word, node) (presumably for later fix-up).  Otherwise
 * the target address temp->u.w is used directly.
 *
 * The expansion ends in a bare `break'; the invocation supplies the `;'. */
#define case_bff(name, type)                                            \
            case jit_code_b##name##r##type:                             \
                temp = node->u.n;                                       \
                assert(temp->code == jit_code_label ||                  \
                       temp->code == jit_code_epilog);                  \
                if (!(temp->flag & jit_flag_patch)) {                   \
                    if (jit_x87_reg_p(node->v.w) &&                     \
                        jit_x87_reg_p(node->w.w))                       \
                        word = x87_b##name##r##type(_jit->pc.w,         \
                                rn(node->v.w), rn(node->w.w));          \
                    else                                                \
                        word = sse_b##name##r##type(_jit->pc.w,         \
                                rn(node->v.w), rn(node->w.w));          \
                    patch(word, node);                                  \
                }                                                       \
                else if (jit_x87_reg_p(node->v.w) &&                    \
                         jit_x87_reg_p(node->w.w)) {                    \
                    x87_b##name##r##type(temp->u.w,                     \
                            rn(node->v.w), rn(node->w.w));              \
                }                                                       \
                else {                                                  \
                    sse_b##name##r##type(temp->u.w,                     \
                            rn(node->v.w), rn(node->w.w));              \
                }                                                       \
                break
/* Expand to the switch case for jit_code_b<name>i<type>: a branch code with
 * the register operand node->v.w and a floating point constant reached
 * through node->w.n->u.w (cast to a pointer to jit_float<size>_t),
 * targeting the label (or epilog) node held in node->u.n.
 *
 * Emitter selection: the x87_b variant is used when node->v.w satisfies
 * jit_x87_reg_p(), the sse_b variant otherwise.
 *
 * Target handling: when the target does not have jit_flag_patch set yet,
 * the instruction is emitted against the current pc and the returned word
 * is handed to patch(word, node) (presumably for later fix-up).  Otherwise
 * the target address temp->u.w is used directly.
 *
 * The expansion ends in a bare `break'; the invocation supplies the `;'. */
#define case_bfw(name, type, size)                                      \
            case jit_code_b##name##i##type:                             \
                temp = node->u.n;                                       \
                assert(temp->code == jit_code_label ||                  \
                       temp->code == jit_code_epilog);                  \
                if (!(temp->flag & jit_flag_patch)) {                   \
                    if (jit_x87_reg_p(node->v.w))                       \
                        word = x87_b##name##i##type(_jit->pc.w,         \
                                rn(node->v.w),                          \
                                (jit_float##size##_t *)node->w.n->u.w); \
                    else                                                \
                        word = sse_b##name##i##type(_jit->pc.w,         \
                                rn(node->v.w),                          \
                                (jit_float##size##_t *)node->w.n->u.w); \
                    patch(word, node);                                  \
                }                                                       \
                else if (jit_x87_reg_p(node->v.w)) {                    \
                    x87_b##name##i##type(temp->u.w,                     \
                            rn(node->v.w),                              \
                            (jit_float##size##_t *)node->w.n->u.w);     \
                }                                                       \
                else {                                                  \
                    sse_b##name##i##type(temp->u.w,                     \
                            rn(node->v.w),                              \
                            (jit_float##size##_t *)node->w.n->u.w);     \
                }                                                       \
                break
1752 #if DEVEL_DISASSEMBLER
1753     prevw = _jit->pc.w;
1754 #endif
1755     for (node = _jitc->head; node; node = node->next) {
1756         if (_jit->pc.uc >= _jitc->code.end)
1757             return (NULL);
1758
1759 #if DEVEL_DISASSEMBLER
1760         node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
1761         prevw = _jit->pc.w;
1762 #endif
1763         value = jit_classify(node->code);
1764         jit_regarg_set(node, value);
1765         switch (node->code) {
1766             case jit_code_align:
1767                 /* Must align to a power of two */
1768                 assert(!(node->u.w & (node->u.w - 1)));
1769                 if ((word = _jit->pc.w & (node->u.w - 1)))
1770                     nop(node->u.w - word);
1771                 break;
1772             case jit_code_skip:
1773                 nop(node->u.w);
1774                 break;
1775             case jit_code_note:         case jit_code_name:
1776                 node->u.w = _jit->pc.w;
1777                 break;
1778             case jit_code_label:
1779                 if ((node->link || (node->flag & jit_flag_use)) &&
1780                     (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
1781                     nop(sizeof(jit_word_t) - word);
1782                 /* remember label is defined */
1783                 node->flag |= jit_flag_patch;
1784                 node->u.w = _jit->pc.w;
1785                 break;
1786                 case_rrr(add,);
1787                 case_rrw(add,);
1788                 case_rrr(addx,);
1789                 case_rrw(addx,);
1790                 case_rrr(addc,);
1791                 case_rrw(addc,);
1792                 case_rrr(sub,);
1793                 case_rrw(sub,);
1794                 case_rrr(subx,);
1795                 case_rrw(subx,);
1796                 case_rrr(subc,);
1797                 case_rrw(subc,);
1798                 case_rrw(rsb,);
1799                 case_rrr(mul,);
1800                 case_rrw(mul,);
1801                 case_rrr(hmul,);
1802                 case_rrw(hmul,);
1803                 case_rrr(hmul, _u);
1804                 case_rrw(hmul, _u);
1805                 case_rrrr(qmul,);
1806                 case_rrrw(qmul,);
1807                 case_rrrr(qmul, _u);
1808                 case_rrrw(qmul, _u);
1809                 case_rrr(div,);
1810                 case_rrw(div,);
1811                 case_rrr(div, _u);
1812                 case_rrw(div, _u);
1813                 case_rrrr(qdiv,);
1814                 case_rrrw(qdiv,);
1815                 case_rrrr(qdiv, _u);
1816                 case_rrrw(qdiv, _u);
1817                 case_rrr(rem,);
1818                 case_rrw(rem,);
1819                 case_rrr(rem, _u);
1820                 case_rrw(rem, _u);
1821                 case_rrr(and,);
1822                 case_rrw(and,);
1823                 case_rrr(or,);
1824                 case_rrw(or,);
1825                 case_rrr(xor,);
1826                 case_rrw(xor,);
1827                 case_rrr(lsh,);
1828                 case_rrw(lsh,);
1829                 case_rrrr(qlsh,);
1830                 case_rrrw(qlsh,);
1831                 case_rrrr(qlsh, _u);
1832                 case_rrrw(qlsh, _u);
1833                 case_rrr(rsh,);
1834                 case_rrw(rsh,);
1835                 case_rrrr(qrsh,);
1836                 case_rrrw(qrsh,);
1837                 case_rrr(rsh, _u);
1838                 case_rrw(rsh, _u);
1839                 case_rrrr(qrsh, _u);
1840                 case_rrrw(qrsh, _u);
1841                 case_rrr(lrot,);
1842                 case_rrw(lrot,);
1843                 case_rrr(rrot,);
1844                 case_rrw(rrot,);
1845                 case_rr(neg,);
1846                 case_rr(com,);
1847                 case_rr(clo,);
1848                 case_rr(clz,);
1849                 case_rr(cto,);
1850                 case_rr(ctz,);
1851                 case_rr(rbit,);
1852                 case_rr(popcnt,);
1853                 case_rrr(lt,);
1854                 case_rrw(lt,);
1855                 case_rrr(lt, _u);
1856                 case_rrw(lt, _u);
1857                 case_rrr(le,);
1858                 case_rrw(le,);
1859                 case_rrr(le, _u);
1860                 case_rrw(le, _u);
1861                 case_rrr(eq,);
1862                 case_rrw(eq,);
1863                 case_rrr(ge,);
1864                 case_rrw(ge,);
1865                 case_rrr(ge, _u);
1866                 case_rrw(ge, _u);
1867                 case_rrr(gt,);
1868                 case_rrw(gt,);
1869                 case_rrr(gt, _u);
1870                 case_rrw(gt, _u);
1871                 case_rrr(ne,);
1872                 case_rrw(ne,);
1873             case jit_code_casr:
1874                 casr(rn(node->u.w), rn(node->v.w),
1875                      rn(node->w.q.l), rn(node->w.q.h));
1876                 break;
1877             case jit_code_casi:
1878                 casi(rn(node->u.w), node->v.w,
1879                      rn(node->w.q.l), rn(node->w.q.h));
1880                 break;
1881                 case_rrr(movn,);
1882                 case_rrr(movz,);
1883                 case_rr(mov,);
1884             case jit_code_movi:
1885                 if (node->flag & jit_flag_node) {
1886                     temp = node->v.n;
1887                     if (temp->code == jit_code_data ||
1888                         (temp->code == jit_code_label &&
1889                          (temp->flag & jit_flag_patch)))
1890                         movi(rn(node->u.w), temp->u.w);
1891                     else {
1892                         assert(temp->code == jit_code_label ||
1893                                temp->code == jit_code_epilog);
1894 #if CAN_RIP_ADDRESS
1895                         word = _jit->code.length -
1896                             (_jit->pc.uc - _jit->code.ptr);
1897                         if ((jit_int32_t)word == word)
1898                             word = movi(rn(node->u.w), _jit->pc.w);
1899                         else
1900 #endif
1901                             word = movi_p(rn(node->u.w), node->v.w);
1902                         patch(word, node);
1903                     }
1904                 }
1905                 else
1906                     movi(rn(node->u.w), node->v.w);
1907                 break;
1908                 case_rr(hton, _us);
1909                 case_rr(hton, _ui);
1910 #if __X64 && !__X64_32
1911                 case_rr(hton, _ul);
1912 #endif
1913                 case_rr(bswap, _us);
1914                 case_rr(bswap, _ui);
1915 #if __X64 && !__X64_32
1916                 case_rr(bswap, _ul);
1917 #endif
1918             case jit_code_extr:
1919                 extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1920                 break;
1921             case jit_code_extr_u:
1922                 extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1923                 break;
1924             case jit_code_depr:
1925                 depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1926                 break;
1927             case jit_code_depi:
1928                 depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
1929                 break;
1930                 case_rr(ext, _c);
1931                 case_rr(ext, _uc);
1932                 case_rr(ext, _s);
1933                 case_rr(ext, _us);
1934 #if __X64 && !__X64_32
1935                 case_rr(ext, _i);
1936                 case_rr(ext, _ui);
1937 #endif
1938                 case_rf(trunc, _f_i);
1939                 case_rf(trunc, _d_i);
1940 #if __X64
1941                 case_rf(trunc, _f_l);
1942                 case_rf(trunc, _d_l);
1943 #endif
1944                 case_rr(ld, _c);
1945                 case_rw(ld, _c);
1946                 case_rr(ld, _uc);
1947                 case_rw(ld, _uc);
1948                 case_rr(ld, _s);
1949                 case_rw(ld, _s);
1950                 case_rr(ld, _us);
1951                 case_rw(ld, _us);
1952                 case_rr(ld, _i);
1953                 case_rw(ld, _i);
1954 #if __X64 && !__X64_32
1955                 case_rr(ld, _ui);
1956                 case_rw(ld, _ui);
1957                 case_rr(ld, _l);
1958                 case_rw(ld, _l);
1959 #endif
1960                 case_rrr(ldx, _c);
1961                 case_rrw(ldx, _c);
1962                 case_rrr(ldx, _uc);
1963                 case_rrw(ldx, _uc);
1964                 case_rrr(ldx, _s);
1965                 case_rrw(ldx, _s);
1966                 case_rrr(ldx, _us);
1967                 case_rrw(ldx, _us);
1968                 case_rrr(ldx, _i);
1969                 case_rrw(ldx, _i);
1970 #if __X64 && !__X64_32
1971                 case_rrr(ldx, _ui);
1972                 case_rrw(ldx, _ui);
1973                 case_rrr(ldx, _l);
1974                 case_rrw(ldx, _l);
1975 #endif
1976             case jit_code_unldr:
1977                 unldr(rn(node->u.w), rn(node->v.w), node->w.w);
1978                 break;
1979             case jit_code_unldi:
1980                 unldi(rn(node->u.w), node->v.w, node->w.w);
1981                 break;
1982             case jit_code_unldr_u:
1983                 unldr_u(rn(node->u.w), rn(node->v.w), node->w.w);
1984                 break;
1985             case jit_code_unldi_u:
1986                 unldi_u(rn(node->u.w), node->v.w, node->w.w);
1987                 break;
1988                 case_rr(st, _c);
1989                 case_wr(st, _c);
1990                 case_rr(st, _s);
1991                 case_wr(st, _s);
1992                 case_rr(st, _i);
1993                 case_wr(st, _i);
1994 #if __X64 && !__X64_32
1995                 case_rr(st, _l);
1996                 case_wr(st, _l);
1997 #endif
1998                 case_rrr(stx, _c);
1999                 case_wrr(stx, _c);
2000                 case_rrr(stx, _s);
2001                 case_wrr(stx, _s);
2002                 case_rrr(stx, _i);
2003                 case_wrr(stx, _i);
2004 #if __X64 && !__X64_32
2005                 case_rrr(stx, _l);
2006                 case_wrr(stx, _l);
2007 #endif
2008             case jit_code_unstr:
2009                 unstr(rn(node->u.w), rn(node->v.w), node->w.w);
2010                 break;
2011             case jit_code_unsti:
2012                 unsti(node->u.w, rn(node->v.w), node->w.w);
2013                 break;
2014                 case_brr(blt,);
2015                 case_brw(blt,);
2016                 case_brr(blt, _u);
2017                 case_brw(blt, _u);
2018                 case_brr(ble,);
2019                 case_brw(ble,);
2020                 case_brr(ble, _u);
2021                 case_brw(ble, _u);
2022                 case_brr(beq,);
2023                 case_brw(beq,);
2024                 case_brr(bge,);
2025                 case_brw(bge,);
2026                 case_brr(bge, _u);
2027                 case_brw(bge, _u);
2028                 case_brr(bgt,);
2029                 case_brw(bgt,);
2030                 case_brr(bgt, _u);
2031                 case_brw(bgt, _u);
2032                 case_brr(bne,);
2033                 case_brw(bne,);
2034                 case_brr(bms,);
2035                 case_brw(bms,);
2036                 case_brr(bmc,);
2037                 case_brw(bmc,);
2038                 case_brr(boadd,);
2039                 case_brw(boadd,);
2040                 case_brr(boadd, _u);
2041                 case_brw(boadd, _u);
2042                 case_brr(bxadd,);
2043                 case_brw(bxadd,);
2044                 case_brr(bxadd, _u);
2045                 case_brw(bxadd, _u);
2046                 case_brr(bosub,);
2047                 case_brw(bosub,);
2048                 case_brr(bosub, _u);
2049                 case_brw(bosub, _u);
2050                 case_brr(bxsub,);
2051                 case_brw(bxsub,);
2052                 case_brr(bxsub, _u);
2053                 case_brw(bxsub, _u);
2054                 case_fff(add, _f);
2055                 case_ffw(add, _f, 32);
2056                 case_fff(sub, _f);
2057                 case_ffw(sub, _f, 32);
2058                 case_ffw(rsb, _f, 32);
2059                 case_fff(mul, _f);
2060                 case_ffw(mul, _f, 32);
2061                 case_fff(div, _f);
2062                 case_ffw(div, _f, 32);
2063                 case_ff(abs, _f);
2064                 case_ff(neg, _f);
2065                 case_ff(sqrt, _f);
2066                 case_rqr(fma, _f);
2067                 case_rqr(fms, _f);
2068                 case_rqr(fnma, _f);
2069                 case_rqr(fnms, _f);
2070                 case_fr(ext, _f);
2071                 case_fr(ext, _d_f);
2072                 case_rff(lt, _f);
2073                 case_rfw(lt, _f, 32);
2074                 case_rff(le, _f);
2075                 case_rfw(le, _f, 32);
2076                 case_rff(eq, _f);
2077                 case_rfw(eq, _f, 32);
2078                 case_rff(ge, _f);
2079                 case_rfw(ge, _f, 32);
2080                 case_rff(gt, _f);
2081                 case_rfw(gt, _f, 32);
2082                 case_rff(ne, _f);
2083                 case_rfw(ne, _f, 32);
2084                 case_rff(unlt, _f);
2085                 case_rfw(unlt, _f, 32);
2086                 case_rff(unle, _f);
2087                 case_rfw(unle, _f, 32);
2088                 case_rff(uneq, _f);
2089                 case_rfw(uneq, _f, 32);
2090                 case_rff(unge, _f);
2091                 case_rfw(unge, _f, 32);
2092                 case_rff(ungt, _f);
2093                 case_rfw(ungt, _f, 32);
2094                 case_rff(ltgt, _f);
2095                 case_rfw(ltgt, _f, 32);
2096                 case_rff(ord, _f);
2097                 case_rfw(ord, _f, 32);
2098                 case_rff(unord, _f);
2099                 case_rfw(unord, _f, 32);
2100             case jit_code_movr_f:
2101                 if (jit_x87_reg_p(node->u.w)) {
2102                     if (jit_x87_reg_p(node->v.w))
2103                         x87_movr_f(rn(node->u.w), rn(node->v.w));
2104                     else
2105                         x87_from_sse_f(rn(node->u.w), rn(node->v.w));
2106                 }
2107                 else {
2108                     if (jit_sse_reg_p(node->v.w))
2109                         sse_movr_f(rn(node->u.w), rn(node->v.w));
2110                     else
2111                         sse_from_x87_f(rn(node->u.w), rn(node->v.w));
2112                 }
2113                 break;
2114             case jit_code_movi_f:
2115                 assert(node->flag & jit_flag_data);
2116                 if (jit_x87_reg_p(node->u.w))
2117                     x87_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
2118                 else
2119                     sse_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
2120                 break;
2121                 case_fr(ld, _f);
2122                 case_fw(ld, _f);
2123                 case_frr(ldx, _f);
2124                 case_frw(ldx, _f);
2125             case jit_code_unldr_x:
2126                 if (jit_x87_reg_p(node->u.w))
2127                     x87_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
2128                 else
2129                     sse_unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
2130                 break;
2131             case jit_code_unldi_x:
2132                 if (jit_x87_reg_p(node->u.w))
2133                     x87_unldi_x(rn(node->u.w), node->v.w, node->w.w);
2134                 else
2135                     sse_unldi_x(rn(node->u.w), node->v.w, node->w.w);
2136                 break;
2137                 case_rf(st, _f);
2138                 case_wf(st, _f);
2139                 case_rrf(stx, _f);
2140                 case_wrf(stx, _f);
2141             case jit_code_unstr_x:
2142                 if (jit_x87_reg_p(node->v.w))
2143                     x87_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
2144                 else
2145                     sse_unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
2146                 break;
2147             case jit_code_unsti_x:
2148                 if (jit_x87_reg_p(node->v.w))
2149                     x87_unsti_x(node->u.w, rn(node->v.w), node->w.w);
2150                 else
2151                     sse_unsti_x(node->u.w, rn(node->v.w), node->w.w);
2152                 break;
2153                 case_bff(lt, _f);
2154                 case_bfw(lt, _f, 32);
2155                 case_bff(le, _f);
2156                 case_bfw(le, _f, 32);
2157                 case_bff(eq, _f);
2158                 case_bfw(eq, _f, 32);
2159                 case_bff(ge, _f);
2160                 case_bfw(ge, _f, 32);
2161                 case_bff(gt, _f);
2162                 case_bfw(gt, _f, 32);
2163                 case_bff(ne, _f);
2164                 case_bfw(ne, _f, 32);
2165                 case_bff(unlt, _f);
2166                 case_bfw(unlt, _f, 32);
2167                 case_bff(unle, _f);
2168                 case_bfw(unle, _f, 32);
2169                 case_bff(uneq, _f);
2170                 case_bfw(uneq, _f, 32);
2171                 case_bff(unge, _f);
2172                 case_bfw(unge, _f, 32);
2173                 case_bff(ungt, _f);
2174                 case_bfw(ungt, _f, 32);
2175                 case_bff(ltgt, _f);
2176                 case_bfw(ltgt, _f, 32);
2177                 case_bff(ord, _f);
2178                 case_bfw(ord, _f, 32);
2179                 case_bff(unord, _f);
2180                 case_bfw(unord, _f, 32);
2181                 case_fff(add, _d);
2182                 case_ffw(add, _d, 64);
2183                 case_fff(sub, _d);
2184                 case_ffw(sub, _d, 64);
2185                 case_ffw(rsb, _d, 64);
2186                 case_fff(mul, _d);
2187                 case_ffw(mul, _d, 64);
2188                 case_fff(div, _d);
2189                 case_ffw(div, _d, 64);
2190                 case_ff(abs, _d);
2191                 case_ff(neg, _d);
2192                 case_ff(sqrt, _d);
2193                 case_rqr(fma, _d);
2194                 case_rqr(fms, _d);
2195                 case_rqr(fnma, _d);
2196                 case_rqr(fnms, _d);
2197                 case_fr(ext, _d);
2198                 case_fr(ext, _f_d);
2199                 case_rff(lt, _d);
2200                 case_rfw(lt, _d, 64);
2201                 case_rff(le, _d);
2202                 case_rfw(le, _d, 64);
2203                 case_rff(eq, _d);
2204                 case_rfw(eq, _d, 64);
2205                 case_rff(ge, _d);
2206                 case_rfw(ge, _d, 64);
2207                 case_rff(gt, _d);
2208                 case_rfw(gt, _d, 64);
2209                 case_rff(ne, _d);
2210                 case_rfw(ne, _d, 64);
2211                 case_rff(unlt, _d);
2212                 case_rfw(unlt, _d, 64);
2213                 case_rff(unle, _d);
2214                 case_rfw(unle, _d, 64);
2215                 case_rff(uneq, _d);
2216                 case_rfw(uneq, _d, 64);
2217                 case_rff(unge, _d);
2218                 case_rfw(unge, _d, 64);
2219                 case_rff(ungt, _d);
2220                 case_rfw(ungt, _d, 64);
2221                 case_rff(ltgt, _d);
2222                 case_rfw(ltgt, _d, 64);
2223                 case_rff(ord, _d);
2224                 case_rfw(ord, _d, 64);
2225                 case_rff(unord, _d);
2226                 case_rfw(unord, _d, 64);
2227             case jit_code_movr_d:
2228                 if (jit_x87_reg_p(node->u.w)) {
2229                     if (jit_x87_reg_p(node->v.w))
2230                         x87_movr_d(rn(node->u.w), rn(node->v.w));
2231                     else
2232                         x87_from_sse_d(rn(node->u.w), rn(node->v.w));
2233                 }
2234                 else {
2235                     if (jit_sse_reg_p(node->v.w))
2236                         sse_movr_d(rn(node->u.w), rn(node->v.w));
2237                     else
2238                         sse_from_x87_d(rn(node->u.w), rn(node->v.w));
2239                 }
2240                 break;
2241             case jit_code_movi_d:
2242                 assert(node->flag & jit_flag_data);
2243                 if (jit_x87_reg_p(node->u.w))
2244                     x87_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
2245                 else
2246                     sse_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
2247                 break;
2248                 case_fr(ld, _d);
2249                 case_fw(ld, _d);
2250                 case_frr(ldx, _d);
2251                 case_frw(ldx, _d);
2252                 case_rf(st, _d);
2253                 case_wf(st, _d);
2254                 case_rrf(stx, _d);
2255                 case_wrf(stx, _d);
2256                 case_bff(lt, _d);
2257                 case_bfw(lt, _d, 64);
2258                 case_bff(le, _d);
2259                 case_bfw(le, _d, 64);
2260                 case_bff(eq, _d);
2261                 case_bfw(eq, _d, 64);
2262                 case_bff(ge, _d);
2263                 case_bfw(ge, _d, 64);
2264                 case_bff(gt, _d);
2265                 case_bfw(gt, _d, 64);
2266                 case_bff(ne, _d);
2267                 case_bfw(ne, _d, 64);
2268                 case_bff(unlt, _d);
2269                 case_bfw(unlt, _d, 64);
2270                 case_bff(unle, _d);
2271                 case_bfw(unle, _d, 64);
2272                 case_bff(uneq, _d);
2273                 case_bfw(uneq, _d, 64);
2274                 case_bff(unge, _d);
2275                 case_bfw(unge, _d, 64);
2276                 case_bff(ungt, _d);
2277                 case_bfw(ungt, _d, 64);
2278                 case_bff(ltgt, _d);
2279                 case_bfw(ltgt, _d, 64);
2280                 case_bff(ord, _d);
2281                 case_bfw(ord, _d, 64);
2282                 case_bff(unord, _d);
2283                 case_bfw(unord, _d, 64);
2284             case jit_code_jmpr:
2285                 jit_check_frame();
2286                 jmpr(rn(node->u.w));
2287                 break;
2288             case jit_code_jmpi:
2289                 if (node->flag & jit_flag_node) {
2290                     temp = node->u.n;
2291                     assert(temp->code == jit_code_label ||
2292                            temp->code == jit_code_epilog);
2293                     if (temp->flag & jit_flag_patch)
2294                         jmpi(temp->u.w);
2295                     else {
2296 #if __X64
2297                         word = _jit->code.length -
2298                             (_jit->pc.uc - _jit->code.ptr);
2299                         if ((jit_int32_t)word == word)
2300                             word = jmpi(_jit->pc.w);
2301                         else
2302 #endif
2303                             word = jmpi_p(_jit->pc.w);
2304                         patch(word, node);
2305                     }
2306                 }
2307                 else {
2308                     jit_check_frame();
2309                     jmpi(node->u.w);
2310                 }
2311                 break;
2312             case jit_code_callr:
2313                 jit_check_frame();
2314                 callr(rn(node->u.w));
2315                 break;
2316             case jit_code_calli:
2317                 if (node->flag & jit_flag_node) {
2318                     temp = node->u.n;
2319                     assert(temp->code == jit_code_label ||
2320                            temp->code == jit_code_epilog);
2321                     if (temp->flag & jit_flag_patch)
2322                         calli(temp->u.w);
2323                     else {
2324 #if __X64
2325                         word = _jit->code.length -
2326                             (_jit->pc.uc - _jit->code.ptr);
2327                         if ((jit_int32_t)word == word)
2328                             word = calli(_jit->pc.w);
2329                         else
2330 #endif
2331                             word = calli_p(_jit->pc.w);
2332                         patch(word, node);
2333                     }
2334                 }
2335                 else {
2336                     jit_check_frame();
2337                     calli(node->u.w);
2338                 }
2339                 break;
2340             case jit_code_prolog:
2341                 _jitc->function = _jitc->functions.ptr + node->w.w;
2342                 undo.node = node;
2343                 undo.word = _jit->pc.w;
2344                 memcpy(&undo.func, _jitc->function, sizeof(undo.func));
2345 #if DEVEL_DISASSEMBLER
2346                 undo.prevw = prevw;
2347 #endif
2348                 undo.patch_offset = _jitc->patches.offset;
2349             restart_function:
2350                 compute_framesize();
2351                 patch_alist(0);
2352                 _jitc->again = 0;
2353                 prolog(node);
2354                 break;
2355             case jit_code_epilog:
2356                 assert(_jitc->function == _jitc->functions.ptr + node->w.w);
2357                 if (_jitc->again) {
2358                     for (temp = undo.node->next;
2359                          temp != node; temp = temp->next) {
2360                         if (temp->code == jit_code_label ||
2361                             temp->code == jit_code_epilog)
2362                             temp->flag &= ~jit_flag_patch;
2363                     }
2364                     temp->flag &= ~jit_flag_patch;
2365                     node = undo.node;
2366                     _jit->pc.w = undo.word;
2367                     /* undo.func.self.aoff and undo.func.regset should not
2368                      * be undone, as they will be further updated, and are
2369                      * the reason of the undo. */
2370                     undo.func.self.aoff = _jitc->function->frame +
2371                         _jitc->function->self.aoff;
2372                     undo.func.need_frame = _jitc->function->need_frame;
2373                     jit_regset_set(&undo.func.regset, &_jitc->function->regset);
2374                     /* allocar information also does not need to be undone */
2375                     undo.func.aoffoff = _jitc->function->aoffoff;
2376                     undo.func.allocar = _jitc->function->allocar;
2377                     /* real stack framesize is not in the jit_function_t,
2378                      * if it were, would need to not be undone  */
2379                     /* cvt_offset must also not be undone */
2380                     undo.func.cvt_offset = _jitc->function->cvt_offset;
2381                     /* this will be recomputed but undo anyway to have it
2382                      * better self documented.*/
2383                     undo.func.need_stack = _jitc->function->need_stack;
2384                     memcpy(_jitc->function, &undo.func, sizeof(undo.func));
2385 #if DEVEL_DISASSEMBLER
2386                     prevw = undo.prevw;
2387 #endif
2388                     _jitc->patches.offset = undo.patch_offset;
2389                     patch_alist(1);
2390                     goto restart_function;
2391                 }
2392                 if (node->link &&
2393                     (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
2394                     nop(sizeof(jit_word_t) - word);
2395                 /* remember label is defined */
2396                 node->flag |= jit_flag_patch;
2397                 node->u.w = _jit->pc.w;
2398                 epilog(node);
2399                 _jitc->function = NULL;
2400                 break;
2401             case jit_code_movr_w_f:
2402                 if (jit_sse_reg_p(node->u.w))
2403                     sse_movr_w_f(rn(node->u.w), rn(node->v.w));
2404                 else
2405                     x87_movr_w_f(rn(node->u.w), rn(node->v.w));
2406                 break;
2407             case jit_code_movr_f_w:
2408                 if (jit_sse_reg_p(node->v.w))
2409                     sse_movr_f_w(rn(node->u.w), rn(node->v.w));
2410                 else
2411                     x87_movr_f_w(rn(node->u.w), rn(node->v.w));
2412                 break;
2413             case jit_code_movi_f_w:
2414                 assert(node->flag & jit_flag_data);
2415                 movi_f_w(rn(node->u.w), *(jit_float32_t *)node->v.n->u.w);
2416                 break;
2417             case jit_code_movi_w_f:
2418                 if (jit_sse_reg_p(node->u.w))
2419                     sse_movi_w_f(rn(node->u.w), node->v.w);
2420                 else
2421                     x87_movi_w_f(rn(node->u.w), node->v.w);
2422                 break;
2423 #  if __X32 || __X64_32
2424             case jit_code_movr_ww_d:
2425                 if (jit_sse_reg_p(node->u.w))
2426                     sse_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
2427                 else
2428                     x87_movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
2429                 break;
2430             case jit_code_movr_d_ww:
2431                 if (jit_sse_reg_p(node->w.w))
2432                     sse_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
2433                 else
2434                     x87_movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
2435                 break;
2436             case jit_code_movi_d_ww:
2437                 assert(node->flag & jit_flag_data);
2438                 movi_d_ww(rn(node->u.w), rn(node->v.w),
2439                           *(jit_float64_t *)node->w.n->u.w);
2440                 break;
2441             case jit_code_movi_ww_d:
2442                 if (jit_sse_reg_p(node->u.w))
2443                     sse_movi_ww_d(rn(node->u.w), node->v.w, node->w.w);
2444                 else
2445                     x87_movi_ww_d(rn(node->u.w), node->v.w, node->w.w);
2446                 break;
2447 #  else
2448             case jit_code_movr_w_d:
2449                 if (jit_sse_reg_p(node->u.w))
2450                     sse_movr_w_d(rn(node->u.w), rn(node->v.w));
2451                 else
2452                     x87_movr_w_d(rn(node->u.w), rn(node->v.w));
2453                 break;
2454             case jit_code_movr_d_w:
2455                 if (jit_sse_reg_p(node->v.w))
2456                     sse_movr_d_w(rn(node->u.w), rn(node->v.w));
2457                 else
2458                     x87_movr_d_w(rn(node->u.w), rn(node->v.w));
2459                 break;
2460             case jit_code_movi_d_w:
2461                 assert(node->flag & jit_flag_data);
2462                 movi_d_w(rn(node->u.w), *(jit_float64_t *)node->v.n->u.w);
2463                 break;
2464             case jit_code_movi_w_d:
2465                 if (jit_sse_reg_p(node->u.w))
2466                     sse_movi_w_d(rn(node->u.w), node->v.w);
2467                 else
2468                     x87_movi_w_d(rn(node->u.w), node->v.w);
2469                 break;
2470 #  endif
2471             case jit_code_va_start:
2472                 vastart(rn(node->u.w));
2473                 break;
2474             case jit_code_va_arg:
2475                 vaarg(rn(node->u.w), rn(node->v.w));
2476                 break;
2477             case jit_code_va_arg_d:
2478                 vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w));
2479                 break;
2480             case jit_code_live:                 case jit_code_ellipsis:
2481             case jit_code_va_push:
2482             case jit_code_allocai:              case jit_code_allocar:
2483             case jit_code_arg_c:                case jit_code_arg_s:
2484             case jit_code_arg_i:
2485 #  if __WORDSIZE == 64
2486             case jit_code_arg_l:
2487 #  endif
2488             case jit_code_arg_f:                case jit_code_arg_d:
2489             case jit_code_va_end:
2490             case jit_code_ret:
2491             case jit_code_retr_c:               case jit_code_reti_c:
2492             case jit_code_retr_uc:              case jit_code_reti_uc:
2493             case jit_code_retr_s:               case jit_code_reti_s:
2494             case jit_code_retr_us:              case jit_code_reti_us:
2495             case jit_code_retr_i:               case jit_code_reti_i:
2496 #if __WORDSIZE == 64
2497             case jit_code_retr_ui:              case jit_code_reti_ui:
2498             case jit_code_retr_l:               case jit_code_reti_l:
2499 #endif
2500             case jit_code_retr_f:               case jit_code_reti_f:
2501             case jit_code_retr_d:               case jit_code_reti_d:
2502             case jit_code_getarg_c:             case jit_code_getarg_uc:
2503             case jit_code_getarg_s:             case jit_code_getarg_us:
2504             case jit_code_getarg_i:
2505 #if __X64 && !__X64_32
2506             case jit_code_getarg_ui:            case jit_code_getarg_l:
2507 #endif
2508             case jit_code_getarg_f:             case jit_code_getarg_d:
2509             case jit_code_putargr_c:            case jit_code_putargi_c:
2510             case jit_code_putargr_uc:           case jit_code_putargi_uc:
2511             case jit_code_putargr_s:            case jit_code_putargi_s:
2512             case jit_code_putargr_us:           case jit_code_putargi_us:
2513             case jit_code_putargr_i:            case jit_code_putargi_i:
2514 #if __WORDSIZE == 64
2515             case jit_code_putargr_ui:           case jit_code_putargi_ui:
2516             case jit_code_putargr_l:            case jit_code_putargi_l:
2517 #endif
2518             case jit_code_putargr_f:            case jit_code_putargi_f:
2519             case jit_code_putargr_d:            case jit_code_putargi_d:
2520             case jit_code_pushargr_c:           case jit_code_pushargi_c:
2521             case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
2522             case jit_code_pushargr_s:           case jit_code_pushargi_s:
2523             case jit_code_pushargr_us:          case jit_code_pushargi_us:
2524             case jit_code_pushargr_i:           case jit_code_pushargi_i:
2525 #if __WORDSIZE == 64
2526             case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
2527             case jit_code_pushargr_l:           case jit_code_pushargi_l:
2528 #endif
2529             case jit_code_pushargr_f:           case jit_code_pushargi_f:
2530             case jit_code_pushargr_d:           case jit_code_pushargi_d:
2531             case jit_code_retval_c:             case jit_code_retval_uc:
2532             case jit_code_retval_s:             case jit_code_retval_us:
2533             case jit_code_retval_i:
2534 #if __X64 && !__X32
2535             case jit_code_retval_ui:            case jit_code_retval_l:
2536 #endif
2537             case jit_code_prepare:
2538             case jit_code_finishr:              case jit_code_finishi:
2539             case jit_code_negi_f:               case jit_code_absi_f:
2540             case jit_code_sqrti_f:              case jit_code_negi_d:
2541             case jit_code_absi_d:               case jit_code_sqrti_d:
2542             case jit_code_fmai_f:               case jit_code_fmsi_f:
2543             case jit_code_fmai_d:               case jit_code_fmsi_d:
2544             case jit_code_fnmai_f:              case jit_code_fnmsi_f:
2545             case jit_code_fnmai_d:              case jit_code_fnmsi_d:
2546                 break;
2547             case jit_code_retval_f:
2548 #if __X32
2549                 if (jit_sse_reg_p(node->u.w)) {
2550                     fstpr(_ST1_REGNO);
2551                     sse_from_x87_f(rn(node->u.w), _ST0_REGNO);
2552                 }
2553                 else
2554                     fstpr(rn(node->u.w) + 1);
2555 #endif
2556                 break;
2557             case jit_code_retval_d:
2558 #if __X32
2559                 if (jit_sse_reg_p(node->u.w)) {
2560                     fstpr(_ST1_REGNO);
2561                     sse_from_x87_d(rn(node->u.w), _ST0_REGNO);
2562                 }
2563                 else
2564                     fstpr(rn(node->u.w) + 1);
2565 #endif
2566                 break;
2567             case jit_code_negi:
2568                 negi(rn(node->u.w), node->v.w);
2569                 break;
2570             case jit_code_comi:
2571                 comi(rn(node->u.w), node->v.w);
2572                 break;
2573             case jit_code_exti_c:
2574                 exti_c(rn(node->u.w), node->v.w);
2575                 break;
2576             case jit_code_exti_uc:
2577                 exti_uc(rn(node->u.w), node->v.w);
2578                 break;
2579             case jit_code_exti_s:
2580                 exti_s(rn(node->u.w), node->v.w);
2581                 break;
2582             case jit_code_exti_us:
2583                 exti_us(rn(node->u.w), node->v.w);
2584                 break;
2585             case jit_code_bswapi_us:
2586                 bswapi_us(rn(node->u.w), node->v.w);
2587                 break;
2588             case jit_code_bswapi_ui:
2589                 bswapi_ui(rn(node->u.w), node->v.w);
2590                 break;
2591             case jit_code_htoni_us:
2592                 htoni_us(rn(node->u.w), node->v.w);
2593                 break;
2594             case jit_code_htoni_ui:
2595                 htoni_ui(rn(node->u.w), node->v.w);
2596                 break;
2597 #if __X64 && !__X64_32
2598             case jit_code_exti_i:
2599                 exti_i(rn(node->u.w), node->v.w);
2600                 break;
2601             case jit_code_exti_ui:
2602                 exti_ui(rn(node->u.w), node->v.w);
2603                 break;
2604             case jit_code_bswapi_ul:
2605                 bswapi_ul(rn(node->u.w), node->v.w);
2606                 break;
2607             case jit_code_htoni_ul:
2608                 htoni_ul(rn(node->u.w), node->v.w);
2609                 break;
2610 #endif
2611             case jit_code_cloi:
2612                 cloi(rn(node->u.w), node->v.w);
2613                 break;
2614             case jit_code_clzi:
2615                 clzi(rn(node->u.w), node->v.w);
2616                 break;
2617             case jit_code_ctoi:
2618                 ctoi(rn(node->u.w), node->v.w);
2619                 break;
2620             case jit_code_ctzi:
2621                 ctzi(rn(node->u.w), node->v.w);
2622                 break;
2623             case jit_code_rbiti:
2624                 rbiti(rn(node->u.w), node->v.w);
2625                 break;
2626             case jit_code_popcnti:
2627                 popcnti(rn(node->u.w), node->v.w);
2628                 break;
2629             case jit_code_exti:
2630                 exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
2631                 break;
2632             case jit_code_exti_u:
2633                 exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
2634                 break;
2635             default:
2636                 abort();
2637         }
2638         jit_regarg_clr(node, value);
2639         assert(_jitc->regarg == 0 && _jitc->synth == 0);
2640         /* update register live state */
2641         jit_reglive(node);
2642     }
2643 #undef case_bfw
2644 #undef case_bff
2645 #undef case_ffw
2646 #undef case_rfw
2647 #undef case_rff
2648 #undef case_brw
2649 #undef case_brr
2650 #undef case_wrf
2651 #undef case_wrr
2652 #undef case_frw
2653 #undef case_rrf
2654 #undef case_rrw
2655 #undef case_frr
2656 #undef case_rrr
2657 #undef case_wf
2658 #undef case_fw
2659 #undef case_fr
2660 #undef case_rr
2661
2662     for (offset = 0; offset < _jitc->patches.offset; offset++) {
2663         node = _jitc->patches.ptr[offset].node;
2664         word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
2665         patch_at(_jitc->patches.ptr[offset].inst, word);
2666     }
2667
2668     jit_flush(_jit->code.ptr, _jit->pc.uc);
2669
2670     return (_jit->code.ptr);
2671 }
2672
2673 #define CODE                            1
2674 #  include "jit_x86-cpu.c"
2675 #  include "jit_x86-sse.c"
2676 #  include "jit_x86-x87.c"
2677 #undef CODE
2678
/*
 * Cache flush hook for the code range starting at fptr and ending at tptr.
 *
 * In this backend the hook does nothing: both arguments are ignored and
 * no memory is touched.  It is invoked at the end of code emission with
 * the start of the code buffer and the current emission pointer.
 */
void
jit_flush(void *fptr, void *tptr)
{
    /* Deliberately empty; mark the parameters as intentionally unused. */
    (void)fptr;
    (void)tptr;
}
2683
2684 void
2685 _emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
2686 {
2687     ldxi(rn(r0), rn(r1), i0);
2688 }
2689
2690 void
2691 _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
2692 {
2693     stxi(i0, rn(r0), rn(r1));
2694 }
2695
2696 void
2697 _emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
2698 {
2699     if (jit_x87_reg_p(r0))
2700         x87_ldxi_d(rn(r0), rn(r1), i0);
2701     else
2702         sse_ldxi_d(rn(r0), rn(r1), i0);
2703 }
2704
2705 void
2706 _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
2707 {
2708     if (jit_x87_reg_p(r1))
2709         x87_stxi_d(i0, rn(r0), rn(r1));
2710     else
2711         sse_stxi_d(i0, rn(r0), rn(r1));
2712 }
2713
2714 static void
2715 _compute_framesize(jit_state_t *_jit)
2716 {
2717     jit_int32_t         reg;
2718     /* Save stack pointer in first slot */
2719     _jitc->framesize = REAL_WORDSIZE;
2720     for (reg = 0; reg < jit_size(iregs); reg++)
2721         if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
2722             _jitc->framesize += REAL_WORDSIZE;
2723
2724 #if __X64 && (__CYGWIN__ || _WIN32)
2725     for (reg = 0; reg < jit_size(fregs); reg++)
2726         if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
2727             _jitc->framesize += sizeof(jit_float64_t);
2728 #endif
2729     /* Make sure functions called have a 16 byte aligned stack */
2730     _jitc->framesize = (_jitc->framesize + 15) & -16;
2731     _jitc->framesize += 16 - REAL_WORDSIZE;
2732 }
2733
2734 static void
2735 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
2736 {
2737     jit_int32_t         flag;
2738
2739     assert(node->flag & jit_flag_node);
2740     if (node->code == jit_code_movi)
2741         flag = node->v.n->flag;
2742     else
2743         flag = node->u.n->flag;
2744     assert(!(flag & jit_flag_patch));
2745     if (_jitc->patches.offset >= _jitc->patches.length) {
2746         jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
2747                     _jitc->patches.length * sizeof(jit_patch_t),
2748                     (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
2749         _jitc->patches.length += 1024;
2750     }
2751     _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
2752     _jitc->patches.ptr[_jitc->patches.offset].node = node;
2753     ++_jitc->patches.offset;
2754 }
2755
2756 static void
2757 _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2758 {
2759     CHECK_CVT_OFFSET();
2760     x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2761     sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
2762 }
2763
2764 static void
2765 _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2766 {
2767     CHECK_CVT_OFFSET();
2768     x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2769     sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
2770 }
2771
2772 static void
2773 _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2774 {
2775     CHECK_CVT_OFFSET();
2776     sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2777     x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
2778 }
2779
2780 static void
2781 _x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2782 {
2783     CHECK_CVT_OFFSET();
2784     sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2785     x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
2786 }