Merge pull request #718 from pcercuei/update-lightrec-20230224
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86.c
1 /*
2  * Copyright (C) 2012-2023  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #include <lightning.h>
21 #include <lightning/jit_private.h>
22
/* Build-configuration dependent parameters.  Three configurations are
 * distinguished below: __X32, (__CYGWIN__ || _WIN32), and everything else.
 * Each one fixes whether arguments may live in registers
 * (jit_arg_reg_p / jit_arg_f_reg_p), the base frame size
 * (stack_framesize), the word size (REAL_WORDSIZE) and the va_arg strides. */
23 #if __X32
/* In this configuration no argument index is ever reported as being in a
 * register, and every immediate is accepted by address_p(). */
24 #  define CAN_RIP_ADDRESS               0
25 #  define address_p(i0)                 1
26 #  define jit_arg_reg_p(i)              0
27 #  define jit_arg_f_reg_p(i)            0
28 /* callee save                        + 16 byte align
29  * align16(%ebp + %rbx + %rsi + %rdi) + (16 - 4)  */
30 #  define stack_framesize               28
31 #  define REAL_WORDSIZE                 4
32 #  define va_gp_increment               4
33 #  define va_fp_increment               8
34 #else
/* CAN_RIP_ADDRESS is enabled only when neither _WIN32 nor __X64_32 is set. */
35 #  if _WIN32 || __X64_32
36 #    define CAN_RIP_ADDRESS             0
37 #  else
38 #    define CAN_RIP_ADDRESS             1
39 #  endif
/* address_p(): non-negative word under __X64_32, otherwise whatever
 * can_sign_extend_int_p() accepts. */
40 #  if __X64_32
41 #    define address_p(i0)               ((jit_word_t)(i0) >= 0)
42 #  else
43 #    define address_p(i0)               can_sign_extend_int_p(i0)
44 #  endif
45 #  if __CYGWIN__ || _WIN32
/* Four register argument slots; the float predicate is the same as the
 * integer one. */
46 #    define jit_arg_reg_p(i)            ((i) >= 0 && (i) < 4)
47 #    define jit_arg_f_reg_p(i)          jit_arg_reg_p(i)
48 /* callee save                                                + 16 byte align
49  * align16(%rbp+%rbx+%rdi+%rsi+%r1[2-5]+%xmm[6-9]+%xmm1[0-5]) + (16 - 8) */
50 #    define stack_framesize             152
51 #    define va_fp_increment             8
52 #  else
/* Six integer and eight float register argument slots, counted
 * independently. */
53 #    define jit_arg_reg_p(i)            ((i) >= 0 && (i) < 6)
54 #    define jit_arg_f_reg_p(i)          ((i) >= 0 && (i) < 8)
55 /* callee save                                      + 16 byte align
56  * align16(%rbp + %r15 + %r14 + %r13 + %r12 + %rbx) + (16 - 8) */
57 #    define stack_framesize             56
/* The helpers below describe the register save area of jit_va_list_t:
 * offsets are relative to its rdi field, integer slots are 8 bytes wide
 * and float slots are 16 bytes wide and start at va_gp_max_offset. */
58 #    define first_gp_argument           rdi
59 #    define first_gp_offset             offsetof(jit_va_list_t, rdi)
60 #    define first_gp_from_offset(gp)    ((gp) / 8)
61 #    define last_gp_argument            r9
62 #    define va_gp_max_offset                                            \
63         (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
64 #    define first_fp_argument           xmm0
65 #    define first_fp_offset             offsetof(jit_va_list_t, xmm0)
66 #    define last_fp_argument            xmm7
67 #    define va_fp_max_offset                                            \
68         (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
69 #    define va_fp_increment             16
70 #    define first_fp_from_offset(fp)    (((fp) - va_gp_max_offset) / 16)
71 #  endif
72 #  define va_gp_increment               8
73 #  define REAL_WORDSIZE                 8
74 #endif
/* Shorthand for the current function's cvt_offset field. */
75 #define CVT_OFFSET                      _jitc->function->cvt_offset
76
/* Make sure the current function owns a jit_float64_t sized stack slot.
 * On first use (cvt_offset still zero) the slot is reserved with
 * jit_allocai() and _jitc->again is set; later uses are a no-op.
 * Requires _jit/_jitc to be in scope at the expansion site. */
77 #define CHECK_CVT_OFFSET()                                              \
78     do {                                                                \
79         if (!_jitc->function->cvt_offset) {                             \
80             _jitc->again = 1;                                           \
81             _jitc->function->cvt_offset =                               \
82                  jit_allocai(sizeof(jit_float64_t));                    \
83         }                                                               \
84     } while (0)
85
86 /*
87  * Types
88  */
/* va_list representation used by the backend.  It is a plain pointer for
 * __X32, __CYGWIN__ and _WIN32 builds, and a structure with an explicit
 * register save area otherwise. */
89 #if __X32 || __CYGWIN__ || _WIN32
90 typedef jit_pointer_t jit_va_list_t;
91 #else
92 typedef struct jit_va_list {
    /* NOTE(review): the four leading fields presumably mirror the
     * System V AMD64 va_list (gp_offset, fp_offset, overflow_arg_area,
     * reg_save_area) -- confirm against the code that fills them. */
93     jit_int32_t         gpoff;
94     jit_int32_t         fpoff;
95     jit_pointer_t       over;
96     jit_pointer_t       save;
    /* Integer register slots; va_gp_max_offset and first_gp_offset are
     * computed from the rdi..r9 fields with offsetof(). */
97     /* Declared explicitly as int64 for the x32 abi */
98     jit_int64_t         rdi;
99     jit_int64_t         rsi;
100     jit_int64_t         rdx;
101     jit_int64_t         rcx;
102     jit_int64_t         r8;
103     jit_int64_t         r9;
    /* Float register slots; every xmmN is followed by an _upN field, so
     * each slot spans two jit_float64_t (see va_fp_increment). */
104     jit_float64_t       xmm0;
105     jit_float64_t       _up0;
106     jit_float64_t       xmm1;
107     jit_float64_t       _up1;
108     jit_float64_t       xmm2;
109     jit_float64_t       _up2;
110     jit_float64_t       xmm3;
111     jit_float64_t       _up3;
112     jit_float64_t       xmm4;
113     jit_float64_t       _up4;
114     jit_float64_t       xmm5;
115     jit_float64_t       _up5;
116     jit_float64_t       xmm6;
117     jit_float64_t       _up6;
118     jit_float64_t       xmm7;
119     jit_float64_t       _up7;
120 } jit_va_list_t;
121 #endif
122
123 /*
124  * Prototypes
125  */
/* Each static helper is paired with a wrapper macro that supplies the
 * implicit `_jit' state argument, so call sites need a `_jit' in scope. */
126 #define compute_framesize()             _compute_framesize(_jit)
127 static void _compute_framesize(jit_state_t*);
128 #define patch(instr, node)              _patch(_jit, instr, node)
129 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
/* Register transfer helpers between the sse and x87 register files, in
 * single (_f) and double (_d) precision variants. */
130 #define sse_from_x87_f(r0, r1)          _sse_from_x87_f(_jit, r0, r1)
131 static void _sse_from_x87_f(jit_state_t*,jit_int32_t,jit_int32_t);
132 #define sse_from_x87_d(r0, r1)          _sse_from_x87_d(_jit, r0, r1)
133 static void _sse_from_x87_d(jit_state_t*,jit_int32_t,jit_int32_t);
134 #define x87_from_sse_f(r0, r1)          _x87_from_sse_f(_jit, r0, r1)
135 static void _x87_from_sse_f(jit_state_t*,jit_int32_t,jit_int32_t);
136 #define x87_from_sse_d(r0, r1)          _x87_from_sse_d(_jit, r0, r1)
137 static void _x87_from_sse_d(jit_state_t*,jit_int32_t,jit_int32_t);
138
139 #define PROTO                           1
140 #  include "jit_x86-cpu.c"
141 #  include "jit_x86-sse.c"
142 #  include "jit_x86-x87.c"
143 #undef PROTO
144
145 /*
146  * Initialization
147  */
/* Host cpu feature flags, filled in by jit_get_cpu(). */
148 jit_cpu_t               jit_cpu;
/* Register description table.  Every entry ORs register class flags
 * (rc(...)) with the register number and pairs the result with a printable
 * name.  One of three layouts is compiled in: __X32, (__CYGWIN__ || _WIN32)
 * or the remaining 64-bit case.  The table is closed by a _NOREG entry;
 * _jit_init() sets reglen to the entry count minus one. */
149 jit_register_t          _rvs[] = {
150 #if __X32
151     { rc(gpr) | rc(rg8) | 0,            "%eax" },
152     { rc(gpr) | rc(rg8) | 1,            "%ecx" },
153     { rc(gpr) | rc(rg8) | 2,            "%edx" },
154     { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%ebx" },
155     { rc(sav) | rc(gpr) | 6,            "%esi" },
156     { rc(sav) | rc(gpr) | 7,            "%edi" },
157     { rc(sav) | 4,                      "%esp" },
158     { rc(sav) | 5,                      "%ebp" },
    /* Entries carrying rc(xpr) get their spec cleared by _jit_init() when
     * jit_cpu.sse2 is not set. */
159     { rc(xpr) | rc(fpr) | 0,            "%xmm0" },
160     { rc(xpr) | rc(fpr) | 1,            "%xmm1" },
161     { rc(xpr) | rc(fpr) | 2,            "%xmm2" },
162     { rc(xpr) | rc(fpr) | 3,            "%xmm3" },
163     { rc(xpr) | rc(fpr) | 4,            "%xmm4" },
164     { rc(xpr) | rc(fpr) | 5,            "%xmm5" },
165     { rc(xpr) | rc(fpr) | 6,            "%xmm6" },
166     { rc(xpr) | rc(fpr) | 7,            "%xmm7" },
167     { rc(fpr) | 0,                      "st(0)" },
168     { rc(fpr) | 1,                      "st(1)" },
169     { rc(fpr) | 2,                      "st(2)" },
170     { rc(fpr) | 3,                      "st(3)" },
171     { rc(fpr) | 4,                      "st(4)" },
172     { rc(fpr) | 5,                      "st(5)" },
173     { rc(fpr) | 6,                      "st(6)" },
174     { rc(fpr) | 7,                      "st(7)" },
175 #else
176 #  if __CYGWIN__ || _WIN32
177     { rc(gpr) | rc(rg8) | 0,            "%rax" },
    /* NOTE(review): rc(rg8) appears twice in the next two entries; the
     * repetition looks redundant -- confirm rc() is a plain flag macro. */
178     { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" },
179     { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" },
180     { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%rbx" },
181     { rc(sav) | rc(gpr) | 7,            "%rdi" },
182     { rc(sav) | rc(gpr) | 6,            "%rsi" },
183     { rc(sav) | rc(gpr) | 12,           "%r12" },
184     { rc(sav) | rc(gpr) | 13,           "%r13" },
185     { rc(sav) | rc(gpr) | 14,           "%r14" },
186     { rc(sav) | rc(gpr) | 15,           "%r15" },
187     { rc(arg) | rc(rg8) | rc(gpr) | 9,  "%r9" },
188     { rc(arg) | rc(rg8) | rc(gpr) | 8,  "%r8" },
189     { rc(arg) | rc(rg8) | rc(gpr) | 2,  "%rdx" },
190     { rc(arg) | rc(rg8) | rc(gpr) | 1,  "%rcx" },
191     { rc(sav) | 4,                      "%rsp" },
192     { rc(sav) | 5,                      "%rbp" },
193     { rc(xpr) | rc(fpr) | 4,            "%xmm4" },
194     { rc(xpr) | rc(fpr) | 5,            "%xmm5" },
195     { rc(sav) | rc(xpr) | rc(fpr) | 6,  "%xmm6" },
196     { rc(sav) | rc(xpr) | rc(fpr) | 7,  "%xmm7" },
197     { rc(sav) | rc(xpr) | rc(fpr) | 8,  "%xmm8" },
198     { rc(sav) | rc(xpr) | rc(fpr) | 9,  "%xmm9" },
199     { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" },
200     { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" },
201     { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" },
202     { rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" },
203     { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" },
204     { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" },
205     { rc(xpr) | rc(arg) | rc(fpr) | 3,  "%xmm3" },
206     { rc(xpr) | rc(arg) | rc(fpr) | 2,  "%xmm2" },
207     { rc(xpr) | rc(arg) | rc(fpr) | 1,  "%xmm1" },
208     { rc(xpr) | rc(arg) | rc(fpr) | 0,  "%xmm0" },
209 #else
210     /* %rax is a pseudo flag argument for varargs functions */
211     { rc(arg) | rc(gpr) | rc(rg8) | 0,  "%rax" },
212     { rc(gpr) | rc(rg8) | 10,           "%r10" },
213     { rc(gpr) | rc(rg8) | 11,           "%r11" },
214     { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%rbx" },
215     { rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" },
216     { rc(sav) | rc(rg8) | rc(gpr) | 14, "%r14" },
217     { rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" },
218     { rc(sav) | rc(gpr) | rc(rg8) | 12, "%r12" },
219     { rc(arg) | rc(rg8) | rc(gpr) | 9,  "%r9" },
220     { rc(arg) | rc(rg8) | rc(gpr) | 8,  "%r8" },
221     { rc(arg) | rc(rg8) | rc(gpr) | 1,  "%rcx" },
222     { rc(arg) | rc(rg8) | rc(gpr) | 2,  "%rdx" },
223     { rc(arg) | rc(rg8) | rc(gpr) | 6,  "%rsi" },
224     { rc(arg) | rc(rg8) | rc(gpr) | 7,  "%rdi" },
225     { rc(sav) | 4,                      "%rsp" },
226     { rc(sav) | 5,                      "%rbp" },
227     { rc(xpr) | rc(fpr) | 8,            "%xmm8" },
228     { rc(xpr) | rc(fpr) | 9,            "%xmm9" },
229     { rc(xpr) | rc(fpr) | 10,           "%xmm10" },
230     { rc(xpr) | rc(fpr) | 11,           "%xmm11" },
231     { rc(xpr) | rc(fpr) | 12,           "%xmm12" },
232     { rc(xpr) | rc(fpr) | 13,           "%xmm13" },
233     { rc(xpr) | rc(fpr) | 14,           "%xmm14" },
234     { rc(xpr) | rc(fpr) | 15,           "%xmm15" },
235     { rc(xpr) | rc(arg) | rc(fpr) | 7,  "%xmm7" },
236     { rc(xpr) | rc(arg) | rc(fpr) | 6,  "%xmm6" },
237     { rc(xpr) | rc(arg) | rc(fpr) | 5,  "%xmm5" },
238     { rc(xpr) | rc(arg) | rc(fpr) | 4,  "%xmm4" },
239     { rc(xpr) | rc(arg) | rc(fpr) | 3,  "%xmm3" },
240     { rc(xpr) | rc(arg) | rc(fpr) | 2,  "%xmm2" },
241     { rc(xpr) | rc(arg) | rc(fpr) | 1,  "%xmm1" },
242     { rc(xpr) | rc(arg) | rc(fpr) | 0,  "%xmm0" },
243 #  endif
    /* The x87 stack registers close both 64-bit layouts. */
244     { rc(fpr) | 0,                      "st(0)" },
245     { rc(fpr) | 1,                      "st(1)" },
246     { rc(fpr) | 2,                      "st(2)" },
247     { rc(fpr) | 3,                      "st(3)" },
248     { rc(fpr) | 4,                      "st(4)" },
249     { rc(fpr) | 5,                      "st(5)" },
250     { rc(fpr) | 6,                      "st(6)" },
251     { rc(fpr) | 7,                      "st(7)" },
252 #endif
    /* Terminating entry. */
253     { _NOREG,                           "<none>" },
254 };
255
/* Integer registers, per build configuration.  Every register listed here
 * is flagged rc(sav) in the matching _rvs layout.
 * NOTE(review): presumably the set saved/restored by the prolog and epilog
 * emitters -- the users of this array are outside this chunk. */
256 static jit_int32_t iregs[] = {
257 #if __X32
258     _RBX, _RSI, _RDI,
259 #elif (__CYGWIN__ || _WIN32)
260     _RBX, _RDI, _RSI, _R12, _R13, _R14, _R15,
261 #else
262     _R15, _R14, _R13, _R12, _RBX,
263 #endif
264 };
265
/* Only built for __X64 with __CYGWIN__ or _WIN32: the xmm registers that
 * the matching _rvs layout flags rc(sav) (%xmm6 .. %xmm15).
 * NOTE(review): presumably consumed by the prolog/epilog emitters, which
 * are outside this chunk. */
266 #if __X64 && (__CYGWIN__ || _WIN32)
267 static jit_int32_t fregs[] = {
268     _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
269 };
270 #endif
271
272 /*
273  * Implementation
274  */
275 void
276 jit_get_cpu(void)
277 {
278     union {
279         /* eax=7 and ecx=0 */
280         struct {
281             jit_uword_t fsgsbase        : 1;
282             jit_uword_t IA32_TSC_ADJUST : 1;
283             jit_uword_t sgx             : 1;
284             jit_uword_t bmi1            : 1;
285             jit_uword_t hle             : 1;
286             jit_uword_t avx2            : 1;
287             jit_uword_t FDP_EXCPTN_ONLY : 1;
288             jit_uword_t smep            : 1;
289             jit_uword_t bmi2            : 1;
290             jit_uword_t erms            : 1;
291             jit_uword_t invpcid         : 1;
292             jit_uword_t rtm             : 1;
293             jit_uword_t rdt_m_pqm       : 1;
294             jit_uword_t dep_FPU_CS_DS   : 1;
295             jit_uword_t mpx             : 1;
296             jit_uword_t rdt_a_pqe       : 1;
297             jit_uword_t avx512_f        : 1;
298             jit_uword_t avx512_dq       : 1;
299             jit_uword_t rdseed          : 1;
300             jit_uword_t adx             : 1;
301             jit_uword_t smap            : 1;
302             jit_uword_t avx512_ifma     : 1;
303             jit_uword_t __reserved0     : 1;
304             jit_uword_t clflushopt      : 1;
305             jit_uword_t clwb            : 1;
306             jit_uword_t pt              : 1;
307             jit_uword_t avx512_pf       : 1;
308             jit_uword_t avx512_er       : 1;
309             jit_uword_t avx512_cd       : 1;
310             jit_uword_t sha             : 1;
311             jit_uword_t avx512_bw       : 1;
312             jit_uword_t avx512_vl       : 1;
313         } bits;
314         jit_uword_t     cpuid;
315     } ebx;
316     union {
317         /* eax=0 */
318         struct {
319             jit_uint32_t sse3           : 1;
320             jit_uint32_t pclmulqdq      : 1;
321             jit_uint32_t dtes64         : 1;    /* amd reserved */
322             jit_uint32_t monitor        : 1;
323             jit_uint32_t ds_cpl         : 1;    /* amd reserved */
324             jit_uint32_t vmx            : 1;    /* amd reserved */
325             jit_uint32_t smx            : 1;    /* amd reserved */
326             jit_uint32_t est            : 1;    /* amd reserved */
327             jit_uint32_t tm2            : 1;    /* amd reserved */
328             jit_uint32_t ssse3          : 1;
329             jit_uint32_t cntx_id        : 1;    /* amd reserved */
330             jit_uint32_t __reserved0    : 1;
331             jit_uint32_t fma            : 1;
332             jit_uint32_t cmpxchg16b     : 1;
333             jit_uint32_t xtpr           : 1;    /* amd reserved */
334             jit_uint32_t pdcm           : 1;    /* amd reserved */
335             jit_uint32_t __reserved1    : 1;
336             jit_uint32_t pcid           : 1;    /* amd reserved */
337             jit_uint32_t dca            : 1;    /* amd reserved */
338             jit_uint32_t sse4_1         : 1;
339             jit_uint32_t sse4_2         : 1;
340             jit_uint32_t x2apic         : 1;    /* amd reserved */
341             jit_uint32_t movbe          : 1;    /* amd reserved */
342             jit_uint32_t popcnt         : 1;
343             jit_uint32_t tsc            : 1;    /* amd reserved */
344             jit_uint32_t aes            : 1;
345             jit_uint32_t xsave          : 1;
346             jit_uint32_t osxsave        : 1;
347             jit_uint32_t avx            : 1;
348             jit_uint32_t __reserved2    : 1;    /* amd F16C */
349             jit_uint32_t __reserved3    : 1;
350             jit_uint32_t __alwayszero   : 1;    /* amd RAZ */
351         } bits;
352         jit_uword_t     cpuid;
353     } ecx;
354     union {
355         /* eax=0 */
356         struct {
357             jit_uint32_t fpu            : 1;
358             jit_uint32_t vme            : 1;
359             jit_uint32_t de             : 1;
360             jit_uint32_t pse            : 1;
361             jit_uint32_t tsc            : 1;
362             jit_uint32_t msr            : 1;
363             jit_uint32_t pae            : 1;
364             jit_uint32_t mce            : 1;
365             jit_uint32_t cmpxchg8b      : 1;
366             jit_uint32_t apic           : 1;
367             jit_uint32_t __reserved0    : 1;
368             jit_uint32_t sep            : 1;
369             jit_uint32_t mtrr           : 1;
370             jit_uint32_t pge            : 1;
371             jit_uint32_t mca            : 1;
372             jit_uint32_t cmov           : 1;
373             jit_uint32_t pat            : 1;
374             jit_uint32_t pse36          : 1;
375             jit_uint32_t psn            : 1;    /* amd reserved */
376             jit_uint32_t clfsh          : 1;
377             jit_uint32_t __reserved1    : 1;
378             jit_uint32_t ds             : 1;    /* amd reserved */
379             jit_uint32_t acpi           : 1;    /* amd reserved */
380             jit_uint32_t mmx            : 1;
381             jit_uint32_t fxsr           : 1;
382             jit_uint32_t sse            : 1;
383             jit_uint32_t sse2           : 1;
384             jit_uint32_t ss             : 1;    /* amd reserved */
385             jit_uint32_t htt            : 1;
386             jit_uint32_t tm             : 1;    /* amd reserved */
387             jit_uint32_t __reserved2    : 1;
388             jit_uint32_t pbe            : 1;    /* amd reserved */
389         } bits;
390         jit_uword_t     cpuid;
391     } edx;
392 #if __X32
393     int                 ac, flags;
394 #endif
395     jit_uword_t         eax;
396
397 #if __X32
398     /* adapted from glibc __sysconf */
399     __asm__ volatile ("pushfl;\n\t"
400                       "popl %0;\n\t"
401                       "movl $0x240000, %1;\n\t"
402                       "xorl %0, %1;\n\t"
403                       "pushl %1;\n\t"
404                       "popfl;\n\t"
405                       "pushfl;\n\t"
406                       "popl %1;\n\t"
407                       "xorl %0, %1;\n\t"
408                       "pushl %0;\n\t"
409                       "popfl"
410                       : "=r" (flags), "=r" (ac));
411
412     /* i386 or i486 without cpuid */
413     if ((ac & (1 << 21)) == 0)
414         /* probably without x87 as well */
415         return;
416 #endif
417
418     /* query %eax = 1 function */
419 #if __X32 || __X64_32
420     __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
421 #else
422     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
423 #endif
424                       : "=a" (eax), "=r" (ebx.cpuid),
425                       "=c" (ecx.cpuid), "=d" (edx.cpuid)
426                       : "0" (1));
427
428     jit_cpu.fpu         = edx.bits.fpu;
429     jit_cpu.cmpxchg8b   = edx.bits.cmpxchg8b;
430     jit_cpu.cmov        = edx.bits.cmov;
431     jit_cpu.mmx         = edx.bits.mmx;
432     jit_cpu.sse         = edx.bits.sse;
433     jit_cpu.sse2        = edx.bits.sse2;
434     jit_cpu.sse3        = ecx.bits.sse3;
435     jit_cpu.pclmulqdq   = ecx.bits.pclmulqdq;
436     jit_cpu.ssse3       = ecx.bits.ssse3;
437     jit_cpu.fma         = ecx.bits.fma;
438     jit_cpu.cmpxchg16b  = ecx.bits.cmpxchg16b;
439     jit_cpu.sse4_1      = ecx.bits.sse4_1;
440     jit_cpu.sse4_2      = ecx.bits.sse4_2;
441     jit_cpu.movbe       = ecx.bits.movbe;
442     jit_cpu.popcnt      = ecx.bits.popcnt;
443     jit_cpu.aes         = ecx.bits.aes;
444     jit_cpu.avx         = ecx.bits.avx;
445
446     /* query %eax = 7 and ecx = 0 function */
447 #if __X64
448     __asm__ volatile ("cpuid"
449                       : "=a" (eax), "=b" (ebx.cpuid), "=c" (ecx), "=d" (edx)
450                       : "a" (7), "c" (0));
451 #endif
452     jit_cpu.adx = ebx.bits.adx;
453
454
455     /* query %eax = 0x80000001 function */
456 #if __X64
457 #  if __X64_32
458     __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
459 #  else
460     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
461 #  endif
462                       : "=a" (eax), "=r" (ebx.cpuid),
463                       "=c" (ecx.cpuid), "=d" (edx.cpuid)
464                       : "0" (0x80000001));
465     jit_cpu.lahf        = !!(ecx.cpuid & 1);
466     jit_cpu.abm         = !!(ecx.cpuid & 32);
467 #endif
468 }
469
470 void
471 _jit_init(jit_state_t *_jit)
472 {
473 #if __X32
474     jit_int32_t         regno;
475     static jit_bool_t   first = 1;
476 #endif
477
478     _jitc->reglen = jit_size(_rvs) - 1;
479 #if __X32
480     if (first) {
481         if (!jit_cpu.sse2) {
482             for (regno = _jitc->reglen; regno >= 0; regno--) {
483                 if (_rvs[regno].spec & jit_class_xpr)
484                     _rvs[regno].spec = 0;
485             }
486         }
487         first = 0;
488     }
489 #endif
490 }
491
492 void
493 _jit_prolog(jit_state_t *_jit)
494 {
495     jit_int32_t         offset;
496
497     if (_jitc->function)
498         jit_epilog();
499     assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
500     jit_regset_set_ui(&_jitc->regsav, 0);
501     offset = _jitc->functions.offset;
502     if (offset >= _jitc->functions.length) {
503         jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
504                     _jitc->functions.length * sizeof(jit_function_t),
505                     (_jitc->functions.length + 16) * sizeof(jit_function_t));
506         _jitc->functions.length += 16;
507     }
508     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
509     /* One extra stack slot for implicit saved returned address */
510     _jitc->function->self.size = stack_framesize + REAL_WORDSIZE;
511     _jitc->function->self.argi = _jitc->function->self.argf =
512         _jitc->function->self.aoff = _jitc->function->self.alen = 0;
513     _jitc->function->cvt_offset = 0;
514 #if __X64 && (__CYGWIN__ || _WIN32)
515     /* force framepointer */
516     jit_check_frame();
517 #endif
518     _jitc->function->self.call = jit_call_default;
519     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
520               _jitc->reglen * sizeof(jit_int32_t));
521
522     /* _no_link here does not mean the jit_link() call can be removed
523      * by rewriting as:
524      * _jitc->function->prolog = jit_new_node(jit_code_prolog);
525      */
526     _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
527     jit_link(_jitc->function->prolog);
528     _jitc->function->prolog->w.w = offset;
529     _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
530     /*  u:      label value
531      *  v:      offset in blocks vector
532      *  w:      offset in functions vector
533      */
534     _jitc->function->epilog->w.w = offset;
535
536     jit_regset_new(&_jitc->function->regset);
537 }
538
539 jit_int32_t
540 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
541 {
542     assert(_jitc->function);
543     jit_check_frame();
544 #if __X32
545     /* Stack is 4 bytes aligned but jit functions keep it 8 bytes aligned.
546      * Called functions have 16 byte aligned stack. */
547     if (!_jitc->function->self.aoff)
548         _jitc->function->self.aoff = -4;
549 #endif
550     switch (length) {
551         case 0: case 1:                                         break;
552         case 2:         _jitc->function->self.aoff &= -2;       break;
553         case 3: case 4: _jitc->function->self.aoff &= -4;       break;
554         default:        _jitc->function->self.aoff &= -8;       break;
555     }
556     _jitc->function->self.aoff -= length;
557
558     /* jit_allocai() may be called from jit_x86-cpu.c, and force a function
559      * generation restart on some conditions: div/rem and qmul/qdiv, due
560      * to registers constraints.
561      * The check is to prevent an assertion of a jit_xyz() being called
562      * during code generation, and attempting to add a node to the tail
563      * of the current IR generation. */
564     if (!_jitc->realize) {
565         jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
566         jit_dec_synth();
567     }
568
569     return (_jitc->function->self.aoff);
570 }
571
572 void
573 _jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
574 {
575     jit_int32_t          reg;
576     assert(_jitc->function);
577     jit_inc_synth_ww(allocar, u, v);
578     if (!_jitc->function->allocar) {
579         _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
580         _jitc->function->allocar = 1;
581     }
582     reg = jit_get_reg(jit_class_gpr);
583     jit_negr(reg, v);
584     jit_andi(reg, reg, -16);
585     jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
586     jit_addr(u, u, reg);
587     jit_addr(JIT_SP, JIT_SP, reg);
588     jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
589     jit_unget_reg(reg);
590     jit_dec_synth();
591 }
592
593 void
594 _jit_ret(jit_state_t *_jit)
595 {
596     jit_node_t          *instr;
597     assert(_jitc->function);
598     jit_inc_synth(ret);
599     /* jump to epilog */
600     instr = jit_jmpi();
601     jit_patch_at(instr, _jitc->function->epilog);
602     jit_dec_synth();
603 }
604
605 void
606 _jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
607 {
608     jit_code_inc_synth_w(code, u);
609     jit_movr(JIT_RET, u);
610     jit_ret();
611     jit_dec_synth();
612 }
613
614 void
615 _jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
616 {
617     jit_code_inc_synth_w(code, u);
618     jit_movi(JIT_RET, u);
619     jit_ret();
620     jit_dec_synth();
621 }
622
623 void
624 _jit_retr_f(jit_state_t *_jit, jit_int32_t u)
625 {
626     jit_inc_synth_w(retr_f, u);
627     if (JIT_FRET != u)
628         jit_movr_f(JIT_FRET, u);
629     else
630         jit_live(JIT_FRET);
631     jit_ret();
632     jit_dec_synth();
633 }
634
635 void
636 _jit_reti_f(jit_state_t *_jit, jit_float32_t u)
637 {
638     jit_inc_synth_f(reti_f, u);
639     jit_movi_f(JIT_FRET, u);
640     jit_ret();
641     jit_dec_synth();
642 }
643
644 void
645 _jit_retr_d(jit_state_t *_jit, jit_int32_t u)
646 {
647     jit_inc_synth_w(retr_d, u);
648     if (JIT_FRET != u)
649         jit_movr_d(JIT_FRET, u);
650     else
651         jit_live(JIT_FRET);
652     jit_ret();
653     jit_dec_synth();
654 }
655
656 void
657 _jit_reti_d(jit_state_t *_jit, jit_float64_t u)
658 {
659     jit_inc_synth_d(reti_d, u);
660     jit_movi_d(JIT_FRET, u);
661     jit_ret();
662     jit_dec_synth();
663 }
664
665 void
666 _jit_epilog(jit_state_t *_jit)
667 {
668     assert(_jitc->function);
669     assert(_jitc->function->epilog->next == NULL);
670     jit_link(_jitc->function->epilog);
671     _jitc->function = NULL;
672 }
673
674 jit_bool_t
675 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
676 {
677     if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
678         return (jit_arg_reg_p(u->u.w));
679     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
680     return (jit_arg_f_reg_p(u->u.w));
681 }
682
683 void
684 _jit_ellipsis(jit_state_t *_jit)
685 {
686     jit_inc_synth(ellipsis);
687     jit_check_frame();
688     if (_jitc->prepare) {
689         jit_link_prepare();
690         /* Remember that a varargs function call is being constructed. */
691         assert(!(_jitc->function->call.call & jit_call_varargs));
692         _jitc->function->call.call |= jit_call_varargs;
693     }
694     else {
695         jit_link_prolog();
696         /* Remember the current function is varargs. */
697         assert(!(_jitc->function->self.call & jit_call_varargs));
698         _jitc->function->self.call |= jit_call_varargs;
699
700 #if __X64 && !(__CYGWIN__ || _WIN32)
701         /* Allocate va_list like object in the stack.
702          * If applicable, with enough space to save all argument
703          * registers, and use fixed offsets for them. */
704         _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
705
706         /* Initialize gp offset in save area. */
707         if (jit_arg_reg_p(_jitc->function->self.argi))
708             _jitc->function->vagp = _jitc->function->self.argi * 8;
709         else
710             _jitc->function->vagp = va_gp_max_offset;
711
712         /* Initialize fp offset in save area. */
713         if (jit_arg_f_reg_p(_jitc->function->self.argf))
714             _jitc->function->vafp = _jitc->function->self.argf * 16 +
715                                     va_gp_max_offset;
716         else
717             _jitc->function->vafp = va_fp_max_offset;
718 #endif
719     }
720     jit_dec_synth();
721 }
722
723 void
724 _jit_va_push(jit_state_t *_jit, jit_int32_t u)
725 {
726     jit_inc_synth_w(va_push, u);
727     jit_pushargr(u);
728     jit_dec_synth();
729 }
730
731 jit_node_t *
732 _jit_arg(jit_state_t *_jit, jit_code_t code)
733 {
734     jit_node_t          *node;
735     jit_int32_t          offset;
736     assert(_jitc->function);
737     assert(!(_jitc->function->self.call & jit_call_varargs));
738 #if STRONG_TYPE_CHECKING
739     assert(code >= jit_code_arg_c && code <= jit_code_arg);
740 #endif
741 #if __X64
742     if (jit_arg_reg_p(_jitc->function->self.argi)) {
743         offset = _jitc->function->self.argi++;
744 #  if __CYGWIN__ || _WIN32
745         _jitc->function->self.size += sizeof(jit_word_t);
746 #  endif
747     }
748     else
749 #endif
750     {
751         offset = _jitc->function->self.size;
752         _jitc->function->self.size += REAL_WORDSIZE;
753         jit_check_frame();
754     }
755     node = jit_new_node_ww(code, offset,
756                            ++_jitc->function->self.argn);
757     jit_link_prolog();
758     return (node);
759 }
760
761 jit_node_t *
762 _jit_arg_f(jit_state_t *_jit)
763 {
764     jit_node_t          *node;
765     jit_int32_t          offset;
766     assert(_jitc->function);
767     assert(!(_jitc->function->self.call & jit_call_varargs));
768 #if __X64
769 #  if __CYGWIN__ || _WIN32
770     if (jit_arg_reg_p(_jitc->function->self.argi)) {
771         offset = _jitc->function->self.argi++;
772         _jitc->function->self.size += sizeof(jit_word_t);
773     }
774 #  else
775     if (jit_arg_f_reg_p(_jitc->function->self.argf))
776         offset = _jitc->function->self.argf++;
777 #  endif
778     else
779 #endif
780     {
781         offset = _jitc->function->self.size;
782         _jitc->function->self.size += REAL_WORDSIZE;
783         jit_check_frame();
784     }
785     node = jit_new_node_ww(jit_code_arg_f, offset,
786                            ++_jitc->function->self.argn);
787     jit_link_prolog();
788     return (node);
789 }
790
791 jit_node_t *
792 _jit_arg_d(jit_state_t *_jit)
793 {
794     jit_node_t          *node;
795     jit_int32_t          offset;
796     assert(_jitc->function);
797     assert(!(_jitc->function->self.call & jit_call_varargs));
798 #if __X64
799 #  if __CYGWIN__ || _WIN32
800     if (jit_arg_reg_p(_jitc->function->self.argi)) {
801         offset = _jitc->function->self.argi++;
802         _jitc->function->self.size += sizeof(jit_word_t);
803     }
804 #  else
805     if (jit_arg_f_reg_p(_jitc->function->self.argf))
806         offset = _jitc->function->self.argf++;
807 #  endif
808     else
809 #endif
810     {
811         offset = _jitc->function->self.size;
812         _jitc->function->self.size += sizeof(jit_float64_t);
813         jit_check_frame();
814     }
815     node = jit_new_node_ww(jit_code_arg_d, offset,
816                            ++_jitc->function->self.argn);
817     jit_link_prolog();
818     return (node);
819 }
820
821 void
822 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
823 {
824     assert_arg_type(v->code, jit_code_arg_c);
825     jit_inc_synth_wp(getarg_c, u, v);
826 #if __X64
827     if (jit_arg_reg_p(v->u.w))
828         jit_extr_c(u, JIT_RA0 - v->u.w);
829     else
830 #endif
831     {
832         jit_node_t      *node = jit_ldxi_c(u, _RBP, v->u.w);
833         jit_link_alist(node);
834     }
835     jit_dec_synth();
836 }
837
838 void
839 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
840 {
841     assert_arg_type(v->code, jit_code_arg_c);
842     jit_inc_synth_wp(getarg_uc, u, v);
843 #if __X64
844     if (jit_arg_reg_p(v->u.w))
845         jit_extr_uc(u, JIT_RA0 - v->u.w);
846     else
847 #endif
848     {
849         jit_node_t      *node = jit_ldxi_uc(u, _RBP, v->u.w);
850         jit_link_alist(node);
851     }
852     jit_dec_synth();
853 }
854
855 void
856 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
857 {
858     assert_arg_type(v->code, jit_code_arg_s);
859     jit_inc_synth_wp(getarg_s, u, v);
860 #if __X64
861     if (jit_arg_reg_p(v->u.w))
862         jit_extr_s(u, JIT_RA0 - v->u.w);
863     else
864 #endif
865     {
866         jit_node_t      *node = jit_ldxi_s(u, _RBP, v->u.w);
867         jit_link_alist(node);
868     }
869     jit_dec_synth();
870 }
871
872 void
873 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
874 {
875     assert_arg_type(v->code, jit_code_arg_s);
876     jit_inc_synth_wp(getarg_us, u, v);
877 #if __X64
878     if (jit_arg_reg_p(v->u.w))
879         jit_extr_us(u, JIT_RA0 - v->u.w);
880     else
881 #endif
882     {
883         jit_node_t      *node = jit_ldxi_us(u, _RBP, v->u.w);
884         jit_link_alist(node);
885     }
886     jit_dec_synth();
887 }
888
889 void
890 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
891 {
892     assert_arg_type(v->code, jit_code_arg_i);
893     jit_inc_synth_wp(getarg_i, u, v);
894 #if __X64
895     if (jit_arg_reg_p(v->u.w)) {
896 #  if __X64_32
897         jit_movr(u, JIT_RA0 - v->u.w);
898 #  else
899         jit_extr_i(u, JIT_RA0 - v->u.w);
900 #  endif
901      }
902     else
903 #endif
904     {
905         jit_node_t      *node = jit_ldxi_i(u, _RBP, v->u.w);
906         jit_link_alist(node);
907     }
908     jit_dec_synth();
909 }
910
911 #if __X64 && !__X64_32
912 void
913 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
914 {
915     assert_arg_type(v->code, jit_code_arg_i);
916     jit_inc_synth_wp(getarg_ui, u, v);
917     if (jit_arg_reg_p(v->u.w))
918         jit_extr_ui(u, JIT_RA0 - v->u.w);
919     else {
920         jit_node_t      *node = jit_ldxi_ui(u, _RBP, v->u.w);
921         jit_link_alist(node);
922     }
923     jit_dec_synth();
924 }
925
926 void
927 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
928 {
929     assert_arg_type(v->code, jit_code_arg_l);
930     jit_inc_synth_wp(getarg_l, u, v);
931     if (jit_arg_reg_p(v->u.w))
932         jit_movr(u, JIT_RA0 - v->u.w);
933     else {
934         jit_node_t      *node = jit_ldxi_l(u, _RBP, v->u.w);
935         jit_link_alist(node);
936     }
937     jit_dec_synth();
938 }
939 #endif
940
941 void
942 _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
943 {
944     assert_putarg_type(code, v->code);
945     jit_code_inc_synth_wp(code, u, v);
946 #if __X64
947     if (jit_arg_reg_p(v->u.w))
948         jit_movr(JIT_RA0 - v->u.w, u);
949     else
950 #endif
951     {
952         jit_node_t      *node = jit_stxi(v->u.w, _RBP, u);
953         jit_link_alist(node);
954     }
955     jit_dec_synth();
956 }
957
958 void
959 _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
960 {
961     jit_int32_t         regno;
962     assert_putarg_type(code, v->code);
963     jit_code_inc_synth_wp(code, u, v);
964 #if __X64
965     if (jit_arg_reg_p(v->u.w))
966         jit_movi(JIT_RA0 - v->u.w, u);
967     else
968 #endif
969     {
970         jit_node_t      *node;
971         regno = jit_get_reg(jit_class_gpr);
972         jit_movi(regno, u);
973         node = jit_stxi(v->u.w, _RBP, regno);
974         jit_link_alist(node);
975         jit_unget_reg(regno);
976     }
977     jit_dec_synth();
978 }
979
980 void
981 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
982 {
983     assert(v->code == jit_code_arg_f);
984     jit_inc_synth_wp(getarg_f, u, v);
985 #if __X64
986     if (jit_arg_f_reg_p(v->u.w))
987         jit_movr_f(u, _XMM0 - v->u.w);
988     else
989 #endif
990     {
991         jit_node_t      *node = jit_ldxi_f(u, _RBP, v->u.w);
992         jit_link_alist(node);
993     }
994     jit_dec_synth();
995 }
996
997 void
998 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
999 {
1000     assert(v->code == jit_code_arg_f);
1001     jit_inc_synth_wp(putargr_f, u, v);
1002 #if __X64
1003     if (jit_arg_f_reg_p(v->u.w))
1004         jit_movr_f(_XMM0 - v->u.w, u);
1005     else
1006 #endif
1007     {
1008         jit_node_t      *node = jit_stxi_f(v->u.w, _RBP, u);
1009         jit_link_alist(node);
1010     }
1011     jit_dec_synth();
1012 }
1013
1014 void
1015 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
1016 {
1017     jit_int32_t         regno;
1018     assert(v->code == jit_code_arg_f);
1019     jit_inc_synth_fp(putargi_f, u, v);
1020 #if __X64
1021     if (jit_arg_f_reg_p(v->u.w))
1022         jit_movi_f(_XMM0 - v->u.w, u);
1023     else
1024 #endif
1025     {
1026         jit_node_t      *node;
1027         regno = jit_get_reg(jit_class_fpr);
1028         jit_movi_f(regno, u);
1029         node = jit_stxi_f(v->u.w, _RBP, regno);
1030         jit_link_alist(node);
1031         jit_unget_reg(regno);
1032     }
1033     jit_dec_synth();
1034 }
1035
1036 void
1037 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
1038 {
1039     assert(v->code == jit_code_arg_d);
1040     jit_inc_synth_wp(getarg_d, u, v);
1041 #if __X64
1042     if (jit_arg_f_reg_p(v->u.w))
1043         jit_movr_d(u, _XMM0 - v->u.w);
1044     else
1045 #endif
1046     {
1047         jit_node_t      *node = jit_ldxi_d(u, _RBP, v->u.w);
1048         jit_link_alist(node);
1049     }
1050     jit_dec_synth();
1051 }
1052
1053 void
1054 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
1055 {
1056     assert(v->code == jit_code_arg_d);
1057     jit_inc_synth_wp(putargr_d, u, v);
1058 #if __X64
1059     if (jit_arg_f_reg_p(v->u.w))
1060         jit_movr_d(_XMM0 - v->u.w, u);
1061     else
1062 #endif
1063     {
1064         jit_node_t      *node = jit_stxi_d(v->u.w, _RBP, u);
1065         jit_link_alist(node);
1066     }
1067     jit_dec_synth();
1068 }
1069
1070 void
1071 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
1072 {
1073     jit_int32_t         regno;
1074     assert(v->code == jit_code_arg_d);
1075     jit_inc_synth_dp(putargi_d, u, v);
1076 #if __X64
1077     if (jit_arg_f_reg_p(v->u.w))
1078         jit_movi_d(_XMM0 - v->u.w, u);
1079     else
1080 #endif
1081     {
1082         jit_node_t      *node;
1083         regno = jit_get_reg(jit_class_fpr);
1084         jit_movi_d(regno, u);
1085         node = jit_stxi_d(v->u.w, _RBP, regno);
1086         jit_link_alist(node);
1087         jit_unget_reg(regno);
1088     }
1089     jit_dec_synth();
1090 }
1091
1092 void
1093 _jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
1094 {
1095     assert(_jitc->function);
1096     jit_code_inc_synth_w(code, u);
1097     jit_link_prepare();
1098 #if __X64
1099     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1100         jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
1101         ++_jitc->function->call.argi;
1102 #  if __CYGWIN__ || _WIN32
1103         if (_jitc->function->call.call & jit_call_varargs)
1104             jit_stxi(_jitc->function->call.size, _RSP, u);
1105         _jitc->function->call.size += sizeof(jit_word_t);
1106 #  endif
1107     }
1108     else
1109 #endif
1110     {
1111         jit_stxi(_jitc->function->call.size, _RSP, u);
1112         _jitc->function->call.size += REAL_WORDSIZE;
1113         jit_check_frame();
1114     }
1115     jit_dec_synth();
1116 }
1117
1118 void
1119 _jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
1120 {
1121     jit_int32_t          regno;
1122     assert(_jitc->function);
1123     jit_code_inc_synth_w(code, u);
1124     jit_link_prepare();
1125 #if __X64
1126     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1127         jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
1128 #  if __CYGWIN__ || _WIN32
1129         if (_jitc->function->call.call & jit_call_varargs)
1130             jit_stxi(_jitc->function->call.size, _RSP,
1131                      JIT_RA0 - _jitc->function->call.argi);
1132         _jitc->function->call.size += sizeof(jit_word_t);
1133 #  endif
1134         ++_jitc->function->call.argi;
1135     }
1136     else
1137 #endif
1138     {
1139         regno = jit_get_reg(jit_class_gpr);
1140         jit_movi(regno, u);
1141         jit_stxi(_jitc->function->call.size, _RSP, regno);
1142         _jitc->function->call.size += REAL_WORDSIZE;
1143         jit_unget_reg(regno);
1144         jit_check_frame();
1145     }
1146     jit_dec_synth();
1147 }
1148
1149 void
1150 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
1151 {
1152     assert(_jitc->function);
1153     jit_inc_synth_w(pushargr_f, u);
1154     jit_link_prepare();
1155 #if __X64
1156 #  if __CYGWIN__ || _WIN32
1157     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1158         jit_movr_f(_XMM0 - _jitc->function->call.argi, u);
1159         if (_jitc->function->call.call & jit_call_varargs) {
1160             jit_stxi_f(_jitc->function->call.size, _RSP,
1161                        _XMM0 - _jitc->function->call.argi);
1162             jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1163                        _jitc->function->call.size);
1164         }
1165         ++_jitc->function->call.argi;
1166         _jitc->function->call.size += sizeof(jit_word_t);
1167     }
1168 #  else
1169     if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
1170         jit_movr_f(_XMM0 - _jitc->function->call.argf, u);
1171         ++_jitc->function->call.argf;
1172     }
1173 #  endif
1174     else
1175 #endif
1176     {
1177         jit_stxi_f(_jitc->function->call.size, _RSP, u);
1178         _jitc->function->call.size += REAL_WORDSIZE;
1179         jit_check_frame();
1180     }
1181     jit_dec_synth();
1182 }
1183
1184 void
1185 _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
1186 {
1187     jit_int32_t         regno;
1188     assert(_jitc->function);
1189     jit_inc_synth_f(pushargi_f, u);
1190     jit_link_prepare();
1191 #if __X64
1192 #  if __CYGWIN__ || _WIN32
1193     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1194         jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
1195         if (_jitc->function->call.call & jit_call_varargs) {
1196             jit_stxi_f(_jitc->function->call.size, _RSP,
1197                        _XMM0 - _jitc->function->call.argi);
1198             jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1199                        _jitc->function->call.size);
1200         }
1201         ++_jitc->function->call.argi;
1202         _jitc->function->call.size += sizeof(jit_word_t);
1203     }
1204 #  else
1205     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1206         jit_movi_f(_XMM0 - _jitc->function->call.argf, u);
1207         ++_jitc->function->call.argf;
1208     }
1209 #  endif
1210     else
1211 #endif
1212     {
1213         regno = jit_get_reg(jit_class_fpr);
1214         jit_movi_f(regno, u);
1215         jit_stxi_f(_jitc->function->call.size, _RSP, regno);
1216         _jitc->function->call.size += REAL_WORDSIZE;
1217         jit_unget_reg(regno);
1218         jit_check_frame();
1219     }
1220     jit_dec_synth();
1221 }
1222
1223 void
1224 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
1225 {
1226     assert(_jitc->function);
1227     jit_inc_synth_w(pushargr_d, u);
1228     jit_link_prepare();
1229 #if __X64
1230 #  if __CYGWIN__ || _WIN32
1231     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1232         jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
1233         if (_jitc->function->call.call & jit_call_varargs) {
1234             jit_stxi_d(_jitc->function->call.size, _RSP,
1235                        _XMM0 - _jitc->function->call.argi);
1236             jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1237                        _jitc->function->call.size);
1238         }
1239         ++_jitc->function->call.argi;
1240         _jitc->function->call.size += sizeof(jit_word_t);
1241     }
1242 #  else
1243     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1244         jit_movr_d(_XMM0 - _jitc->function->call.argf, u);
1245         ++_jitc->function->call.argf;
1246     }
1247 #  endif
1248     else
1249 #endif
1250     {
1251         jit_stxi_d(_jitc->function->call.size, _RSP, u);
1252         _jitc->function->call.size += sizeof(jit_float64_t);
1253         jit_check_frame();
1254     }
1255     jit_dec_synth();
1256 }
1257
1258 void
1259 _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
1260 {
1261     jit_int32_t          regno;
1262     assert(_jitc->function);
1263     jit_inc_synth_d(pushargi_d, u);
1264     jit_link_prepare();
1265 #if __X64
1266 #  if __CYGWIN__ || _WIN32
1267     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1268         jit_movi_d(_XMM0 - _jitc->function->call.argi, u);
1269         if (_jitc->function->call.call & jit_call_varargs) {
1270             jit_stxi_d(_jitc->function->call.size, _RSP,
1271                        _XMM0 - _jitc->function->call.argi);
1272             jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1273                        _jitc->function->call.size);
1274         }
1275         ++_jitc->function->call.argi;
1276         _jitc->function->call.size += sizeof(jit_word_t);
1277     }
1278 #  else
1279     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1280         jit_movi_d(_XMM0 - _jitc->function->call.argf, u);
1281         ++_jitc->function->call.argf;
1282     }
1283 #  endif
1284     else
1285 #endif
1286     {
1287         regno = jit_get_reg(jit_class_fpr);
1288         jit_movi_d(regno, u);
1289         jit_stxi_d(_jitc->function->call.size, _RSP, regno);
1290         _jitc->function->call.size += sizeof(jit_float64_t);
1291         jit_unget_reg(regno);
1292         jit_check_frame();
1293     }
1294     jit_dec_synth();
1295 }
1296
1297 jit_bool_t
1298 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
1299 {
1300 #if __X64
1301     jit_int32_t         spec;
1302
1303     spec = jit_class(_rvs[regno].spec);
1304     if (spec & jit_class_arg) {
1305         if (spec & jit_class_gpr) {
1306             regno = JIT_RA0 - regno;
1307             if (regno >= 0 && regno < node->v.w)
1308                 return (1);
1309         }
1310         else if (spec & jit_class_fpr) {
1311             regno = _XMM0 - regno;
1312             if (regno >= 0 && regno < node->w.w)
1313                 return (1);
1314         }
1315     }
1316 #endif
1317     return (0);
1318 }
1319
1320 void
1321 _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
1322 {
1323     jit_int32_t          reg;
1324     jit_node_t          *call;
1325     assert(_jitc->function);
1326     jit_check_frame();
1327     reg = r0;
1328     jit_inc_synth_w(finishr, r0);
1329     if (_jitc->function->self.alen < _jitc->function->call.size)
1330         _jitc->function->self.alen = _jitc->function->call.size;
1331 #if __X64
1332 #  if !(__CYGWIN__ || _WIN32)
1333     if (_jitc->function->call.call & jit_call_varargs) {
1334         if (jit_regno(reg) == _RAX) {
1335             reg = jit_get_reg(jit_class_gpr);
1336             jit_movr(reg, _RAX);
1337         }
1338         if (_jitc->function->call.argf)
1339             jit_movi(_RAX, _jitc->function->call.argf);
1340         else
1341             jit_movi(_RAX, 0);
1342         if (reg != r0)
1343             jit_unget_reg(reg);
1344     }
1345 #  endif
1346 #endif
1347     call = jit_callr(reg);
1348     call->v.w = _jitc->function->call.argi;
1349     call->w.w = _jitc->function->call.argf;
1350     _jitc->function->call.argi = _jitc->function->call.argf =
1351         _jitc->function->call.size = 0;
1352     _jitc->prepare = 0;
1353     jit_dec_synth();
1354 }
1355
1356 jit_node_t *
1357 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
1358 {
1359     jit_node_t          *node;
1360     assert(_jitc->function);
1361     jit_check_frame();
1362     jit_inc_synth_w(finishi, (jit_word_t)i0);
1363     if (_jitc->function->self.alen < _jitc->function->call.size)
1364         _jitc->function->self.alen = _jitc->function->call.size;
1365 #if __X64
1366 #  if !(__CYGWIN__ || _WIN32)
1367     if (_jitc->function->call.call & jit_call_varargs) {
1368         if (_jitc->function->call.argf)
1369             jit_movi(_RAX, _jitc->function->call.argf);
1370         else
1371             jit_movi(_RAX, 0);
1372         jit_live(_RAX);
1373     }
1374 #  endif
1375 #endif
1376     node = jit_calli(i0);
1377     node->v.w = _jitc->function->call.argi;
1378     node->w.w = _jitc->function->call.argf;
1379     _jitc->function->call.argi = _jitc->function->call.argf =
1380         _jitc->function->call.size = 0;
1381     _jitc->prepare = 0;
1382     jit_dec_synth();
1383     return (node);
1384 }
1385
1386 void
1387 _jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
1388 {
1389     jit_inc_synth_w(retval_c, r0);
1390     jit_extr_c(r0, JIT_RET);
1391     jit_dec_synth();
1392 }
1393
1394 void
1395 _jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
1396 {
1397     jit_inc_synth_w(retval_uc, r0);
1398     jit_extr_uc(r0, JIT_RET);
1399     jit_dec_synth();
1400 }
1401
1402 void
1403 _jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
1404 {
1405     jit_inc_synth_w(retval_s, r0);
1406     jit_extr_s(r0, JIT_RET);
1407     jit_dec_synth();
1408 }
1409
1410 void
1411 _jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
1412 {
1413     jit_inc_synth_w(retval_us, r0);
1414     jit_extr_us(r0, JIT_RET);
1415     jit_dec_synth();
1416 }
1417
1418 void
1419 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
1420 {
1421     jit_inc_synth_w(retval_i, r0);
1422 #if __X32 || __X64_32
1423     if (r0 != JIT_RET)
1424         jit_movr(r0, JIT_RET);
1425 #else
1426     jit_extr_i(r0, JIT_RET);
1427 #endif
1428     jit_dec_synth();
1429 }
1430
1431 #if __X64 && !__X64_32
1432 void
1433 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
1434 {
1435     jit_inc_synth_w(retval_ui, r0);
1436     jit_extr_ui(r0, JIT_RET);
1437     jit_dec_synth();
1438 }
1439
1440 void
1441 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
1442 {
1443     jit_inc_synth_w(retval_l, r0);
1444     if (r0 != JIT_RET)
1445         jit_movr(r0, JIT_RET);
1446     jit_dec_synth();
1447 }
1448 #endif
1449
1450 void
1451 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
1452 {
1453     jit_inc_synth_w(retval_f, r0);
1454 #if __X64
1455     if (r0 != JIT_FRET)
1456         jit_movr_f(r0, JIT_FRET);
1457 #endif
1458     jit_dec_synth();
1459 }
1460
1461 void
1462 _jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
1463 {
1464     jit_inc_synth_w(retval_d, r0);
1465 #if __X64
1466     if (r0 != JIT_FRET)
1467         jit_movr_d(r0, JIT_FRET);
1468 #endif
1469     jit_dec_synth();
1470 }
1471
1472 jit_pointer_t
1473 _emit_code(jit_state_t *_jit)
1474 {
1475     jit_node_t          *node;
1476     jit_node_t          *temp;
1477     jit_word_t           word;
1478     jit_int32_t          value;
1479     jit_int32_t          offset;
1480     struct {
1481         jit_node_t      *node;
1482         jit_word_t       word;
1483         jit_function_t   func;
1484 #if DEVEL_DISASSEMBLER
1485         jit_word_t       prevw;
1486 #endif
1487         jit_int32_t      patch_offset;
1488     } undo;
1489 #if DEVEL_DISASSEMBLER
1490     jit_word_t           prevw;
1491 #endif
1492
1493     _jitc->function = NULL;
1494
1495     jit_reglive_setup();
1496
1497     undo.word = 0;
1498     undo.node = NULL;
1499     undo.patch_offset = 0;
1500 #define case_rr(name, type)                                             \
1501             case jit_code_##name##r##type:                              \
1502                 name##r##type(rn(node->u.w), rn(node->v.w));            \
1503                 break
1504 #define case_rw(name, type)                                             \
1505             case jit_code_##name##i##type:                              \
1506                 name##i##type(rn(node->u.w), node->v.w);                \
1507                 break
1508 #define case_rf(name, type)                                             \
1509             case jit_code_##name##r##type:                              \
1510                 if (jit_x87_reg_p(node->v.w))                           \
1511                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1512                 else                                                    \
1513                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1514                 break
1515 #define case_fr(name, type)                                             \
1516             case jit_code_##name##r##type:                              \
1517                 if (jit_x87_reg_p(node->u.w))                           \
1518                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1519                 else                                                    \
1520                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1521                 break
1522 #define case_fw(name, type)                                             \
1523             case jit_code_##name##i##type:                              \
1524                 if (jit_x87_reg_p(node->u.w))                           \
1525                     x87_##name##i##type(rn(node->u.w), node->v.w);      \
1526                 else                                                    \
1527                     sse_##name##i##type(rn(node->u.w), node->v.w);      \
1528                 break
1529 #define case_wr(name, type)                                             \
1530             case jit_code_##name##i##type:                              \
1531                 name##i##type(node->u.w, rn(node->v.w));                \
1532                 break
1533 #define case_wf(name, type)                                             \
1534             case jit_code_##name##i##type:                              \
1535                 if (jit_x87_reg_p(node->v.w))                           \
1536                     x87_##name##i##type(node->u.w, rn(node->v.w));      \
1537                 else                                                    \
1538                     sse_##name##i##type(node->u.w, rn(node->v.w));      \
1539                 break
1540 #define case_ff(name, type)                                             \
1541             case jit_code_##name##r##type:                              \
1542                 if (jit_x87_reg_p(node->u.w) &&                         \
1543                     jit_x87_reg_p(node->v.w))                           \
1544                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1545                 else                                                    \
1546                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1547                 break;
1548 #define case_rrr(name, type)                                            \
1549             case jit_code_##name##r##type:                              \
1550                 name##r##type(rn(node->u.w),                            \
1551                               rn(node->v.w), rn(node->w.w));            \
1552                 break
1553 #define case_rrrr(name, type)                                           \
1554             case jit_code_##name##r##type:                              \
1555                 name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
1556                               rn(node->v.w), rn(node->w.w));            \
1557                 break
1558 #define case_frr(name, type)                                            \
1559             case jit_code_##name##r##type:                              \
1560                 if (jit_x87_reg_p(node->u.w))                           \
1561                     x87_##name##r##type(rn(node->u.w),                  \
1562                                         rn(node->v.w), rn(node->w.w));  \
1563                 else                                                    \
1564                     sse_##name##r##type(rn(node->u.w),                  \
1565                                         rn(node->v.w), rn(node->w.w));  \
1566                 break
1567 #define case_rrf(name, type)                                            \
1568             case jit_code_##name##r##type:                              \
1569                 if (jit_x87_reg_p(node->w.w))                           \
1570                     x87_##name##r##type(rn(node->u.w),                  \
1571                                         rn(node->v.w), rn(node->w.w));  \
1572                 else                                                    \
1573                     sse_##name##r##type(rn(node->u.w),                  \
1574                                         rn(node->v.w), rn(node->w.w));  \
1575                 break
1576 #define case_rrw(name, type)                                            \
1577             case jit_code_##name##i##type:                              \
1578                 name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
1579                 break
1580 #define case_rrrw(name, type)                                           \
1581             case jit_code_##name##i##type:                              \
1582                 name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
1583                               rn(node->v.w), node->w.w);                \
1584                 break
1585 #define case_frw(name, type)                                            \
1586             case jit_code_##name##i##type:                              \
1587                 if (jit_x87_reg_p(node->u.w))                           \
1588                     x87_##name##i##type(rn(node->u.w),                  \
1589                                         rn(node->v.w), node->w.w);      \
1590                 else                                                    \
1591                     sse_##name##i##type(rn(node->u.w),                  \
1592                                         rn(node->v.w), node->w.w);      \
1593                 break
1594 #define case_wrr(name, type)                                            \
1595             case jit_code_##name##i##type:                              \
1596                 name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
1597                 break
1598 #define case_wrf(name, type)                                            \
1599             case jit_code_##name##i##type:                              \
1600                 if (jit_x87_reg_p(node->w.w))                           \
1601                     x87_##name##i##type(node->u.w,                      \
1602                                         rn(node->v.w), rn(node->w.w));  \
1603                 else                                                    \
1604                     sse_##name##i##type(node->u.w,                      \
1605                                         rn(node->v.w), rn(node->w.w));  \
1606                 break
1607 #define case_brr(name, type)                                            \
1608             case jit_code_##name##r##type:                              \
1609                 temp = node->u.n;                                       \
1610                 assert(temp->code == jit_code_label ||                  \
1611                        temp->code == jit_code_epilog);                  \
1612                 if (temp->flag & jit_flag_patch)                        \
1613                     name##r##type(temp->u.w, rn(node->v.w),             \
1614                                   rn(node->w.w));                       \
1615                 else {                                                  \
1616                     word = name##r##type(_jit->pc.w,                    \
1617                                          rn(node->v.w), rn(node->w.w)); \
1618                     patch(word, node);                                  \
1619                 }                                                       \
1620                 break
1621 #define case_brw(name, type)                                            \
1622             case jit_code_##name##i##type:                              \
1623                 temp = node->u.n;                                       \
1624                 assert(temp->code == jit_code_label ||                  \
1625                        temp->code == jit_code_epilog);                  \
1626                 if (temp->flag & jit_flag_patch)                        \
1627                     name##i##type(temp->u.w,                            \
1628                                   rn(node->v.w), node->w.w);            \
1629                 else {                                                  \
1630                     word = name##i##type(_jit->pc.w,                    \
1631                                          rn(node->v.w), node->w.w);     \
1632                     patch(word, node);                                  \
1633                 }                                                       \
1634                 break
1635 #define case_rff(name, type)                                            \
1636             case jit_code_##name##r##type:                              \
1637                 if (jit_x87_reg_p(node->v.w) &&                         \
1638                     jit_x87_reg_p(node->w.w))                           \
1639                     x87_##name##r##type(rn(node->u.w), rn(node->v.w),   \
1640                                         rn(node->w.w));                 \
1641                 else                                                    \
1642                     sse_##name##r##type(rn(node->u.w), rn(node->v.w),   \
1643                                         rn(node->w.w));                 \
1644                 break;
1645 #define case_rfw(name, type, size)                                      \
1646             case jit_code_##name##i##type:                              \
1647                 assert(node->flag & jit_flag_data);                     \
1648                 if (jit_x87_reg_p(node->v.w))                           \
1649                     x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
1650                                 (jit_float##size##_t *)node->w.n->u.w); \
1651                 else                                                    \
1652                     sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
1653                                 (jit_float##size##_t *)node->w.n->u.w); \
1654                 break
/* Expands to the switch case for jit_code_<name>r<type>, a three
 * register operation on node->u.w, node->v.w and node->w.w.
 * The x87_ emitter is used only when all three registers satisfy
 * jit_x87_reg_p(); as soon as one of them does not, the sse_ emitter
 * is used instead.
 * Uses the local `node' of the enclosing function and expands without
 * a trailing semicolon, so the caller supplies it. */
#define case_fff(name, type)                                            \
            case jit_code_##name##r##type:                              \
                if (!jit_x87_reg_p(node->u.w) ||                        \
                    !jit_x87_reg_p(node->v.w) ||                        \
                    !jit_x87_reg_p(node->w.w)) {                        \
                    sse_##name##r##type(rn(node->u.w),                  \
                                        rn(node->v.w), rn(node->w.w));  \
                }                                                       \
                else {                                                  \
                    x87_##name##r##type(rn(node->u.w),                  \
                                        rn(node->v.w), rn(node->w.w));  \
                }                                                       \
                break
/* Expands to the switch case for jit_code_<name>i<type>.
 * The node must carry jit_flag_data (asserted).  The emitter is called
 * with rn(node->u.w), rn(node->v.w) and the value stored in
 * node->w.n->u.w reinterpreted as a pointer to jit_float<size>_t.
 * The x87_ emitter is used only when both node->u.w and node->v.w
 * satisfy jit_x87_reg_p(); otherwise the sse_ emitter is used.
 * Uses the local `node' of the enclosing function and expands without
 * a trailing semicolon, so the caller supplies it. */
#define case_ffw(name, type, size)                                      \
            case jit_code_##name##i##type:                              \
                assert(node->flag & jit_flag_data);                     \
                if (!jit_x87_reg_p(node->u.w) ||                        \
                    !jit_x87_reg_p(node->v.w)) {                        \
                    sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
                                (jit_float##size##_t *)node->w.n->u.w); \
                }                                                       \
                else {                                                  \
                    x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
                                (jit_float##size##_t *)node->w.n->u.w); \
                }                                                       \
                break
/* Expands to the switch case for the branch jit_code_b<name>r<type>
 * comparing the registers node->v.w and node->w.w.
 * node->u.n is the branch target and is asserted to be a label or an
 * epilog node.  The emitter loop sets jit_flag_patch on such a node
 * when it stores the node's code address in u.w, so:
 *   - flag clear: the branch is emitted with the current _jit->pc.w
 *     as a stand-in target and the emitter's return value is handed
 *     to patch() together with the node;
 *   - flag set: the branch is emitted straight to temp->u.w.
 * The x87_ emitter is used only when both compared registers satisfy
 * jit_x87_reg_p(); otherwise the sse_ emitter is used.
 * Uses the locals `node', `temp', `word' and `_jit' of the enclosing
 * function and expands without a trailing semicolon. */
#define case_bff(name, type)                                            \
            case jit_code_b##name##r##type:                             \
                temp = node->u.n;                                       \
                assert(temp->code == jit_code_label ||                  \
                       temp->code == jit_code_epilog);                  \
                if (!(temp->flag & jit_flag_patch)) {                   \
                    if (!jit_x87_reg_p(node->v.w) ||                    \
                        !jit_x87_reg_p(node->w.w)) {                    \
                        word = sse_b##name##r##type(_jit->pc.w,         \
                                rn(node->v.w), rn(node->w.w));          \
                    }                                                   \
                    else {                                              \
                        word = x87_b##name##r##type(_jit->pc.w,         \
                                rn(node->v.w), rn(node->w.w));          \
                    }                                                   \
                    patch(word, node);                                  \
                }                                                       \
                else if (!jit_x87_reg_p(node->v.w) ||                   \
                         !jit_x87_reg_p(node->w.w)) {                   \
                    sse_b##name##r##type(temp->u.w,                     \
                                rn(node->v.w), rn(node->w.w));          \
                }                                                       \
                else {                                                  \
                    x87_b##name##r##type(temp->u.w,                     \
                                rn(node->v.w), rn(node->w.w));          \
                }                                                       \
                break
/* Expands to the switch case for the branch jit_code_b<name>i<type>
 * comparing the register node->v.w against the value stored in
 * node->w.n->u.w, reinterpreted as a pointer to jit_float<size>_t.
 * node->u.n is the branch target and is asserted to be a label or an
 * epilog node.  The emitter loop sets jit_flag_patch on such a node
 * when it stores the node's code address in u.w, so:
 *   - flag clear: the branch is emitted with the current _jit->pc.w
 *     as a stand-in target and the emitter's return value is handed
 *     to patch() together with the node;
 *   - flag set: the branch is emitted straight to temp->u.w.
 * The x87_ emitter is used when node->v.w satisfies jit_x87_reg_p();
 * otherwise the sse_ emitter is used.
 * Uses the locals `node', `temp', `word' and `_jit' of the enclosing
 * function and expands without a trailing semicolon. */
#define case_bfw(name, type, size)                                      \
            case jit_code_b##name##i##type:                             \
                temp = node->u.n;                                       \
                assert(temp->code == jit_code_label ||                  \
                       temp->code == jit_code_epilog);                  \
                if (!(temp->flag & jit_flag_patch)) {                   \
                    if (!jit_x87_reg_p(node->v.w)) {                    \
                        word = sse_b##name##i##type(_jit->pc.w,         \
                                rn(node->v.w),                          \
                                (jit_float##size##_t *)node->w.n->u.w); \
                    }                                                   \
                    else {                                              \
                        word = x87_b##name##i##type(_jit->pc.w,         \
                                rn(node->v.w),                          \
                                (jit_float##size##_t *)node->w.n->u.w); \
                    }                                                   \
                    patch(word, node);                                  \
                }                                                       \
                else if (!jit_x87_reg_p(node->v.w)) {                   \
                    sse_b##name##i##type(temp->u.w,                     \
                                rn(node->v.w),                          \
                                (jit_float##size##_t *)node->w.n->u.w); \
                }                                                       \
                else {                                                  \
                    x87_b##name##i##type(temp->u.w,                     \
                                rn(node->v.w),                          \
                                (jit_float##size##_t *)node->w.n->u.w); \
                }                                                       \
                break
1729 #if DEVEL_DISASSEMBLER
1730     prevw = _jit->pc.w;
1731 #endif
1732     for (node = _jitc->head; node; node = node->next) {
1733         if (_jit->pc.uc >= _jitc->code.end)
1734             return (NULL);
1735
1736 #if DEVEL_DISASSEMBLER
1737         node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
1738         prevw = _jit->pc.w;
1739 #endif
1740         value = jit_classify(node->code);
1741         jit_regarg_set(node, value);
1742         switch (node->code) {
1743             case jit_code_align:
1744                 /* Must align to a power of two */
1745                 assert(!(node->u.w & (node->u.w - 1)));
1746                 if ((word = _jit->pc.w & (node->u.w - 1)))
1747                     nop(node->u.w - word);
1748                 break;
1749             case jit_code_skip:
1750                 nop(node->u.w);
1751                 break;
1752             case jit_code_note:         case jit_code_name:
1753                 node->u.w = _jit->pc.w;
1754                 break;
1755             case jit_code_label:
1756                 if ((node->link || (node->flag & jit_flag_use)) &&
1757                     (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
1758                     nop(sizeof(jit_word_t) - word);
1759                 /* remember label is defined */
1760                 node->flag |= jit_flag_patch;
1761                 node->u.w = _jit->pc.w;
1762                 break;
1763                 case_rrr(add,);
1764                 case_rrw(add,);
1765                 case_rrr(addx,);
1766                 case_rrw(addx,);
1767                 case_rrr(addc,);
1768                 case_rrw(addc,);
1769                 case_rrr(sub,);
1770                 case_rrw(sub,);
1771                 case_rrr(subx,);
1772                 case_rrw(subx,);
1773                 case_rrr(subc,);
1774                 case_rrw(subc,);
1775                 case_rrw(rsb,);
1776                 case_rrr(mul,);
1777                 case_rrw(mul,);
1778                 case_rrrr(qmul,);
1779                 case_rrrw(qmul,);
1780                 case_rrrr(qmul, _u);
1781                 case_rrrw(qmul, _u);
1782                 case_rrr(div,);
1783                 case_rrw(div,);
1784                 case_rrr(div, _u);
1785                 case_rrw(div, _u);
1786                 case_rrrr(qdiv,);
1787                 case_rrrw(qdiv,);
1788                 case_rrrr(qdiv, _u);
1789                 case_rrrw(qdiv, _u);
1790                 case_rrr(rem,);
1791                 case_rrw(rem,);
1792                 case_rrr(rem, _u);
1793                 case_rrw(rem, _u);
1794                 case_rrr(and,);
1795                 case_rrw(and,);
1796                 case_rrr(or,);
1797                 case_rrw(or,);
1798                 case_rrr(xor,);
1799                 case_rrw(xor,);
1800                 case_rrr(lsh,);
1801                 case_rrw(lsh,);
1802                 case_rrr(rsh,);
1803                 case_rrw(rsh,);
1804                 case_rrr(rsh, _u);
1805                 case_rrw(rsh, _u);
1806                 case_rr(neg,);
1807                 case_rr(com,);
1808                 case_rr(clo,);
1809                 case_rr(clz,);
1810                 case_rr(cto,);
1811                 case_rr(ctz,);
1812                 case_rrr(lt,);
1813                 case_rrw(lt,);
1814                 case_rrr(lt, _u);
1815                 case_rrw(lt, _u);
1816                 case_rrr(le,);
1817                 case_rrw(le,);
1818                 case_rrr(le, _u);
1819                 case_rrw(le, _u);
1820                 case_rrr(eq,);
1821                 case_rrw(eq,);
1822                 case_rrr(ge,);
1823                 case_rrw(ge,);
1824                 case_rrr(ge, _u);
1825                 case_rrw(ge, _u);
1826                 case_rrr(gt,);
1827                 case_rrw(gt,);
1828                 case_rrr(gt, _u);
1829                 case_rrw(gt, _u);
1830                 case_rrr(ne,);
1831                 case_rrw(ne,);
1832             case jit_code_casr:
1833                 casr(rn(node->u.w), rn(node->v.w),
1834                      rn(node->w.q.l), rn(node->w.q.h));
1835                 break;
1836             case jit_code_casi:
1837                 casi(rn(node->u.w), node->v.w,
1838                      rn(node->w.q.l), rn(node->w.q.h));
1839                 break;
1840                 case_rrr(movn,);
1841                 case_rrr(movz,);
1842                 case_rr(mov,);
1843             case jit_code_movi:
1844                 if (node->flag & jit_flag_node) {
1845                     temp = node->v.n;
1846                     if (temp->code == jit_code_data ||
1847                         (temp->code == jit_code_label &&
1848                          (temp->flag & jit_flag_patch)))
1849                         movi(rn(node->u.w), temp->u.w);
1850                     else {
1851                         assert(temp->code == jit_code_label ||
1852                                temp->code == jit_code_epilog);
1853 #if CAN_RIP_ADDRESS
1854                         word = _jit->code.length -
1855                             (_jit->pc.uc - _jit->code.ptr);
1856                         if ((jit_int32_t)word == word)
1857                             word = movi(rn(node->u.w), _jit->pc.w);
1858                         else
1859 #endif
1860                             word = movi_p(rn(node->u.w), node->v.w);
1861                         patch(word, node);
1862                     }
1863                 }
1864                 else
1865                     movi(rn(node->u.w), node->v.w);
1866                 break;
1867                 case_rr(hton, _us);
1868                 case_rr(hton, _ui);
1869 #if __X64 && !__X64_32
1870                 case_rr(hton, _ul);
1871 #endif
1872                 case_rr(bswap, _us);
1873                 case_rr(bswap, _ui);
1874 #if __X64 && !__X64_32
1875                 case_rr(bswap, _ul);
1876 #endif
1877                 case_rr(ext, _c);
1878                 case_rr(ext, _uc);
1879                 case_rr(ext, _s);
1880                 case_rr(ext, _us);
1881 #if __X64 && !__X64_32
1882                 case_rr(ext, _i);
1883                 case_rr(ext, _ui);
1884 #endif
1885                 case_rf(trunc, _f_i);
1886                 case_rf(trunc, _d_i);
1887 #if __X64
1888                 case_rf(trunc, _f_l);
1889                 case_rf(trunc, _d_l);
1890 #endif
1891                 case_rr(ld, _c);
1892                 case_rw(ld, _c);
1893                 case_rr(ld, _uc);
1894                 case_rw(ld, _uc);
1895                 case_rr(ld, _s);
1896                 case_rw(ld, _s);
1897                 case_rr(ld, _us);
1898                 case_rw(ld, _us);
1899                 case_rr(ld, _i);
1900                 case_rw(ld, _i);
1901 #if __X64 && !__X64_32
1902                 case_rr(ld, _ui);
1903                 case_rw(ld, _ui);
1904                 case_rr(ld, _l);
1905                 case_rw(ld, _l);
1906 #endif
1907                 case_rrr(ldx, _c);
1908                 case_rrw(ldx, _c);
1909                 case_rrr(ldx, _uc);
1910                 case_rrw(ldx, _uc);
1911                 case_rrr(ldx, _s);
1912                 case_rrw(ldx, _s);
1913                 case_rrr(ldx, _us);
1914                 case_rrw(ldx, _us);
1915                 case_rrr(ldx, _i);
1916                 case_rrw(ldx, _i);
1917 #if __X64 && !__X64_32
1918                 case_rrr(ldx, _ui);
1919                 case_rrw(ldx, _ui);
1920                 case_rrr(ldx, _l);
1921                 case_rrw(ldx, _l);
1922 #endif
1923                 case_rr(st, _c);
1924                 case_wr(st, _c);
1925                 case_rr(st, _s);
1926                 case_wr(st, _s);
1927                 case_rr(st, _i);
1928                 case_wr(st, _i);
1929 #if __X64 && !__X64_32
1930                 case_rr(st, _l);
1931                 case_wr(st, _l);
1932 #endif
1933                 case_rrr(stx, _c);
1934                 case_wrr(stx, _c);
1935                 case_rrr(stx, _s);
1936                 case_wrr(stx, _s);
1937                 case_rrr(stx, _i);
1938                 case_wrr(stx, _i);
1939 #if __X64 && !__X64_32
1940                 case_rrr(stx, _l);
1941                 case_wrr(stx, _l);
1942 #endif
1943                 case_brr(blt,);
1944                 case_brw(blt,);
1945                 case_brr(blt, _u);
1946                 case_brw(blt, _u);
1947                 case_brr(ble,);
1948                 case_brw(ble,);
1949                 case_brr(ble, _u);
1950                 case_brw(ble, _u);
1951                 case_brr(beq,);
1952                 case_brw(beq,);
1953                 case_brr(bge,);
1954                 case_brw(bge,);
1955                 case_brr(bge, _u);
1956                 case_brw(bge, _u);
1957                 case_brr(bgt,);
1958                 case_brw(bgt,);
1959                 case_brr(bgt, _u);
1960                 case_brw(bgt, _u);
1961                 case_brr(bne,);
1962                 case_brw(bne,);
1963                 case_brr(bms,);
1964                 case_brw(bms,);
1965                 case_brr(bmc,);
1966                 case_brw(bmc,);
1967                 case_brr(boadd,);
1968                 case_brw(boadd,);
1969                 case_brr(boadd, _u);
1970                 case_brw(boadd, _u);
1971                 case_brr(bxadd,);
1972                 case_brw(bxadd,);
1973                 case_brr(bxadd, _u);
1974                 case_brw(bxadd, _u);
1975                 case_brr(bosub,);
1976                 case_brw(bosub,);
1977                 case_brr(bosub, _u);
1978                 case_brw(bosub, _u);
1979                 case_brr(bxsub,);
1980                 case_brw(bxsub,);
1981                 case_brr(bxsub, _u);
1982                 case_brw(bxsub, _u);
1983                 case_fff(add, _f);
1984                 case_ffw(add, _f, 32);
1985                 case_fff(sub, _f);
1986                 case_ffw(sub, _f, 32);
1987                 case_ffw(rsb, _f, 32);
1988                 case_fff(mul, _f);
1989                 case_ffw(mul, _f, 32);
1990                 case_fff(div, _f);
1991                 case_ffw(div, _f, 32);
1992                 case_ff(abs, _f);
1993                 case_ff(neg, _f);
1994                 case_ff(sqrt, _f);
1995                 case_fr(ext, _f);
1996                 case_fr(ext, _d_f);
1997                 case_rff(lt, _f);
1998                 case_rfw(lt, _f, 32);
1999                 case_rff(le, _f);
2000                 case_rfw(le, _f, 32);
2001                 case_rff(eq, _f);
2002                 case_rfw(eq, _f, 32);
2003                 case_rff(ge, _f);
2004                 case_rfw(ge, _f, 32);
2005                 case_rff(gt, _f);
2006                 case_rfw(gt, _f, 32);
2007                 case_rff(ne, _f);
2008                 case_rfw(ne, _f, 32);
2009                 case_rff(unlt, _f);
2010                 case_rfw(unlt, _f, 32);
2011                 case_rff(unle, _f);
2012                 case_rfw(unle, _f, 32);
2013                 case_rff(uneq, _f);
2014                 case_rfw(uneq, _f, 32);
2015                 case_rff(unge, _f);
2016                 case_rfw(unge, _f, 32);
2017                 case_rff(ungt, _f);
2018                 case_rfw(ungt, _f, 32);
2019                 case_rff(ltgt, _f);
2020                 case_rfw(ltgt, _f, 32);
2021                 case_rff(ord, _f);
2022                 case_rfw(ord, _f, 32);
2023                 case_rff(unord, _f);
2024                 case_rfw(unord, _f, 32);
2025             case jit_code_movr_f:
2026                 if (jit_x87_reg_p(node->u.w)) {
2027                     if (jit_x87_reg_p(node->v.w))
2028                         x87_movr_f(rn(node->u.w), rn(node->v.w));
2029                     else
2030                         x87_from_sse_f(rn(node->u.w), rn(node->v.w));
2031                 }
2032                 else {
2033                     if (jit_sse_reg_p(node->v.w))
2034                         sse_movr_f(rn(node->u.w), rn(node->v.w));
2035                     else
2036                         sse_from_x87_f(rn(node->u.w), rn(node->v.w));
2037                 }
2038                 break;
2039             case jit_code_movi_f:
2040                 assert(node->flag & jit_flag_data);
2041                 if (jit_x87_reg_p(node->u.w))
2042                     x87_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
2043                 else
2044                     sse_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
2045                 break;
2046                 case_fr(ld, _f);
2047                 case_fw(ld, _f);
2048                 case_frr(ldx, _f);
2049                 case_frw(ldx, _f);
2050                 case_rf(st, _f);
2051                 case_wf(st, _f);
2052                 case_rrf(stx, _f);
2053                 case_wrf(stx, _f);
2054                 case_bff(lt, _f);
2055                 case_bfw(lt, _f, 32);
2056                 case_bff(le, _f);
2057                 case_bfw(le, _f, 32);
2058                 case_bff(eq, _f);
2059                 case_bfw(eq, _f, 32);
2060                 case_bff(ge, _f);
2061                 case_bfw(ge, _f, 32);
2062                 case_bff(gt, _f);
2063                 case_bfw(gt, _f, 32);
2064                 case_bff(ne, _f);
2065                 case_bfw(ne, _f, 32);
2066                 case_bff(unlt, _f);
2067                 case_bfw(unlt, _f, 32);
2068                 case_bff(unle, _f);
2069                 case_bfw(unle, _f, 32);
2070                 case_bff(uneq, _f);
2071                 case_bfw(uneq, _f, 32);
2072                 case_bff(unge, _f);
2073                 case_bfw(unge, _f, 32);
2074                 case_bff(ungt, _f);
2075                 case_bfw(ungt, _f, 32);
2076                 case_bff(ltgt, _f);
2077                 case_bfw(ltgt, _f, 32);
2078                 case_bff(ord, _f);
2079                 case_bfw(ord, _f, 32);
2080                 case_bff(unord, _f);
2081                 case_bfw(unord, _f, 32);
2082                 case_fff(add, _d);
2083                 case_ffw(add, _d, 64);
2084                 case_fff(sub, _d);
2085                 case_ffw(sub, _d, 64);
2086                 case_ffw(rsb, _d, 64);
2087                 case_fff(mul, _d);
2088                 case_ffw(mul, _d, 64);
2089                 case_fff(div, _d);
2090                 case_ffw(div, _d, 64);
2091                 case_ff(abs, _d);
2092                 case_ff(neg, _d);
2093                 case_ff(sqrt, _d);
2094                 case_fr(ext, _d);
2095                 case_fr(ext, _f_d);
2096                 case_rff(lt, _d);
2097                 case_rfw(lt, _d, 64);
2098                 case_rff(le, _d);
2099                 case_rfw(le, _d, 64);
2100                 case_rff(eq, _d);
2101                 case_rfw(eq, _d, 64);
2102                 case_rff(ge, _d);
2103                 case_rfw(ge, _d, 64);
2104                 case_rff(gt, _d);
2105                 case_rfw(gt, _d, 64);
2106                 case_rff(ne, _d);
2107                 case_rfw(ne, _d, 64);
2108                 case_rff(unlt, _d);
2109                 case_rfw(unlt, _d, 64);
2110                 case_rff(unle, _d);
2111                 case_rfw(unle, _d, 64);
2112                 case_rff(uneq, _d);
2113                 case_rfw(uneq, _d, 64);
2114                 case_rff(unge, _d);
2115                 case_rfw(unge, _d, 64);
2116                 case_rff(ungt, _d);
2117                 case_rfw(ungt, _d, 64);
2118                 case_rff(ltgt, _d);
2119                 case_rfw(ltgt, _d, 64);
2120                 case_rff(ord, _d);
2121                 case_rfw(ord, _d, 64);
2122                 case_rff(unord, _d);
2123                 case_rfw(unord, _d, 64);
2124             case jit_code_movr_d:
2125                 if (jit_x87_reg_p(node->u.w)) {
2126                     if (jit_x87_reg_p(node->v.w))
2127                         x87_movr_d(rn(node->u.w), rn(node->v.w));
2128                     else
2129                         x87_from_sse_d(rn(node->u.w), rn(node->v.w));
2130                 }
2131                 else {
2132                     if (jit_sse_reg_p(node->v.w))
2133                         sse_movr_d(rn(node->u.w), rn(node->v.w));
2134                     else
2135                         sse_from_x87_d(rn(node->u.w), rn(node->v.w));
2136                 }
2137                 break;
2138             case jit_code_movi_d:
2139                 assert(node->flag & jit_flag_data);
2140                 if (jit_x87_reg_p(node->u.w))
2141                     x87_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
2142                 else
2143                     sse_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
2144                 break;
2145                 case_fr(ld, _d);
2146                 case_fw(ld, _d);
2147                 case_frr(ldx, _d);
2148                 case_frw(ldx, _d);
2149                 case_rf(st, _d);
2150                 case_wf(st, _d);
2151                 case_rrf(stx, _d);
2152                 case_wrf(stx, _d);
2153                 case_bff(lt, _d);
2154                 case_bfw(lt, _d, 64);
2155                 case_bff(le, _d);
2156                 case_bfw(le, _d, 64);
2157                 case_bff(eq, _d);
2158                 case_bfw(eq, _d, 64);
2159                 case_bff(ge, _d);
2160                 case_bfw(ge, _d, 64);
2161                 case_bff(gt, _d);
2162                 case_bfw(gt, _d, 64);
2163                 case_bff(ne, _d);
2164                 case_bfw(ne, _d, 64);
2165                 case_bff(unlt, _d);
2166                 case_bfw(unlt, _d, 64);
2167                 case_bff(unle, _d);
2168                 case_bfw(unle, _d, 64);
2169                 case_bff(uneq, _d);
2170                 case_bfw(uneq, _d, 64);
2171                 case_bff(unge, _d);
2172                 case_bfw(unge, _d, 64);
2173                 case_bff(ungt, _d);
2174                 case_bfw(ungt, _d, 64);
2175                 case_bff(ltgt, _d);
2176                 case_bfw(ltgt, _d, 64);
2177                 case_bff(ord, _d);
2178                 case_bfw(ord, _d, 64);
2179                 case_bff(unord, _d);
2180                 case_bfw(unord, _d, 64);
2181             case jit_code_jmpr:
2182                 jit_check_frame();
2183                 jmpr(rn(node->u.w));
2184                 break;
2185             case jit_code_jmpi:
2186                 if (node->flag & jit_flag_node) {
2187                     temp = node->u.n;
2188                     assert(temp->code == jit_code_label ||
2189                            temp->code == jit_code_epilog);
2190                     if (temp->flag & jit_flag_patch)
2191                         jmpi(temp->u.w);
2192                     else {
2193 #if __X64
2194                         word = _jit->code.length -
2195                             (_jit->pc.uc - _jit->code.ptr);
2196                         if ((jit_int32_t)word == word)
2197                             word = jmpi(_jit->pc.w);
2198                         else
2199 #endif
2200                             word = jmpi_p(_jit->pc.w);
2201                         patch(word, node);
2202                     }
2203                 }
2204                 else {
2205                     jit_check_frame();
2206                     jmpi(node->u.w);
2207                 }
2208                 break;
2209             case jit_code_callr:
2210                 jit_check_frame();
2211                 callr(rn(node->u.w));
2212                 break;
2213             case jit_code_calli:
2214                 if (node->flag & jit_flag_node) {
2215                     temp = node->u.n;
2216                     assert(temp->code == jit_code_label ||
2217                            temp->code == jit_code_epilog);
2218                     if (temp->flag & jit_flag_patch)
2219                         calli(temp->u.w);
2220                     else {
2221 #if __X64
2222                         word = _jit->code.length -
2223                             (_jit->pc.uc - _jit->code.ptr);
2224                         if ((jit_int32_t)word == word)
2225                             word = calli(_jit->pc.w);
2226                         else
2227 #endif
2228                             word = calli_p(_jit->pc.w);
2229                         patch(word, node);
2230                     }
2231                 }
2232                 else {
2233                     jit_check_frame();
2234                     calli(node->u.w);
2235                 }
2236                 break;
2237             case jit_code_prolog:
2238                 _jitc->function = _jitc->functions.ptr + node->w.w;
2239                 undo.node = node;
2240                 undo.word = _jit->pc.w;
2241                 memcpy(&undo.func, _jitc->function, sizeof(undo.func));
2242 #if DEVEL_DISASSEMBLER
2243                 undo.prevw = prevw;
2244 #endif
2245                 undo.patch_offset = _jitc->patches.offset;
2246             restart_function:
2247                 compute_framesize();
2248                 patch_alist(0);
2249                 _jitc->again = 0;
2250                 prolog(node);
2251                 break;
2252             case jit_code_epilog:
2253                 assert(_jitc->function == _jitc->functions.ptr + node->w.w);
2254                 if (_jitc->again) {
2255                     for (temp = undo.node->next;
2256                          temp != node; temp = temp->next) {
2257                         if (temp->code == jit_code_label ||
2258                             temp->code == jit_code_epilog)
2259                             temp->flag &= ~jit_flag_patch;
2260                     }
2261                     temp->flag &= ~jit_flag_patch;
2262                     node = undo.node;
2263                     _jit->pc.w = undo.word;
2264                     /* undo.func.self.aoff and undo.func.regset should not
2265                      * be undone, as they will be further updated, and are
2266                      * the reason of the undo. */
2267                     undo.func.self.aoff = _jitc->function->frame +
2268                         _jitc->function->self.aoff;
2269                     undo.func.need_frame = _jitc->function->need_frame;
2270                     jit_regset_set(&undo.func.regset, &_jitc->function->regset);
2271                     /* allocar information also does not need to be undone */
2272                     undo.func.aoffoff = _jitc->function->aoffoff;
2273                     undo.func.allocar = _jitc->function->allocar;
2274                     /* real stack framesize is not in the jit_function_t,
2275                      * if it were, would need to not be undone  */
2276                     /* cvt_offset must also not be undone */
2277                     undo.func.cvt_offset = _jitc->function->cvt_offset;
2278                     /* this will be recomputed but undo anyway to have it
2279                      * better self documented.*/
2280                     undo.func.need_stack = _jitc->function->need_stack;
2281                     memcpy(_jitc->function, &undo.func, sizeof(undo.func));
2282 #if DEVEL_DISASSEMBLER
2283                     prevw = undo.prevw;
2284 #endif
2285                     _jitc->patches.offset = undo.patch_offset;
2286                     patch_alist(1);
2287                     goto restart_function;
2288                 }
2289                 if (node->link &&
2290                     (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
2291                     nop(sizeof(jit_word_t) - word);
2292                 /* remember label is defined */
2293                 node->flag |= jit_flag_patch;
2294                 node->u.w = _jit->pc.w;
2295                 epilog(node);
2296                 _jitc->function = NULL;
2297                 break;
2298             case jit_code_va_start:
2299                 vastart(rn(node->u.w));
2300                 break;
2301             case jit_code_va_arg:
2302                 vaarg(rn(node->u.w), rn(node->v.w));
2303                 break;
2304             case jit_code_va_arg_d:
2305                 vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w));
2306                 break;
2307             case jit_code_live:                 case jit_code_ellipsis:
2308             case jit_code_va_push:
2309             case jit_code_allocai:              case jit_code_allocar:
2310             case jit_code_arg_c:                case jit_code_arg_s:
2311             case jit_code_arg_i:
2312 #  if __WORDSIZE == 64
2313             case jit_code_arg_l:
2314 #  endif
2315             case jit_code_arg_f:                case jit_code_arg_d:
2316             case jit_code_va_end:
2317             case jit_code_ret:
2318             case jit_code_retr_c:               case jit_code_reti_c:
2319             case jit_code_retr_uc:              case jit_code_reti_uc:
2320             case jit_code_retr_s:               case jit_code_reti_s:
2321             case jit_code_retr_us:              case jit_code_reti_us:
2322             case jit_code_retr_i:               case jit_code_reti_i:
2323 #if __WORDSIZE == 64
2324             case jit_code_retr_ui:              case jit_code_reti_ui:
2325             case jit_code_retr_l:               case jit_code_reti_l:
2326 #endif
2327             case jit_code_retr_f:               case jit_code_reti_f:
2328             case jit_code_retr_d:               case jit_code_reti_d:
2329             case jit_code_getarg_c:             case jit_code_getarg_uc:
2330             case jit_code_getarg_s:             case jit_code_getarg_us:
2331             case jit_code_getarg_i:
2332 #if __X64 && !__X64_32
2333             case jit_code_getarg_ui:            case jit_code_getarg_l:
2334 #endif
2335             case jit_code_getarg_f:             case jit_code_getarg_d:
2336             case jit_code_putargr_c:            case jit_code_putargi_c:
2337             case jit_code_putargr_uc:           case jit_code_putargi_uc:
2338             case jit_code_putargr_s:            case jit_code_putargi_s:
2339             case jit_code_putargr_us:           case jit_code_putargi_us:
2340             case jit_code_putargr_i:            case jit_code_putargi_i:
2341 #if __WORDSIZE == 64
2342             case jit_code_putargr_ui:           case jit_code_putargi_ui:
2343             case jit_code_putargr_l:            case jit_code_putargi_l:
2344 #endif
2345             case jit_code_putargr_f:            case jit_code_putargi_f:
2346             case jit_code_putargr_d:            case jit_code_putargi_d:
2347             case jit_code_pushargr_c:           case jit_code_pushargi_c:
2348             case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
2349             case jit_code_pushargr_s:           case jit_code_pushargi_s:
2350             case jit_code_pushargr_us:          case jit_code_pushargi_us:
2351             case jit_code_pushargr_i:           case jit_code_pushargi_i:
2352 #if __WORDSIZE == 64
2353             case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
2354             case jit_code_pushargr_l:           case jit_code_pushargi_l:
2355 #endif
2356             case jit_code_pushargr_f:           case jit_code_pushargi_f:
2357             case jit_code_pushargr_d:           case jit_code_pushargi_d:
2358             case jit_code_retval_c:             case jit_code_retval_uc:
2359             case jit_code_retval_s:             case jit_code_retval_us:
2360             case jit_code_retval_i:
2361 #if __X64 && !__X32
2362             case jit_code_retval_ui:            case jit_code_retval_l:
2363 #endif
2364             case jit_code_prepare:
2365             case jit_code_finishr:              case jit_code_finishi:
2366                 break;
2367             case jit_code_retval_f:
2368 #if __X32
2369                 if (jit_sse_reg_p(node->u.w)) {
2370                     fstpr(_ST1_REGNO);
2371                     sse_from_x87_f(rn(node->u.w), _ST0_REGNO);
2372                 }
2373                 else
2374                     fstpr(rn(node->u.w) + 1);
2375 #endif
2376                 break;
2377             case jit_code_retval_d:
2378 #if __X32
2379                 if (jit_sse_reg_p(node->u.w)) {
2380                     fstpr(_ST1_REGNO);
2381                     sse_from_x87_d(rn(node->u.w), _ST0_REGNO);
2382                 }
2383                 else
2384                     fstpr(rn(node->u.w) + 1);
2385 #endif
2386                 break;
2387             default:
2388                 abort();
2389         }
2390         jit_regarg_clr(node, value);
2391         assert(_jitc->regarg == 0 && _jitc->synth == 0);
2392         /* update register live state */
2393         jit_reglive(node);
2394     }
2395 #undef case_bfw
2396 #undef case_bff
2397 #undef case_ffw
2398 #undef case_rfw
2399 #undef case_rff
2400 #undef case_brw
2401 #undef case_brr
2402 #undef case_wrf
2403 #undef case_wrr
2404 #undef case_frw
2405 #undef case_rrf
2406 #undef case_rrw
2407 #undef case_frr
2408 #undef case_rrr
2409 #undef case_wf
2410 #undef case_fw
2411 #undef case_fr
2412 #undef case_rr
2413
2414     for (offset = 0; offset < _jitc->patches.offset; offset++) {
2415         node = _jitc->patches.ptr[offset].node;
2416         word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
2417         patch_at(_jitc->patches.ptr[offset].inst, word);
2418     }
2419
2420     jit_flush(_jit->code.ptr, _jit->pc.uc);
2421
2422     return (_jit->code.ptr);
2423 }
2424
2425 #define CODE                            1
2426 #  include "jit_x86-cpu.c"
2427 #  include "jit_x86-sse.c"
2428 #  include "jit_x86-x87.c"
2429 #undef CODE
2430
/*
 * Cache-flush hook for the range [fptr, tptr).
 *
 * This backend's implementation is intentionally empty: both arguments
 * are ignored and nothing is read or written.
 * NOTE(review): presumably no explicit instruction-cache maintenance is
 * needed on x86 -- confirm against the other backends' jit_flush().
 */
void
jit_flush(void *fptr, void *tptr)
{
    (void)fptr;
    (void)tptr;
}
2435
2436 void
2437 _emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
2438 {
2439     ldxi(rn(r0), rn(r1), i0);
2440 }
2441
2442 void
2443 _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
2444 {
2445     stxi(i0, rn(r0), rn(r1));
2446 }
2447
2448 void
2449 _emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
2450 {
2451     if (jit_x87_reg_p(r0))
2452         x87_ldxi_d(rn(r0), rn(r1), i0);
2453     else
2454         sse_ldxi_d(rn(r0), rn(r1), i0);
2455 }
2456
2457 void
2458 _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
2459 {
2460     if (jit_x87_reg_p(r1))
2461         x87_stxi_d(i0, rn(r0), rn(r1));
2462     else
2463         sse_stxi_d(i0, rn(r0), rn(r1));
2464 }
2465
2466 static void
2467 _compute_framesize(jit_state_t *_jit)
2468 {
2469     jit_int32_t         reg;
2470     /* Save stack pointer in first slot */
2471     _jitc->framesize = REAL_WORDSIZE;
2472     for (reg = 0; reg < jit_size(iregs); reg++)
2473         if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
2474             _jitc->framesize += REAL_WORDSIZE;
2475
2476 #if __X64 && (__CYGWIN__ || _WIN32)
2477     for (reg = 0; reg < jit_size(fregs); reg++)
2478         if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
2479             _jitc->framesize += sizeof(jit_float64_t);
2480 #endif
2481     /* Make sure functions called have a 16 byte aligned stack */
2482     _jitc->framesize = (_jitc->framesize + 15) & -16;
2483     _jitc->framesize += 16 - REAL_WORDSIZE;
2484 }
2485
2486 static void
2487 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
2488 {
2489     jit_int32_t         flag;
2490
2491     assert(node->flag & jit_flag_node);
2492     if (node->code == jit_code_movi)
2493         flag = node->v.n->flag;
2494     else
2495         flag = node->u.n->flag;
2496     assert(!(flag & jit_flag_patch));
2497     if (_jitc->patches.offset >= _jitc->patches.length) {
2498         jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
2499                     _jitc->patches.length * sizeof(jit_patch_t),
2500                     (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
2501         _jitc->patches.length += 1024;
2502     }
2503     _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
2504     _jitc->patches.ptr[_jitc->patches.offset].node = node;
2505     ++_jitc->patches.offset;
2506 }
2507
2508 static void
2509 _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2510 {
2511     CHECK_CVT_OFFSET();
2512     x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2513     sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
2514 }
2515
2516 static void
2517 _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2518 {
2519     CHECK_CVT_OFFSET();
2520     x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2521     sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
2522 }
2523
2524 static void
2525 _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2526 {
2527     CHECK_CVT_OFFSET();
2528     sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2529     x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
2530 }
2531
2532 static void
2533 _x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2534 {
2535     CHECK_CVT_OFFSET();
2536     sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2537     x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
2538 }