e3e1383469ba75a6914ec860295c84df36e2c043
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86.c
1 /*
2  * Copyright (C) 2012-2019  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #include <lightning.h>
21 #include <lightning/jit_private.h>
22
/*
 * ABI-dependent constants, selected at compile time for one of three
 * configurations: __X32, 64-bit Windows/Cygwin, or any other 64-bit target.
 *
 *   jit_arg_reg_p(i)    nonzero when integer argument slot i is a register
 *   jit_arg_f_reg_p(i)  same test for float/double argument slots
 *   stack_framesize     initial value of function->self.size (_jit_prolog)
 *   CVT_OFFSET          initial value of function->self.aoff (_jit_prolog,
 *                       commented there as "sse/x87 conversion")
 *   REAL_WORDSIZE       amount self.size grows for a stack-passed word or
 *                       float argument (_jit_arg, _jit_arg_f)
 */
#if __X32
/* No argument is ever classified as a register argument here. */
#  define jit_arg_reg_p(i)              0
#  define jit_arg_f_reg_p(i)            0
#  define stack_framesize               20
#  define stack_adjust                  12
#  define CVT_OFFSET                    -12
#  define REAL_WORDSIZE                 4
#  define va_gp_increment               4
#  define va_fp_increment               8
#else
#  if __CYGWIN__ || _WIN32
/* Four register slots, shared between integer and float arguments (the
 * float predicate is simply the integer one). */
#    define jit_arg_reg_p(i)            ((i) >= 0 && (i) < 4)
#    define jit_arg_f_reg_p(i)          jit_arg_reg_p(i)
#    define stack_framesize             152
#    define va_fp_increment             8
#  else
/* Six integer register slots and eight float register slots, counted
 * independently of each other. */
#    define jit_arg_reg_p(i)            ((i) >= 0 && (i) < 6)
#    define jit_arg_f_reg_p(i)          ((i) >= 0 && (i) < 8)
#    define stack_framesize             56
/* The macros below describe the register save area embedded in
 * jit_va_list_t: offsets are bytes, integer slots are 8 bytes wide and
 * float slots are 16 bytes wide. */
#    define first_gp_argument           rdi
#    define first_gp_offset             offsetof(jit_va_list_t, rdi)
#    define first_gp_from_offset(gp)    ((gp) / 8)
#    define last_gp_argument            r9
/* Byte size of the rdi..r9 run (offset of the last slot plus its 8 bytes). */
#    define va_gp_max_offset                                            \
        (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
#    define first_fp_argument           xmm0
#    define first_fp_offset             offsetof(jit_va_list_t, xmm0)
#    define last_fp_argument            xmm7
/* Byte size of the rdi..xmm7 run, measured from rdi, including the
 * 16-byte xmm7 slot. */
#    define va_fp_max_offset                                            \
        (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
#    define va_fp_increment             16
/* Converts a save-area byte offset back into a float slot index. */
#    define first_fp_from_offset(fp)    (((fp) - va_gp_max_offset) / 16)
#  endif
/* Shared by both 64-bit configurations. */
#  define va_gp_increment               8
#  define stack_adjust                  8
#  define CVT_OFFSET                    -8
#  define REAL_WORDSIZE                 8
#endif
61
62 /*
63  * Types
64  */
/*
 * Representation of a variadic argument cursor.
 *
 * On __X32 and on Windows/Cygwin it is a plain pointer.  Elsewhere it is a
 * structure that carries a header followed by a save area with one slot per
 * argument register; the va_*_offset macros above are computed from this
 * layout with offsetof(), so fields must not be reordered.
 *
 * NOTE(review): the four header fields look like the System V x86-64
 * va_list members (gp_offset, fp_offset, overflow_arg_area,
 * reg_save_area) -- confirm against the va_start/va_arg code elsewhere in
 * this file.
 */
#if __X32 || __CYGWIN__ || _WIN32
typedef jit_pointer_t jit_va_list_t;
#else
typedef struct jit_va_list {
    jit_int32_t         gpoff;
    jit_int32_t         fpoff;
    jit_pointer_t       over;
    jit_pointer_t       save;
    /* Declared explicitly as int64 for the x32 abi */
    jit_int64_t         rdi;
    jit_int64_t         rsi;
    jit_int64_t         rdx;
    jit_int64_t         rcx;
    jit_int64_t         r8;
    jit_int64_t         r9;
    /* Each xmmN field is followed by an 8-byte _upN field, so consecutive
     * xmm slots are 16 bytes apart (matching va_fp_increment). */
    jit_float64_t       xmm0;
    jit_float64_t       _up0;
    jit_float64_t       xmm1;
    jit_float64_t       _up1;
    jit_float64_t       xmm2;
    jit_float64_t       _up2;
    jit_float64_t       xmm3;
    jit_float64_t       _up3;
    jit_float64_t       xmm4;
    jit_float64_t       _up4;
    jit_float64_t       xmm5;
    jit_float64_t       _up5;
    jit_float64_t       xmm6;
    jit_float64_t       _up6;
    jit_float64_t       xmm7;
    jit_float64_t       _up7;
} jit_va_list_t;
#endif
98
99 /*
100  * Prototypes
101  */
/* File-local helpers.  Each one is reached through a wrapper macro that
 * supplies the `_jit` state variable of the calling function implicitly. */
#define patch(instr, node)              _patch(_jit, instr, node)
static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
/* NOTE(review): judging by their names, the four helpers below transfer a
 * float (_f) or double (_d) value between an x87 register and an SSE
 * register; their definitions are outside this part of the file. */
#define sse_from_x87_f(r0, r1)          _sse_from_x87_f(_jit, r0, r1)
static void _sse_from_x87_f(jit_state_t*,jit_int32_t,jit_int32_t);
#define sse_from_x87_d(r0, r1)          _sse_from_x87_d(_jit, r0, r1)
static void _sse_from_x87_d(jit_state_t*,jit_int32_t,jit_int32_t);
#define x87_from_sse_f(r0, r1)          _x87_from_sse_f(_jit, r0, r1)
static void _x87_from_sse_f(jit_state_t*,jit_int32_t,jit_int32_t);
#define x87_from_sse_d(r0, r1)          _x87_from_sse_d(_jit, r0, r1)
static void _x87_from_sse_d(jit_state_t*,jit_int32_t,jit_int32_t);
112
113 #define PROTO                           1
114 #  include "jit_x86-cpu.c"
115 #  include "jit_x86-sse.c"
116 #  include "jit_x86-x87.c"
117 #undef PROTO
118
119 /*
120  * Initialization
121  */
/* Feature flags of the running processor; filled in by jit_get_cpu(). */
jit_cpu_t               jit_cpu;
/*
 * Register description table.  Every entry combines class flags built with
 * rc() and the hardware register number into one spec word, and pairs it
 * with the printable register name.  The table ends with a _NOREG
 * sentinel, which _jit_init() leaves out of the register count
 * (reglen = jit_size(_rvs) - 1).
 *
 * Three variants exist: __X32, 64-bit Windows/Cygwin, and other 64-bit
 * targets.  Integer argument registers are listed so that argument slot n
 * is reached as JIT_RA0 - n, and float argument registers as _XMM0 - n
 * (see the getarg/putarg functions), so the order of entries matters.
 */
jit_register_t          _rvs[] = {
#if __X32
    { rc(gpr) | rc(rg8) | 0,            "%eax" },
    { rc(gpr) | rc(rg8) | 1,            "%ecx" },
    { rc(gpr) | rc(rg8) | 2,            "%edx" },
    { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%ebx" },
    { rc(sav) | rc(gpr) | 6,            "%esi" },
    { rc(sav) | rc(gpr) | 7,            "%edi" },
    { rc(sav) | 4,                      "%esp" },
    { rc(sav) | 5,                      "%ebp" },
    /* _jit_init() clears the spec of these xpr entries when the cpu lacks
     * sse2. */
    { rc(xpr) | rc(fpr) | 0,            "%xmm0" },
    { rc(xpr) | rc(fpr) | 1,            "%xmm1" },
    { rc(xpr) | rc(fpr) | 2,            "%xmm2" },
    { rc(xpr) | rc(fpr) | 3,            "%xmm3" },
    { rc(xpr) | rc(fpr) | 4,            "%xmm4" },
    { rc(xpr) | rc(fpr) | 5,            "%xmm5" },
    { rc(xpr) | rc(fpr) | 6,            "%xmm6" },
    { rc(xpr) | rc(fpr) | 7,            "%xmm7" },
    { rc(fpr) | 0,                      "st(0)" },
    { rc(fpr) | 1,                      "st(1)" },
    { rc(fpr) | 2,                      "st(2)" },
    { rc(fpr) | 3,                      "st(3)" },
    { rc(fpr) | 4,                      "st(4)" },
    { rc(fpr) | 5,                      "st(5)" },
    { rc(fpr) | 6,                      "st(6)" },
    { rc(fpr) | 7,                      "st(7)" },
#else
#  if __CYGWIN__ || _WIN32
    { rc(gpr) | rc(rg8) | 0,            "%rax" },
    { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" },
    { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" },
    { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%rbx" },
    { rc(sav) | rc(gpr) | 7,            "%rdi" },
    { rc(sav) | rc(gpr) | 6,            "%rsi" },
    { rc(sav) | rc(gpr) | 12,           "%r12" },
    { rc(sav) | rc(gpr) | 13,           "%r13" },
    { rc(sav) | rc(gpr) | 14,           "%r14" },
    { rc(sav) | rc(gpr) | 15,           "%r15" },
    { rc(arg) | rc(rg8) | rc(gpr) | 9,  "%r9" },
    { rc(arg) | rc(rg8) | rc(gpr) | 8,  "%r8" },
    { rc(arg) | rc(rg8) | rc(gpr) | 2,  "%rdx" },
    { rc(arg) | rc(rg8) | rc(gpr) | 1,  "%rcx" },
    { rc(sav) | 4,                      "%rsp" },
    { rc(sav) | 5,                      "%rbp" },
    { rc(xpr) | rc(fpr) | 4,            "%xmm4" },
    { rc(xpr) | rc(fpr) | 5,            "%xmm5" },
    { rc(sav) | rc(xpr) | rc(fpr) | 6,  "%xmm6" },
    { rc(sav) | rc(xpr) | rc(fpr) | 7,  "%xmm7" },
    { rc(sav) | rc(xpr) | rc(fpr) | 8,  "%xmm8" },
    { rc(sav) | rc(xpr) | rc(fpr) | 9,  "%xmm9" },
    { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" },
    { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" },
    { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" },
    { rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" },
    { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" },
    { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" },
    { rc(xpr) | rc(arg) | rc(fpr) | 3,  "%xmm3" },
    { rc(xpr) | rc(arg) | rc(fpr) | 2,  "%xmm2" },
    { rc(xpr) | rc(arg) | rc(fpr) | 1,  "%xmm1" },
    { rc(xpr) | rc(arg) | rc(fpr) | 0,  "%xmm0" },
#else
    /* %rax is a pseudo flag argument for varargs functions */
    { rc(arg) | rc(gpr) | rc(rg8) | 0,  "%rax" },
    { rc(gpr) | rc(rg8) | 10,           "%r10" },
    { rc(gpr) | rc(rg8) | 11,           "%r11" },
    { rc(sav) | rc(rg8) | rc(gpr) | 3,  "%rbx" },
    { rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" },
    { rc(sav) | rc(rg8) | rc(gpr) | 14, "%r14" },
    { rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" },
    { rc(sav) | rc(gpr) | rc(rg8) | 12, "%r12" },
    { rc(arg) | rc(rg8) | rc(gpr) | 9,  "%r9" },
    { rc(arg) | rc(rg8) | rc(gpr) | 8,  "%r8" },
    { rc(arg) | rc(rg8) | rc(gpr) | 1,  "%rcx" },
    { rc(arg) | rc(rg8) | rc(gpr) | 2,  "%rdx" },
    { rc(arg) | rc(rg8) | rc(gpr) | 6,  "%rsi" },
    { rc(arg) | rc(rg8) | rc(gpr) | 7,  "%rdi" },
    { rc(sav) | 4,                      "%rsp" },
    { rc(sav) | 5,                      "%rbp" },
    { rc(xpr) | rc(fpr) | 8,            "%xmm8" },
    { rc(xpr) | rc(fpr) | 9,            "%xmm9" },
    { rc(xpr) | rc(fpr) | 10,           "%xmm10" },
    { rc(xpr) | rc(fpr) | 11,           "%xmm11" },
    { rc(xpr) | rc(fpr) | 12,           "%xmm12" },
    { rc(xpr) | rc(fpr) | 13,           "%xmm13" },
    { rc(xpr) | rc(fpr) | 14,           "%xmm14" },
    { rc(xpr) | rc(fpr) | 15,           "%xmm15" },
    { rc(xpr) | rc(arg) | rc(fpr) | 7,  "%xmm7" },
    { rc(xpr) | rc(arg) | rc(fpr) | 6,  "%xmm6" },
    { rc(xpr) | rc(arg) | rc(fpr) | 5,  "%xmm5" },
    { rc(xpr) | rc(arg) | rc(fpr) | 4,  "%xmm4" },
    { rc(xpr) | rc(arg) | rc(fpr) | 3,  "%xmm3" },
    { rc(xpr) | rc(arg) | rc(fpr) | 2,  "%xmm2" },
    { rc(xpr) | rc(arg) | rc(fpr) | 1,  "%xmm1" },
    { rc(xpr) | rc(arg) | rc(fpr) | 0,  "%xmm0" },
#  endif
    { rc(fpr) | 0,                      "st(0)" },
    { rc(fpr) | 1,                      "st(1)" },
    { rc(fpr) | 2,                      "st(2)" },
    { rc(fpr) | 3,                      "st(3)" },
    { rc(fpr) | 4,                      "st(4)" },
    { rc(fpr) | 5,                      "st(5)" },
    { rc(fpr) | 6,                      "st(6)" },
    { rc(fpr) | 7,                      "st(7)" },
#endif
    /* Sentinel; not counted in reglen. */
    { _NOREG,                           "<none>" },
};
229
230 /*
231  * Implementation
232  */
/*
 * Probe the running processor with the cpuid instruction and record the
 * feature bits the backend cares about in the global jit_cpu.
 *
 * Function 1 is always queried; its %ecx and %edx results are decoded
 * through the bitfield overlays declared below.  When __X64 is defined,
 * function 0x80000001 is queried as well and bit 0 of its %ecx result is
 * stored in jit_cpu.lahf.
 *
 * On __X32 the function first checks that cpuid exists and returns without
 * touching jit_cpu when it does not.
 */
void
jit_get_cpu(void)
{
    /* Overlay for the %ecx result of cpuid function 1: `cpuid` receives the
     * raw register and `bits` names the individual flags, listed from bit 0
     * upward. */
    union {
        struct {
            jit_uint32_t sse3           : 1;
            jit_uint32_t pclmulqdq      : 1;
            jit_uint32_t dtes64         : 1;    /* amd reserved */
            jit_uint32_t monitor        : 1;
            jit_uint32_t ds_cpl         : 1;    /* amd reserved */
            jit_uint32_t vmx            : 1;    /* amd reserved */
            jit_uint32_t smx            : 1;    /* amd reserved */
            jit_uint32_t est            : 1;    /* amd reserved */
            jit_uint32_t tm2            : 1;    /* amd reserved */
            jit_uint32_t ssse3          : 1;
            jit_uint32_t cntx_id        : 1;    /* amd reserved */
            jit_uint32_t __reserved0    : 1;
            jit_uint32_t fma            : 1;
            jit_uint32_t cmpxchg16b     : 1;
            jit_uint32_t xtpr           : 1;    /* amd reserved */
            jit_uint32_t pdcm           : 1;    /* amd reserved */
            jit_uint32_t __reserved1    : 1;
            jit_uint32_t pcid           : 1;    /* amd reserved */
            jit_uint32_t dca            : 1;    /* amd reserved */
            jit_uint32_t sse4_1         : 1;
            jit_uint32_t sse4_2         : 1;
            jit_uint32_t x2apic         : 1;    /* amd reserved */
            jit_uint32_t movbe          : 1;    /* amd reserved */
            jit_uint32_t popcnt         : 1;
            jit_uint32_t tsc            : 1;    /* amd reserved */
            jit_uint32_t aes            : 1;
            jit_uint32_t xsave          : 1;
            jit_uint32_t osxsave        : 1;
            jit_uint32_t avx            : 1;
            jit_uint32_t __reserved2    : 1;    /* amd F16C */
            jit_uint32_t __reserved3    : 1;
            jit_uint32_t __alwayszero   : 1;    /* amd RAZ */
        } bits;
        jit_uword_t     cpuid;
    } ecx;
    /* Same arrangement for the %edx result of cpuid function 1. */
    union {
        struct {
            jit_uint32_t fpu            : 1;
            jit_uint32_t vme            : 1;
            jit_uint32_t de             : 1;
            jit_uint32_t pse            : 1;
            jit_uint32_t tsc            : 1;
            jit_uint32_t msr            : 1;
            jit_uint32_t pae            : 1;
            jit_uint32_t mce            : 1;
            jit_uint32_t cmpxchg8b      : 1;
            jit_uint32_t apic           : 1;
            jit_uint32_t __reserved0    : 1;
            jit_uint32_t sep            : 1;
            jit_uint32_t mtrr           : 1;
            jit_uint32_t pge            : 1;
            jit_uint32_t mca            : 1;
            jit_uint32_t cmov           : 1;
            jit_uint32_t pat            : 1;
            jit_uint32_t pse36          : 1;
            jit_uint32_t psn            : 1;    /* amd reserved */
            jit_uint32_t clfsh          : 1;
            jit_uint32_t __reserved1    : 1;
            jit_uint32_t ds             : 1;    /* amd reserved */
            jit_uint32_t acpi           : 1;    /* amd reserved */
            jit_uint32_t mmx            : 1;
            jit_uint32_t fxsr           : 1;
            jit_uint32_t sse            : 1;
            jit_uint32_t sse2           : 1;
            jit_uint32_t ss             : 1;    /* amd reserved */
            jit_uint32_t htt            : 1;
            jit_uint32_t tm             : 1;    /* amd reserved */
            jit_uint32_t __reserved2    : 1;
            jit_uint32_t pbe            : 1;    /* amd reserved */
        } bits;
        jit_uword_t     cpuid;
    } edx;
#if __X32
    int                 ac, flags;
#endif
    /* Receive the %eax and %ebx results; neither is read afterwards. */
    jit_uword_t         eax, ebx;

#if __X32
    /* adapted from glibc __sysconf */
    /* Sequence: save EFLAGS in `flags`, write it back with bits 0x240000
     * (bits 18 and 21) flipped, read EFLAGS again and xor with the saved
     * value so `ac` holds the bits that actually changed, then restore the
     * original EFLAGS. */
    __asm__ volatile ("pushfl;\n\t"
                      "popl %0;\n\t"
                      "movl $0x240000, %1;\n\t"
                      "xorl %0, %1;\n\t"
                      "pushl %1;\n\t"
                      "popfl;\n\t"
                      "pushfl;\n\t"
                      "popl %1;\n\t"
                      "xorl %0, %1;\n\t"
                      "pushl %0;\n\t"
                      "popfl"
                      : "=r" (flags), "=r" (ac));

    /* i386 or i486 without cpuid */
    /* Bit 21 could not be toggled, so cpuid is treated as unavailable. */
    if ((ac & (1 << 21)) == 0)
        /* probably without x87 as well */
        return;
#endif

    /* query %eax = 1 function */
    /* %ebx/%rbx is not named as an output: the xchg pair parks its value
     * in operand %1 around cpuid and swaps it back afterwards, leaving the
     * cpuid %ebx result in `ebx` and the register itself unchanged. */
#if __X32 || __X64_32
    __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
#else
    __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
#endif
                      : "=a" (eax), "=r" (ebx),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (1));

    /* Copy the decoded flags of interest into the global descriptor. */
    jit_cpu.fpu         = edx.bits.fpu;
    jit_cpu.cmpxchg8b   = edx.bits.cmpxchg8b;
    jit_cpu.cmov        = edx.bits.cmov;
    jit_cpu.mmx         = edx.bits.mmx;
    jit_cpu.sse         = edx.bits.sse;
    jit_cpu.sse2        = edx.bits.sse2;
    jit_cpu.sse3        = ecx.bits.sse3;
    jit_cpu.pclmulqdq   = ecx.bits.pclmulqdq;
    jit_cpu.ssse3       = ecx.bits.ssse3;
    jit_cpu.fma         = ecx.bits.fma;
    jit_cpu.cmpxchg16b  = ecx.bits.cmpxchg16b;
    jit_cpu.sse4_1      = ecx.bits.sse4_1;
    jit_cpu.sse4_2      = ecx.bits.sse4_2;
    jit_cpu.movbe       = ecx.bits.movbe;
    jit_cpu.popcnt      = ecx.bits.popcnt;
    jit_cpu.aes         = ecx.bits.aes;
    jit_cpu.avx         = ecx.bits.avx;

    /* query %eax = 0x80000001 function */
    /* This overwrites ecx/edx; only bit 0 of the new %ecx value is used. */
#if __X64
#  if __X64_32
    __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
#  else
    __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
#  endif
                      : "=a" (eax), "=r" (ebx),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (0x80000001));
    jit_cpu.lahf        = ecx.cpuid & 1;
#endif
}
377
378 void
379 _jit_init(jit_state_t *_jit)
380 {
381 #if __X32
382     jit_int32_t         regno;
383     static jit_bool_t   first = 1;
384 #endif
385
386     _jitc->reglen = jit_size(_rvs) - 1;
387 #if __X32
388     if (first) {
389         if (!jit_cpu.sse2) {
390             for (regno = _jitc->reglen; regno >= 0; regno--) {
391                 if (_rvs[regno].spec & jit_class_xpr)
392                     _rvs[regno].spec = 0;
393             }
394         }
395         first = 0;
396     }
397 #endif
398 }
399
400 void
401 _jit_prolog(jit_state_t *_jit)
402 {
403     jit_int32_t         offset;
404
405     if (_jitc->function)
406         jit_epilog();
407     assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
408     jit_regset_set_ui(&_jitc->regsav, 0);
409     offset = _jitc->functions.offset;
410     if (offset >= _jitc->functions.length) {
411         jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
412                     _jitc->functions.length * sizeof(jit_function_t),
413                     (_jitc->functions.length + 16) * sizeof(jit_function_t));
414         _jitc->functions.length += 16;
415     }
416     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
417     _jitc->function->self.size = stack_framesize;
418     _jitc->function->self.argi = _jitc->function->self.argf =
419         _jitc->function->self.aoff = _jitc->function->self.alen = 0;
420     /* sse/x87 conversion */
421     _jitc->function->self.aoff = CVT_OFFSET;
422     _jitc->function->self.call = jit_call_default;
423     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
424               _jitc->reglen * sizeof(jit_int32_t));
425
426     /* _no_link here does not mean the jit_link() call can be removed
427      * by rewriting as:
428      * _jitc->function->prolog = jit_new_node(jit_code_prolog);
429      */
430     _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
431     jit_link(_jitc->function->prolog);
432     _jitc->function->prolog->w.w = offset;
433     _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
434     /*  u:      label value
435      *  v:      offset in blocks vector
436      *  w:      offset in functions vector
437      */
438     _jitc->function->epilog->w.w = offset;
439
440     jit_regset_new(&_jitc->function->regset);
441 }
442
443 jit_int32_t
444 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
445 {
446     assert(_jitc->function);
447     switch (length) {
448         case 0: case 1:                                         break;
449         case 2:         _jitc->function->self.aoff &= -2;       break;
450         case 3: case 4: _jitc->function->self.aoff &= -4;       break;
451         default:        _jitc->function->self.aoff &= -8;       break;
452     }
453     _jitc->function->self.aoff -= length;
454
455     /* jit_allocai() may be called from jit_x86-cpu.c, and force a function
456      * generation restart on some conditions: div/rem and qmul/qdiv, due
457      * to registers constraints.
458      * The check is to prevent an assertion of a jit_xyz() being called
459      * during code generation, and attempting to add a node to the tail
460      * of the current IR generation. */
461     if (!_jitc->realize) {
462         jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
463         jit_dec_synth();
464     }
465
466     return (_jitc->function->self.aoff);
467 }
468
469 void
470 _jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
471 {
472     jit_int32_t          reg;
473     assert(_jitc->function);
474     jit_inc_synth_ww(allocar, u, v);
475     if (!_jitc->function->allocar) {
476         _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
477         _jitc->function->allocar = 1;
478     }
479     reg = jit_get_reg(jit_class_gpr);
480     jit_negr(reg, v);
481     jit_andi(reg, reg, -16);
482     jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
483     jit_addr(u, u, reg);
484     jit_addr(JIT_SP, JIT_SP, reg);
485     jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
486     jit_unget_reg(reg);
487     jit_dec_synth();
488 }
489
490 void
491 _jit_ret(jit_state_t *_jit)
492 {
493     jit_node_t          *instr;
494     assert(_jitc->function);
495     jit_inc_synth(ret);
496     /* jump to epilog */
497     instr = jit_jmpi();
498     jit_patch_at(instr, _jitc->function->epilog);
499     jit_dec_synth();
500 }
501
502 void
503 _jit_retr(jit_state_t *_jit, jit_int32_t u)
504 {
505     jit_inc_synth_w(retr, u);
506     /* movr(%ret, %ret) would be optimized out */
507     if (JIT_RET != u)
508         jit_movr(JIT_RET, u);
509     /* explicitly tell it is live */
510     jit_live(JIT_RET);
511     jit_ret();
512     jit_dec_synth();
513 }
514
515 void
516 _jit_reti(jit_state_t *_jit, jit_word_t u)
517 {
518     jit_inc_synth_w(reti, u);
519     jit_movi(JIT_RET, u);
520     jit_ret();
521     jit_dec_synth();
522 }
523
524 void
525 _jit_retr_f(jit_state_t *_jit, jit_int32_t u)
526 {
527     jit_inc_synth_w(retr_f, u);
528     if (JIT_FRET != u)
529         jit_movr_f(JIT_FRET, u);
530     else
531         jit_live(JIT_FRET);
532     jit_ret();
533     jit_dec_synth();
534 }
535
536 void
537 _jit_reti_f(jit_state_t *_jit, jit_float32_t u)
538 {
539     jit_inc_synth_f(reti_f, u);
540     jit_movi_f(JIT_FRET, u);
541     jit_ret();
542     jit_dec_synth();
543 }
544
545 void
546 _jit_retr_d(jit_state_t *_jit, jit_int32_t u)
547 {
548     jit_inc_synth_w(retr_d, u);
549     if (JIT_FRET != u)
550         jit_movr_d(JIT_FRET, u);
551     else
552         jit_live(JIT_FRET);
553     jit_ret();
554     jit_dec_synth();
555 }
556
557 void
558 _jit_reti_d(jit_state_t *_jit, jit_float64_t u)
559 {
560     jit_inc_synth_d(reti_d, u);
561     jit_movi_d(JIT_FRET, u);
562     jit_ret();
563     jit_dec_synth();
564 }
565
566 void
567 _jit_epilog(jit_state_t *_jit)
568 {
569     assert(_jitc->function);
570     assert(_jitc->function->epilog->next == NULL);
571     jit_link(_jitc->function->epilog);
572     _jitc->function = NULL;
573 }
574
575 jit_bool_t
576 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
577 {
578     if (u->code == jit_code_arg)
579         return (jit_arg_reg_p(u->u.w));
580     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
581     return (jit_arg_f_reg_p(u->u.w));
582 }
583
584 void
585 _jit_ellipsis(jit_state_t *_jit)
586 {
587     jit_inc_synth(ellipsis);
588     if (_jitc->prepare) {
589         jit_link_prepare();
590         /* Remember that a varargs function call is being constructed. */
591         assert(!(_jitc->function->call.call & jit_call_varargs));
592         _jitc->function->call.call |= jit_call_varargs;
593     }
594     else {
595         jit_link_prolog();
596         /* Remember the current function is varargs. */
597         assert(!(_jitc->function->self.call & jit_call_varargs));
598         _jitc->function->self.call |= jit_call_varargs;
599
600 #if __X64 && !(__CYGWIN__ || _WIN32)
601         /* Allocate va_list like object in the stack.
602          * If applicable, with enough space to save all argument
603          * registers, and use fixed offsets for them. */
604         _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
605
606         /* Initialize gp offset in save area. */
607         if (jit_arg_reg_p(_jitc->function->self.argi))
608             _jitc->function->vagp = _jitc->function->self.argi * 8;
609         else
610             _jitc->function->vagp = va_gp_max_offset;
611
612         /* Initialize fp offset in save area. */
613         if (jit_arg_f_reg_p(_jitc->function->self.argf))
614             _jitc->function->vafp = _jitc->function->self.argf * 16 +
615                                     va_gp_max_offset;
616         else
617             _jitc->function->vafp = va_fp_max_offset;
618 #endif
619     }
620     jit_dec_synth();
621 }
622
623 void
624 _jit_va_push(jit_state_t *_jit, jit_int32_t u)
625 {
626     jit_inc_synth_w(va_push, u);
627     jit_pushargr(u);
628     jit_dec_synth();
629 }
630
631 jit_node_t *
632 _jit_arg(jit_state_t *_jit)
633 {
634     jit_node_t          *node;
635     jit_int32_t          offset;
636     assert(_jitc->function);
637     assert(!(_jitc->function->self.call & jit_call_varargs));
638 #if __X64
639     if (jit_arg_reg_p(_jitc->function->self.argi)) {
640         offset = _jitc->function->self.argi++;
641 #  if __CYGWIN__ || _WIN32
642         _jitc->function->self.size += sizeof(jit_word_t);
643 #  endif
644     }
645     else
646 #endif
647     {
648         offset = _jitc->function->self.size;
649         _jitc->function->self.size += REAL_WORDSIZE;
650     }
651     node = jit_new_node_ww(jit_code_arg, offset,
652                            ++_jitc->function->self.argn);
653     jit_link_prolog();
654     return (node);
655 }
656
657 jit_node_t *
658 _jit_arg_f(jit_state_t *_jit)
659 {
660     jit_node_t          *node;
661     jit_int32_t          offset;
662     assert(_jitc->function);
663     assert(!(_jitc->function->self.call & jit_call_varargs));
664 #if __X64
665 #  if __CYGWIN__ || _WIN32
666     if (jit_arg_reg_p(_jitc->function->self.argi)) {
667         offset = _jitc->function->self.argi++;
668         _jitc->function->self.size += sizeof(jit_word_t);
669     }
670 #  else
671     if (jit_arg_f_reg_p(_jitc->function->self.argf))
672         offset = _jitc->function->self.argf++;
673 #  endif
674     else
675 #endif
676     {
677         offset = _jitc->function->self.size;
678         _jitc->function->self.size += REAL_WORDSIZE;
679     }
680     node = jit_new_node_ww(jit_code_arg_f, offset,
681                            ++_jitc->function->self.argn);
682     jit_link_prolog();
683     return (node);
684 }
685
686 jit_node_t *
687 _jit_arg_d(jit_state_t *_jit)
688 {
689     jit_node_t          *node;
690     jit_int32_t          offset;
691     assert(_jitc->function);
692     assert(!(_jitc->function->self.call & jit_call_varargs));
693 #if __X64
694 #  if __CYGWIN__ || _WIN32
695     if (jit_arg_reg_p(_jitc->function->self.argi)) {
696         offset = _jitc->function->self.argi++;
697         _jitc->function->self.size += sizeof(jit_word_t);
698     }
699 #  else
700     if (jit_arg_f_reg_p(_jitc->function->self.argf))
701         offset = _jitc->function->self.argf++;
702 #  endif
703     else
704 #endif
705     {
706         offset = _jitc->function->self.size;
707         _jitc->function->self.size += sizeof(jit_float64_t);
708     }
709     node = jit_new_node_ww(jit_code_arg_d, offset,
710                            ++_jitc->function->self.argn);
711     jit_link_prolog();
712     return (node);
713 }
714
715 void
716 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
717 {
718     assert(v->code == jit_code_arg);
719     jit_inc_synth_wp(getarg_c, u, v);
720 #if __X64
721     if (jit_arg_reg_p(v->u.w))
722         jit_extr_c(u, JIT_RA0 - v->u.w);
723     else
724 #endif
725         jit_ldxi_c(u, _RBP, v->u.w);
726     jit_dec_synth();
727 }
728
729 void
730 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
731 {
732     assert(v->code == jit_code_arg);
733     jit_inc_synth_wp(getarg_uc, u, v);
734 #if __X64
735     if (jit_arg_reg_p(v->u.w))
736         jit_extr_uc(u, JIT_RA0 - v->u.w);
737     else
738 #endif
739         jit_ldxi_uc(u, _RBP, v->u.w);
740     jit_dec_synth();
741 }
742
743 void
744 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
745 {
746     assert(v->code == jit_code_arg);
747     jit_inc_synth_wp(getarg_s, u, v);
748 #if __X64
749     if (jit_arg_reg_p(v->u.w))
750         jit_extr_s(u, JIT_RA0 - v->u.w);
751     else
752 #endif
753         jit_ldxi_s(u, _RBP, v->u.w);
754     jit_dec_synth();
755 }
756
757 void
758 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
759 {
760     assert(v->code == jit_code_arg);
761     jit_inc_synth_wp(getarg_us, u, v);
762 #if __X64
763     if (jit_arg_reg_p(v->u.w))
764         jit_extr_us(u, JIT_RA0 - v->u.w);
765     else
766 #endif
767         jit_ldxi_us(u, _RBP, v->u.w);
768     jit_dec_synth();
769 }
770
771 void
772 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
773 {
774     assert(v->code == jit_code_arg);
775     jit_inc_synth_wp(getarg_i, u, v);
776 #if __X64
777     if (jit_arg_reg_p(v->u.w)) {
778 #  if __X64_32
779         jit_movr(u, JIT_RA0 - v->u.w);
780 #  else
781         jit_extr_i(u, JIT_RA0 - v->u.w);
782 #  endif
783      }
784     else
785 #endif
786         jit_ldxi_i(u, _RBP, v->u.w);
787     jit_dec_synth();
788 }
789
790 #if __X64 && !__X64_32
791 void
792 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
793 {
794     assert(v->code == jit_code_arg);
795     jit_inc_synth_wp(getarg_ui, u, v);
796     if (jit_arg_reg_p(v->u.w))
797         jit_extr_ui(u, JIT_RA0 - v->u.w);
798     else
799         jit_ldxi_ui(u, _RBP, v->u.w);
800     jit_dec_synth();
801 }
802
803 void
804 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
805 {
806     assert(v->code == jit_code_arg);
807     jit_inc_synth_wp(getarg_l, u, v);
808     if (jit_arg_reg_p(v->u.w))
809         jit_movr(u, JIT_RA0 - v->u.w);
810     else
811         jit_ldxi_l(u, _RBP, v->u.w);
812     jit_dec_synth();
813 }
814 #endif
815
816 void
817 _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
818 {
819     assert(v->code == jit_code_arg);
820     jit_inc_synth_wp(putargr, u, v);
821 #if __X64
822     if (jit_arg_reg_p(v->u.w))
823         jit_movr(JIT_RA0 - v->u.w, u);
824     else
825 #endif
826         jit_stxi(v->u.w, _RBP, u);
827     jit_dec_synth();
828 }
829
830 void
831 _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
832 {
833     jit_int32_t         regno;
834     assert(v->code == jit_code_arg);
835     jit_inc_synth_wp(putargi, u, v);
836 #if __X64
837     if (jit_arg_reg_p(v->u.w))
838         jit_movi(JIT_RA0 - v->u.w, u);
839     else
840 #endif
841     {
842         regno = jit_get_reg(jit_class_gpr);
843         jit_movi(regno, u);
844         jit_stxi(v->u.w, _RBP, regno);
845         jit_unget_reg(regno);
846     }
847     jit_dec_synth();
848 }
849
850 void
851 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
852 {
853     assert(v->code == jit_code_arg_f);
854     jit_inc_synth_wp(getarg_f, u, v);
855 #if __X64
856     if (jit_arg_f_reg_p(v->u.w))
857         jit_movr_f(u, _XMM0 - v->u.w);
858     else
859 #endif
860         jit_ldxi_f(u, _RBP, v->u.w);
861     jit_dec_synth();
862 }
863
864 void
865 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
866 {
867     assert(v->code == jit_code_arg_f);
868     jit_inc_synth_wp(putargr_f, u, v);
869 #if __X64
870     if (jit_arg_reg_p(v->u.w))
871         jit_movr_f(_XMM0 - v->u.w, u);
872     else
873 #endif
874         jit_stxi_f(v->u.w, _RBP, u);
875     jit_dec_synth();
876 }
877
878 void
879 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
880 {
881     jit_int32_t         regno;
882     assert(v->code == jit_code_arg_f);
883     jit_inc_synth_fp(putargi_f, u, v);
884 #if __X64
885     if (jit_arg_reg_p(v->u.w))
886         jit_movi_f(_XMM0 - v->u.w, u);
887     else
888 #endif
889     {
890         regno = jit_get_reg(jit_class_gpr);
891         jit_movi_f(regno, u);
892         jit_stxi_f(v->u.w, _RBP, regno);
893         jit_unget_reg(regno);
894     }
895     jit_dec_synth();
896 }
897
898 void
899 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
900 {
901     assert(v->code == jit_code_arg_d);
902     jit_inc_synth_wp(getarg_d, u, v);
903 #if __X64
904     if (jit_arg_f_reg_p(v->u.w))
905         jit_movr_d(u, _XMM0 - v->u.w);
906     else
907 #endif
908         jit_ldxi_d(u, _RBP, v->u.w);
909     jit_dec_synth();
910 }
911
912 void
913 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
914 {
915     assert(v->code == jit_code_arg_d);
916     jit_inc_synth_wp(putargr_d, u, v);
917 #if __X64
918     if (jit_arg_reg_p(v->u.w))
919         jit_movr_d(_XMM0 - v->u.w, u);
920     else
921 #endif
922         jit_stxi_d(v->u.w, _RBP, u);
923     jit_dec_synth();
924 }
925
926 void
927 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
928 {
929     jit_int32_t         regno;
930     assert(v->code == jit_code_arg_d);
931     jit_inc_synth_dp(putargi_d, u, v);
932 #if __X64
933     if (jit_arg_reg_p(v->u.w))
934         jit_movi_d(_XMM0 - v->u.w, u);
935     else
936 #endif
937     {
938         regno = jit_get_reg(jit_class_gpr);
939         jit_movi_d(regno, u);
940         jit_stxi_d(v->u.w, _RBP, regno);
941         jit_unget_reg(regno);
942     }
943     jit_dec_synth();
944 }
945
946 void
947 _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
948 {
949     assert(_jitc->function);
950     jit_inc_synth_w(pushargr, u);
951     jit_link_prepare();
952 #if __X64
953     if (jit_arg_reg_p(_jitc->function->call.argi)) {
954         jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
955         ++_jitc->function->call.argi;
956 #  if __CYGWIN__ || _WIN32
957         if (_jitc->function->call.call & jit_call_varargs)
958             jit_stxi(_jitc->function->call.size, _RSP, u);
959         _jitc->function->call.size += sizeof(jit_word_t);
960 #  endif
961     }
962     else
963 #endif
964     {
965         jit_stxi(_jitc->function->call.size, _RSP, u);
966         _jitc->function->call.size += REAL_WORDSIZE;
967     }
968     jit_dec_synth();
969 }
970
971 void
972 _jit_pushargi(jit_state_t *_jit, jit_word_t u)
973 {
974     jit_int32_t          regno;
975     assert(_jitc->function);
976     jit_inc_synth_w(pushargi, u);
977     jit_link_prepare();
978 #if __X64
979     if (jit_arg_reg_p(_jitc->function->call.argi)) {
980         jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
981 #  if __CYGWIN__ || _WIN32
982         if (_jitc->function->call.call & jit_call_varargs)
983             jit_stxi(_jitc->function->call.size, _RSP,
984                      JIT_RA0 - _jitc->function->call.argi);
985         _jitc->function->call.size += sizeof(jit_word_t);
986 #  endif
987         ++_jitc->function->call.argi;
988     }
989     else
990 #endif
991     {
992         regno = jit_get_reg(jit_class_gpr);
993         jit_movi(regno, u);
994         jit_stxi(_jitc->function->call.size, _RSP, regno);
995         _jitc->function->call.size += REAL_WORDSIZE;
996         jit_unget_reg(regno);
997     }
998     jit_dec_synth();
999 }
1000
1001 void
1002 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
1003 {
1004     assert(_jitc->function);
1005     jit_inc_synth_w(pushargr_f, u);
1006     jit_link_prepare();
1007 #if __X64
1008 #  if __CYGWIN__ || _WIN32
1009     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1010         jit_movr_f(_XMM0 - _jitc->function->call.argi, u);
1011         if (_jitc->function->call.call & jit_call_varargs) {
1012             jit_stxi_f(_jitc->function->call.size, _RSP,
1013                        _XMM0 - _jitc->function->call.argi);
1014             jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1015                        _jitc->function->call.size);
1016         }
1017         ++_jitc->function->call.argi;
1018         _jitc->function->call.size += sizeof(jit_word_t);
1019     }
1020 #  else
1021     if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
1022         jit_movr_f(_XMM0 - _jitc->function->call.argf, u);
1023         ++_jitc->function->call.argf;
1024     }
1025 #  endif
1026     else
1027 #endif
1028     {
1029         jit_stxi_f(_jitc->function->call.size, _RSP, u);
1030         _jitc->function->call.size += REAL_WORDSIZE;
1031     }
1032     jit_dec_synth();
1033 }
1034
1035 void
1036 _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
1037 {
1038     jit_int32_t         regno;
1039     assert(_jitc->function);
1040     jit_inc_synth_f(pushargi_f, u);
1041     jit_link_prepare();
1042 #if __X64
1043 #  if __CYGWIN__ || _WIN32
1044     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1045         jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
1046         if (_jitc->function->call.call & jit_call_varargs) {
1047             jit_stxi_f(_jitc->function->call.size, _RSP,
1048                        _XMM0 - _jitc->function->call.argi);
1049             jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
1050                        _jitc->function->call.size);
1051         }
1052         ++_jitc->function->call.argi;
1053         _jitc->function->call.size += sizeof(jit_word_t);
1054     }
1055 #  else
1056     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1057         jit_movi_f(_XMM0 - _jitc->function->call.argf, u);
1058         ++_jitc->function->call.argf;
1059     }
1060 #  endif
1061     else
1062 #endif
1063     {
1064         regno = jit_get_reg(jit_class_fpr);
1065         jit_movi_f(regno, u);
1066         jit_stxi_f(_jitc->function->call.size, _RSP, regno);
1067         _jitc->function->call.size += REAL_WORDSIZE;
1068         jit_unget_reg(regno);
1069     }
1070     jit_dec_synth();
1071 }
1072
1073 void
1074 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
1075 {
1076     assert(_jitc->function);
1077     jit_inc_synth_w(pushargr_d, u);
1078     jit_link_prepare();
1079 #if __X64
1080 #  if __CYGWIN__ || _WIN32
1081     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1082         jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
1083         if (_jitc->function->call.call & jit_call_varargs) {
1084             jit_stxi_d(_jitc->function->call.size, _RSP,
1085                        _XMM0 - _jitc->function->call.argi);
1086             jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1087                        _jitc->function->call.size);
1088         }
1089         ++_jitc->function->call.argi;
1090         _jitc->function->call.size += sizeof(jit_word_t);
1091     }
1092 #  else
1093     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1094         jit_movr_d(_XMM0 - _jitc->function->call.argf, u);
1095         ++_jitc->function->call.argf;
1096     }
1097 #  endif
1098     else
1099 #endif
1100     {
1101         jit_stxi_d(_jitc->function->call.size, _RSP, u);
1102         _jitc->function->call.size += sizeof(jit_float64_t);
1103     }
1104     jit_dec_synth();
1105 }
1106
1107 void
1108 _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
1109 {
1110     jit_int32_t          regno;
1111     assert(_jitc->function);
1112     jit_inc_synth_d(pushargi_d, u);
1113     jit_link_prepare();
1114 #if __X64
1115 #  if __CYGWIN__ || _WIN32
1116     if (jit_arg_reg_p(_jitc->function->call.argi)) {
1117         jit_movi_d(_XMM0 - _jitc->function->call.argi, u);
1118         if (_jitc->function->call.call & jit_call_varargs) {
1119             jit_stxi_d(_jitc->function->call.size, _RSP,
1120                        _XMM0 - _jitc->function->call.argi);
1121             jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
1122                        _jitc->function->call.size);
1123         }
1124         ++_jitc->function->call.argi;
1125         _jitc->function->call.size += sizeof(jit_word_t);
1126     }
1127 #  else
1128     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
1129         jit_movi_d(_XMM0 - _jitc->function->call.argf, u);
1130         ++_jitc->function->call.argf;
1131     }
1132 #  endif
1133     else
1134 #endif
1135     {
1136         regno = jit_get_reg(jit_class_fpr);
1137         jit_movi_d(regno, u);
1138         jit_stxi_d(_jitc->function->call.size, _RSP, regno);
1139         _jitc->function->call.size += sizeof(jit_float64_t);
1140         jit_unget_reg(regno);
1141     }
1142     jit_dec_synth();
1143 }
1144
1145 jit_bool_t
1146 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
1147 {
1148 #if __X64
1149     jit_int32_t         spec;
1150
1151     spec = jit_class(_rvs[regno].spec);
1152     if (spec & jit_class_arg) {
1153         if (spec & jit_class_gpr) {
1154             regno = JIT_RA0 - regno;
1155             if (regno >= 0 && regno < node->v.w)
1156                 return (1);
1157         }
1158         else if (spec & jit_class_fpr) {
1159             regno = _XMM0 - regno;
1160             if (regno >= 0 && regno < node->w.w)
1161                 return (1);
1162         }
1163     }
1164 #endif
1165     return (0);
1166 }
1167
1168 void
1169 _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
1170 {
1171     jit_int32_t          reg;
1172     jit_node_t          *call;
1173     assert(_jitc->function);
1174     reg = r0;
1175     jit_inc_synth_w(finishr, r0);
1176     if (_jitc->function->self.alen < _jitc->function->call.size)
1177         _jitc->function->self.alen = _jitc->function->call.size;
1178 #if __X64
1179 #  if !(__CYGWIN__ || _WIN32)
1180     if (_jitc->function->call.call & jit_call_varargs) {
1181         if (jit_regno(reg) == _RAX) {
1182             reg = jit_get_reg(jit_class_gpr);
1183             jit_movr(reg, _RAX);
1184         }
1185         if (_jitc->function->call.argf)
1186             jit_movi(_RAX, _jitc->function->call.argf);
1187         else
1188             jit_movi(_RAX, 0);
1189         if (reg != r0)
1190             jit_unget_reg(reg);
1191     }
1192 #  endif
1193 #endif
1194     call = jit_callr(reg);
1195     call->v.w = _jitc->function->call.argi;
1196     call->w.w = _jitc->function->call.argf;
1197     _jitc->function->call.argi = _jitc->function->call.argf =
1198         _jitc->function->call.size = 0;
1199     _jitc->prepare = 0;
1200     jit_dec_synth();
1201 }
1202
1203 jit_node_t *
1204 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
1205 {
1206 #if __X64
1207     jit_int32_t         reg;
1208 #endif
1209     jit_node_t          *node;
1210     assert(_jitc->function);
1211     jit_inc_synth_w(finishi, (jit_word_t)i0);
1212     if (_jitc->function->self.alen < _jitc->function->call.size)
1213         _jitc->function->self.alen = _jitc->function->call.size;
1214 #if __X64
1215     /* FIXME preventing %rax allocation is good enough, but for consistency
1216      * it should automatically detect %rax is dead, in case it has run out
1217      * registers, and not save/restore it, what would be wrong if using the
1218      * the return value, otherwise, just a needless noop */
1219     /* >> prevent %rax from being allocated as the function pointer */
1220     jit_regset_setbit(&_jitc->regarg, _RAX);
1221     reg = jit_get_reg(jit_class_gpr);
1222     node = jit_movi(reg, (jit_word_t)i0);
1223     jit_finishr(reg);
1224     jit_unget_reg(reg);
1225     /* << prevent %rax from being allocated as the function pointer */
1226     jit_regset_clrbit(&_jitc->regarg, _RAX);
1227 #else
1228     node = jit_calli(i0);
1229     node->v.w = _jitc->function->call.argi;
1230     node->w.w = _jitc->function->call.argf;
1231 #endif
1232     _jitc->function->call.argi = _jitc->function->call.argf =
1233         _jitc->function->call.size = 0;
1234     _jitc->prepare = 0;
1235     jit_dec_synth();
1236     return (node);
1237 }
1238
1239 void
1240 _jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
1241 {
1242     jit_inc_synth_w(retval_c, r0);
1243     jit_extr_c(r0, JIT_RET);
1244     jit_dec_synth();
1245 }
1246
1247 void
1248 _jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
1249 {
1250     jit_inc_synth_w(retval_uc, r0);
1251     jit_extr_uc(r0, JIT_RET);
1252     jit_dec_synth();
1253 }
1254
1255 void
1256 _jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
1257 {
1258     jit_inc_synth_w(retval_s, r0);
1259     jit_extr_s(r0, JIT_RET);
1260     jit_dec_synth();
1261 }
1262
1263 void
1264 _jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
1265 {
1266     jit_inc_synth_w(retval_us, r0);
1267     jit_extr_us(r0, JIT_RET);
1268     jit_dec_synth();
1269 }
1270
1271 void
1272 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
1273 {
1274     jit_inc_synth_w(retval_i, r0);
1275 #if __X32 || __X64_32
1276     if (r0 != JIT_RET)
1277         jit_movr(r0, JIT_RET);
1278 #else
1279     jit_extr_i(r0, JIT_RET);
1280 #endif
1281     jit_dec_synth();
1282 }
1283
1284 #if __X64 && !__X64_32
1285 void
1286 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
1287 {
1288     jit_inc_synth_w(retval_ui, r0);
1289     jit_extr_ui(r0, JIT_RET);
1290     jit_dec_synth();
1291 }
1292
1293 void
1294 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
1295 {
1296     jit_inc_synth_w(retval_l, r0);
1297     if (r0 != JIT_RET)
1298         jit_movr(r0, JIT_RET);
1299     jit_dec_synth();
1300 }
1301 #endif
1302
1303 void
1304 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
1305 {
1306     jit_inc_synth_w(retval_f, r0);
1307 #if __X64
1308     if (r0 != JIT_FRET)
1309         jit_movr_f(r0, JIT_FRET);
1310 #endif
1311     jit_dec_synth();
1312 }
1313
1314 void
1315 _jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
1316 {
1317     jit_inc_synth_w(retval_d, r0);
1318 #if __X64
1319     if (r0 != JIT_FRET)
1320         jit_movr_d(r0, JIT_FRET);
1321 #endif
1322     jit_dec_synth();
1323 }
1324
1325 jit_pointer_t
1326 _emit_code(jit_state_t *_jit)
1327 {
1328     jit_node_t          *node;
1329     jit_node_t          *temp;
1330     jit_word_t           word;
1331     jit_int32_t          value;
1332     jit_int32_t          offset;
1333     struct {
1334         jit_node_t      *node;
1335         jit_word_t       word;
1336 #if DEVEL_DISASSEMBLER
1337         jit_word_t       prevw;
1338 #endif
1339         jit_int32_t      patch_offset;
1340     } undo;
1341 #if DEVEL_DISASSEMBLER
1342     jit_word_t           prevw;
1343 #endif
1344
1345     _jitc->function = NULL;
1346
1347     jit_reglive_setup();
1348
1349     undo.word = 0;
1350     undo.node = NULL;
1351     undo.patch_offset = 0;
1352 #define case_rr(name, type)                                             \
1353             case jit_code_##name##r##type:                              \
1354                 name##r##type(rn(node->u.w), rn(node->v.w));            \
1355                 break
1356 #define case_rw(name, type)                                             \
1357             case jit_code_##name##i##type:                              \
1358                 name##i##type(rn(node->u.w), node->v.w);                \
1359                 break
1360 #define case_rf(name, type)                                             \
1361             case jit_code_##name##r##type:                              \
1362                 if (jit_x87_reg_p(node->v.w))                           \
1363                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1364                 else                                                    \
1365                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1366                 break
1367 #define case_fr(name, type)                                             \
1368             case jit_code_##name##r##type:                              \
1369                 if (jit_x87_reg_p(node->u.w))                           \
1370                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1371                 else                                                    \
1372                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1373                 break
1374 #define case_fw(name, type)                                             \
1375             case jit_code_##name##i##type:                              \
1376                 if (jit_x87_reg_p(node->u.w))                           \
1377                     x87_##name##i##type(rn(node->u.w), node->v.w);      \
1378                 else                                                    \
1379                     sse_##name##i##type(rn(node->u.w), node->v.w);      \
1380                 break
1381 #define case_wr(name, type)                                             \
1382             case jit_code_##name##i##type:                              \
1383                 name##i##type(node->u.w, rn(node->v.w));                \
1384                 break
1385 #define case_wf(name, type)                                             \
1386             case jit_code_##name##i##type:                              \
1387                 if (jit_x87_reg_p(node->v.w))                           \
1388                     x87_##name##i##type(node->u.w, rn(node->v.w));      \
1389                 else                                                    \
1390                     sse_##name##i##type(node->u.w, rn(node->v.w));      \
1391                 break
1392 #define case_ff(name, type)                                             \
1393             case jit_code_##name##r##type:                              \
1394                 if (jit_x87_reg_p(node->u.w) &&                         \
1395                     jit_x87_reg_p(node->v.w))                           \
1396                     x87_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1397                 else                                                    \
1398                     sse_##name##r##type(rn(node->u.w), rn(node->v.w));  \
1399                 break;
1400 #define case_rrr(name, type)                                            \
1401             case jit_code_##name##r##type:                              \
1402                 name##r##type(rn(node->u.w),                            \
1403                               rn(node->v.w), rn(node->w.w));            \
1404                 break
1405 #define case_rrrr(name, type)                                           \
1406             case jit_code_##name##r##type:                              \
1407                 name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
1408                               rn(node->v.w), rn(node->w.w));            \
1409                 break
1410 #define case_frr(name, type)                                            \
1411             case jit_code_##name##r##type:                              \
1412                 if (jit_x87_reg_p(node->u.w))                           \
1413                     x87_##name##r##type(rn(node->u.w),                  \
1414                                         rn(node->v.w), rn(node->w.w));  \
1415                 else                                                    \
1416                     sse_##name##r##type(rn(node->u.w),                  \
1417                                         rn(node->v.w), rn(node->w.w));  \
1418                 break
1419 #define case_rrf(name, type)                                            \
1420             case jit_code_##name##r##type:                              \
1421                 if (jit_x87_reg_p(node->w.w))                           \
1422                     x87_##name##r##type(rn(node->u.w),                  \
1423                                         rn(node->v.w), rn(node->w.w));  \
1424                 else                                                    \
1425                     sse_##name##r##type(rn(node->u.w),                  \
1426                                         rn(node->v.w), rn(node->w.w));  \
1427                 break
1428 #define case_rrw(name, type)                                            \
1429             case jit_code_##name##i##type:                              \
1430                 name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
1431                 break
1432 #define case_rrrw(name, type)                                           \
1433             case jit_code_##name##i##type:                              \
1434                 name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
1435                               rn(node->v.w), node->w.w);                \
1436                 break
1437 #define case_frw(name, type)                                            \
1438             case jit_code_##name##i##type:                              \
1439                 if (jit_x87_reg_p(node->u.w))                           \
1440                     x87_##name##i##type(rn(node->u.w),                  \
1441                                         rn(node->v.w), node->w.w);      \
1442                 else                                                    \
1443                     sse_##name##i##type(rn(node->u.w),                  \
1444                                         rn(node->v.w), node->w.w);      \
1445                 break
1446 #define case_wrr(name, type)                                            \
1447             case jit_code_##name##i##type:                              \
1448                 name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
1449                 break
1450 #define case_wrf(name, type)                                            \
1451             case jit_code_##name##i##type:                              \
1452                 if (jit_x87_reg_p(node->w.w))                           \
1453                     x87_##name##i##type(node->u.w,                      \
1454                                         rn(node->v.w), rn(node->w.w));  \
1455                 else                                                    \
1456                     sse_##name##i##type(node->u.w,                      \
1457                                         rn(node->v.w), rn(node->w.w));  \
1458                 break
1459 #define case_brr(name, type)                                            \
1460             case jit_code_##name##r##type:                              \
1461                 temp = node->u.n;                                       \
1462                 assert(temp->code == jit_code_label ||                  \
1463                        temp->code == jit_code_epilog);                  \
1464                 if (temp->flag & jit_flag_patch)                        \
1465                     name##r##type(temp->u.w, rn(node->v.w),             \
1466                                   rn(node->w.w));                       \
1467                 else {                                                  \
1468                     word = name##r##type(_jit->pc.w,                    \
1469                                          rn(node->v.w), rn(node->w.w)); \
1470                     patch(word, node);                                  \
1471                 }                                                       \
1472                 break
1473 #define case_brw(name, type)                                            \
1474             case jit_code_##name##i##type:                              \
1475                 temp = node->u.n;                                       \
1476                 assert(temp->code == jit_code_label ||                  \
1477                        temp->code == jit_code_epilog);                  \
1478                 if (temp->flag & jit_flag_patch)                        \
1479                     name##i##type(temp->u.w,                            \
1480                                   rn(node->v.w), node->w.w);            \
1481                 else {                                                  \
1482                     word = name##i##type(_jit->pc.w,                    \
1483                                          rn(node->v.w), node->w.w);     \
1484                     patch(word, node);                                  \
1485                 }                                                       \
1486                 break
1487 #define case_rff(name, type)                                            \
1488             case jit_code_##name##r##type:                              \
1489                 if (jit_x87_reg_p(node->v.w) &&                         \
1490                     jit_x87_reg_p(node->w.w))                           \
1491                     x87_##name##r##type(rn(node->u.w), rn(node->v.w),   \
1492                                         rn(node->w.w));                 \
1493                 else                                                    \
1494                     sse_##name##r##type(rn(node->u.w), rn(node->v.w),   \
1495                                         rn(node->w.w));                 \
1496                 break;
1497 #define case_rfw(name, type, size)                                      \
1498             case jit_code_##name##i##type:                              \
1499                 assert(node->flag & jit_flag_data);                     \
1500                 if (jit_x87_reg_p(node->v.w))                           \
1501                     x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
1502                                 (jit_float##size##_t *)node->w.n->u.w); \
1503                 else                                                    \
1504                     sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
1505                                 (jit_float##size##_t *)node->w.n->u.w); \
1506                 break
1507 #define case_fff(name, type)                                            \
1508             case jit_code_##name##r##type:                              \
1509                 if (jit_x87_reg_p(node->u.w) &&                         \
1510                     jit_x87_reg_p(node->v.w) &&                         \
1511                     jit_x87_reg_p(node->w.w))                           \
1512                     x87_##name##r##type(rn(node->u.w),                  \
1513                                         rn(node->v.w), rn(node->w.w));  \
1514                 else                                                    \
1515                     sse_##name##r##type(rn(node->u.w),                  \
1516                                         rn(node->v.w), rn(node->w.w));  \
1517                 break
1518 #define case_ffw(name, type, size)                                      \
1519             case jit_code_##name##i##type:                              \
1520                 assert(node->flag & jit_flag_data);                     \
1521                 if (jit_x87_reg_p(node->u.w) &&                         \
1522                     jit_x87_reg_p(node->v.w))                           \
1523                     x87_##name##i##type(rn(node->u.w), rn(node->v.w),   \
1524                                 (jit_float##size##_t *)node->w.n->u.w); \
1525                 else                                                    \
1526                     sse_##name##i##type(rn(node->u.w), rn(node->v.w),   \
1527                                 (jit_float##size##_t *)node->w.n->u.w); \
1528                 break
1529 #define case_bff(name, type)                                            \
1530             case jit_code_b##name##r##type:                             \
1531                 temp = node->u.n;                                       \
1532                 assert(temp->code == jit_code_label ||                  \
1533                        temp->code == jit_code_epilog);                  \
1534                 if (temp->flag & jit_flag_patch) {                      \
1535                     if (jit_x87_reg_p(node->v.w) &&                     \
1536                         jit_x87_reg_p(node->w.w))                       \
1537                         x87_b##name##r##type(temp->u.w,                 \
1538                                 rn(node->v.w), rn(node->w.w));          \
1539                     else                                                \
1540                         sse_b##name##r##type(temp->u.w,                 \
1541                                 rn(node->v.w), rn(node->w.w));          \
1542                 }                                                       \
1543                 else {                                                  \
1544                     if (jit_x87_reg_p(node->v.w) &&                     \
1545                         jit_x87_reg_p(node->w.w))                       \
1546                         word = x87_b##name##r##type(_jit->pc.w,         \
1547                                 rn(node->v.w), rn(node->w.w));          \
1548                     else                                                \
1549                         word = sse_b##name##r##type(_jit->pc.w,         \
1550                                 rn(node->v.w), rn(node->w.w));          \
1551                     patch(word, node);                                  \
1552                 }                                                       \
1553                 break
1554 #define case_bfw(name, type, size)                                      \
1555             case jit_code_b##name##i##type:                             \
1556                 temp = node->u.n;                                       \
1557                 assert(temp->code == jit_code_label ||                  \
1558                        temp->code == jit_code_epilog);                  \
1559                 if (temp->flag & jit_flag_patch) {                      \
1560                     if (jit_x87_reg_p(node->v.w))                       \
1561                         x87_b##name##i##type(temp->u.w,                 \
1562                                 rn(node->v.w),                          \
1563                                 (jit_float##size##_t *)node->w.n->u.w); \
1564                     else                                                \
1565                         sse_b##name##i##type(temp->u.w,                 \
1566                                 rn(node->v.w),                          \
1567                                 (jit_float##size##_t *)node->w.n->u.w); \
1568                 }                                                       \
1569                 else {                                                  \
1570                     if (jit_x87_reg_p(node->v.w))                       \
1571                         word = x87_b##name##i##type(_jit->pc.w,         \
1572                                 rn(node->v.w),                          \
1573                                 (jit_float##size##_t *)node->w.n->u.w); \
1574                     else                                                \
1575                         word = sse_b##name##i##type(_jit->pc.w,         \
1576                                 rn(node->v.w),                          \
1577                                 (jit_float##size##_t *)node->w.n->u.w); \
1578                     patch(word, node);                                  \
1579                 }                                                       \
1580                 break
1581 #if DEVEL_DISASSEMBLER
1582     prevw = _jit->pc.w;
1583 #endif
1584     for (node = _jitc->head; node; node = node->next) {
1585         if (_jit->pc.uc >= _jitc->code.end)
1586             return (NULL);
1587
1588 #if DEVEL_DISASSEMBLER
1589         node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
1590         prevw = _jit->pc.w;
1591 #endif
1592         value = jit_classify(node->code);
1593         jit_regarg_set(node, value);
1594         switch (node->code) {
1595             case jit_code_align:
1596                 assert(!(node->u.w & (node->u.w - 1)) &&
1597                        node->u.w <= sizeof(jit_word_t));
1598                 if ((word = _jit->pc.w & (node->u.w - 1)))
1599                     nop(node->u.w - word);
1600                 break;
1601             case jit_code_note:         case jit_code_name:
1602                 node->u.w = _jit->pc.w;
1603                 break;
1604             case jit_code_label:
1605                 if ((node->link || (node->flag & jit_flag_use)) &&
1606                     (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
1607                     nop(sizeof(jit_word_t) - word);
1608                 /* remember label is defined */
1609                 node->flag |= jit_flag_patch;
1610                 node->u.w = _jit->pc.w;
1611                 break;
1612                 case_rrr(add,);
1613                 case_rrw(add,);
1614                 case_rrr(addx,);
1615                 case_rrw(addx,);
1616                 case_rrr(addc,);
1617                 case_rrw(addc,);
1618                 case_rrr(sub,);
1619                 case_rrw(sub,);
1620                 case_rrr(subx,);
1621                 case_rrw(subx,);
1622                 case_rrr(subc,);
1623                 case_rrw(subc,);
1624                 case_rrw(rsb,);
1625                 case_rrr(mul,);
1626                 case_rrw(mul,);
1627                 case_rrrr(qmul,);
1628                 case_rrrw(qmul,);
1629                 case_rrrr(qmul, _u);
1630                 case_rrrw(qmul, _u);
1631                 case_rrr(div,);
1632                 case_rrw(div,);
1633                 case_rrr(div, _u);
1634                 case_rrw(div, _u);
1635                 case_rrrr(qdiv,);
1636                 case_rrrw(qdiv,);
1637                 case_rrrr(qdiv, _u);
1638                 case_rrrw(qdiv, _u);
1639                 case_rrr(rem,);
1640                 case_rrw(rem,);
1641                 case_rrr(rem, _u);
1642                 case_rrw(rem, _u);
1643                 case_rrr(and,);
1644                 case_rrw(and,);
1645                 case_rrr(or,);
1646                 case_rrw(or,);
1647                 case_rrr(xor,);
1648                 case_rrw(xor,);
1649                 case_rrr(lsh,);
1650                 case_rrw(lsh,);
1651                 case_rrr(rsh,);
1652                 case_rrw(rsh,);
1653                 case_rrr(rsh, _u);
1654                 case_rrw(rsh, _u);
1655                 case_rr(neg,);
1656                 case_rr(com,);
1657                 case_rrr(lt,);
1658                 case_rrw(lt,);
1659                 case_rrr(lt, _u);
1660                 case_rrw(lt, _u);
1661                 case_rrr(le,);
1662                 case_rrw(le,);
1663                 case_rrr(le, _u);
1664                 case_rrw(le, _u);
1665                 case_rrr(eq,);
1666                 case_rrw(eq,);
1667                 case_rrr(ge,);
1668                 case_rrw(ge,);
1669                 case_rrr(ge, _u);
1670                 case_rrw(ge, _u);
1671                 case_rrr(gt,);
1672                 case_rrw(gt,);
1673                 case_rrr(gt, _u);
1674                 case_rrw(gt, _u);
1675                 case_rrr(ne,);
1676                 case_rrw(ne,);
1677                 case_rrr(movn,);
1678                 case_rrr(movz,);
1679                 case_rr(mov,);
1680             case jit_code_movi:
1681                 if (node->flag & jit_flag_node) {
1682                     temp = node->v.n;
1683                     if (temp->code == jit_code_data ||
1684                         (temp->code == jit_code_label &&
1685                          (temp->flag & jit_flag_patch)))
1686                         movi(rn(node->u.w), temp->u.w);
1687                     else {
1688                         assert(temp->code == jit_code_label ||
1689                                temp->code == jit_code_epilog);
1690                         word = movi_p(rn(node->u.w), node->v.w);
1691                         patch(word, node);
1692                     }
1693                 }
1694                 else
1695                     movi(rn(node->u.w), node->v.w);
1696                 break;
1697                 case_rr(hton, _us);
1698                 case_rr(hton, _ui);
1699 #if __X64 && !__X64_32
1700                 case_rr(hton, _ul);
1701 #endif
1702                 case_rr(bswap, _us);
1703                 case_rr(bswap, _ui);
1704 #if __X64 && !__X64_32
1705                 case_rr(bswap, _ul);
1706 #endif
1707                 case_rr(ext, _c);
1708                 case_rr(ext, _uc);
1709                 case_rr(ext, _s);
1710                 case_rr(ext, _us);
1711 #if __X64 && !__X64_32
1712                 case_rr(ext, _i);
1713                 case_rr(ext, _ui);
1714 #endif
1715                 case_rf(trunc, _f_i);
1716                 case_rf(trunc, _d_i);
1717 #if __X64
1718                 case_rf(trunc, _f_l);
1719                 case_rf(trunc, _d_l);
1720 #endif
1721                 case_rr(ld, _c);
1722                 case_rw(ld, _c);
1723                 case_rr(ld, _uc);
1724                 case_rw(ld, _uc);
1725                 case_rr(ld, _s);
1726                 case_rw(ld, _s);
1727                 case_rr(ld, _us);
1728                 case_rw(ld, _us);
1729                 case_rr(ld, _i);
1730                 case_rw(ld, _i);
1731 #if __X64 && !__X64_32
1732                 case_rr(ld, _ui);
1733                 case_rw(ld, _ui);
1734                 case_rr(ld, _l);
1735                 case_rw(ld, _l);
1736 #endif
1737                 case_rrr(ldx, _c);
1738                 case_rrw(ldx, _c);
1739                 case_rrr(ldx, _uc);
1740                 case_rrw(ldx, _uc);
1741                 case_rrr(ldx, _s);
1742                 case_rrw(ldx, _s);
1743                 case_rrr(ldx, _us);
1744                 case_rrw(ldx, _us);
1745                 case_rrr(ldx, _i);
1746                 case_rrw(ldx, _i);
1747 #if __X64 && !__X64_32
1748                 case_rrr(ldx, _ui);
1749                 case_rrw(ldx, _ui);
1750                 case_rrr(ldx, _l);
1751                 case_rrw(ldx, _l);
1752 #endif
1753                 case_rr(st, _c);
1754                 case_wr(st, _c);
1755                 case_rr(st, _s);
1756                 case_wr(st, _s);
1757                 case_rr(st, _i);
1758                 case_wr(st, _i);
1759 #if __X64 && !__X64_32
1760                 case_rr(st, _l);
1761                 case_wr(st, _l);
1762 #endif
1763                 case_rrr(stx, _c);
1764                 case_wrr(stx, _c);
1765                 case_rrr(stx, _s);
1766                 case_wrr(stx, _s);
1767                 case_rrr(stx, _i);
1768                 case_wrr(stx, _i);
1769 #if __X64 && !__X64_32
1770                 case_rrr(stx, _l);
1771                 case_wrr(stx, _l);
1772 #endif
1773                 case_brr(blt,);
1774                 case_brw(blt,);
1775                 case_brr(blt, _u);
1776                 case_brw(blt, _u);
1777                 case_brr(ble,);
1778                 case_brw(ble,);
1779                 case_brr(ble, _u);
1780                 case_brw(ble, _u);
1781                 case_brr(beq,);
1782                 case_brw(beq,);
1783                 case_brr(bge,);
1784                 case_brw(bge,);
1785                 case_brr(bge, _u);
1786                 case_brw(bge, _u);
1787                 case_brr(bgt,);
1788                 case_brw(bgt,);
1789                 case_brr(bgt, _u);
1790                 case_brw(bgt, _u);
1791                 case_brr(bne,);
1792                 case_brw(bne,);
1793                 case_brr(bms,);
1794                 case_brw(bms,);
1795                 case_brr(bmc,);
1796                 case_brw(bmc,);
1797                 case_brr(boadd,);
1798                 case_brw(boadd,);
1799                 case_brr(boadd, _u);
1800                 case_brw(boadd, _u);
1801                 case_brr(bxadd,);
1802                 case_brw(bxadd,);
1803                 case_brr(bxadd, _u);
1804                 case_brw(bxadd, _u);
1805                 case_brr(bosub,);
1806                 case_brw(bosub,);
1807                 case_brr(bosub, _u);
1808                 case_brw(bosub, _u);
1809                 case_brr(bxsub,);
1810                 case_brw(bxsub,);
1811                 case_brr(bxsub, _u);
1812                 case_brw(bxsub, _u);
1813                 case_fff(add, _f);
1814                 case_ffw(add, _f, 32);
1815                 case_fff(sub, _f);
1816                 case_ffw(sub, _f, 32);
1817                 case_ffw(rsb, _f, 32);
1818                 case_fff(mul, _f);
1819                 case_ffw(mul, _f, 32);
1820                 case_fff(div, _f);
1821                 case_ffw(div, _f, 32);
1822                 case_ff(abs, _f);
1823                 case_ff(neg, _f);
1824                 case_ff(sqrt, _f);
1825                 case_fr(ext, _f);
1826                 case_fr(ext, _d_f);
1827                 case_rff(lt, _f);
1828                 case_rfw(lt, _f, 32);
1829                 case_rff(le, _f);
1830                 case_rfw(le, _f, 32);
1831                 case_rff(eq, _f);
1832                 case_rfw(eq, _f, 32);
1833                 case_rff(ge, _f);
1834                 case_rfw(ge, _f, 32);
1835                 case_rff(gt, _f);
1836                 case_rfw(gt, _f, 32);
1837                 case_rff(ne, _f);
1838                 case_rfw(ne, _f, 32);
1839                 case_rff(unlt, _f);
1840                 case_rfw(unlt, _f, 32);
1841                 case_rff(unle, _f);
1842                 case_rfw(unle, _f, 32);
1843                 case_rff(uneq, _f);
1844                 case_rfw(uneq, _f, 32);
1845                 case_rff(unge, _f);
1846                 case_rfw(unge, _f, 32);
1847                 case_rff(ungt, _f);
1848                 case_rfw(ungt, _f, 32);
1849                 case_rff(ltgt, _f);
1850                 case_rfw(ltgt, _f, 32);
1851                 case_rff(ord, _f);
1852                 case_rfw(ord, _f, 32);
1853                 case_rff(unord, _f);
1854                 case_rfw(unord, _f, 32);
1855             case jit_code_movr_f:
1856                 if (jit_x87_reg_p(node->u.w)) {
1857                     if (jit_x87_reg_p(node->v.w))
1858                         x87_movr_f(rn(node->u.w), rn(node->v.w));
1859                     else
1860                         x87_from_sse_f(rn(node->u.w), rn(node->v.w));
1861                 }
1862                 else {
1863                     if (jit_sse_reg_p(node->v.w))
1864                         sse_movr_f(rn(node->u.w), rn(node->v.w));
1865                     else
1866                         sse_from_x87_f(rn(node->u.w), rn(node->v.w));
1867                 }
1868                 break;
1869             case jit_code_movi_f:
1870                 assert(node->flag & jit_flag_data);
1871                 if (jit_x87_reg_p(node->u.w))
1872                     x87_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
1873                 else
1874                     sse_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
1875                 break;
1876                 case_fr(ld, _f);
1877                 case_fw(ld, _f);
1878                 case_frr(ldx, _f);
1879                 case_frw(ldx, _f);
1880                 case_rf(st, _f);
1881                 case_wf(st, _f);
1882                 case_rrf(stx, _f);
1883                 case_wrf(stx, _f);
1884                 case_bff(lt, _f);
1885                 case_bfw(lt, _f, 32);
1886                 case_bff(le, _f);
1887                 case_bfw(le, _f, 32);
1888                 case_bff(eq, _f);
1889                 case_bfw(eq, _f, 32);
1890                 case_bff(ge, _f);
1891                 case_bfw(ge, _f, 32);
1892                 case_bff(gt, _f);
1893                 case_bfw(gt, _f, 32);
1894                 case_bff(ne, _f);
1895                 case_bfw(ne, _f, 32);
1896                 case_bff(unlt, _f);
1897                 case_bfw(unlt, _f, 32);
1898                 case_bff(unle, _f);
1899                 case_bfw(unle, _f, 32);
1900                 case_bff(uneq, _f);
1901                 case_bfw(uneq, _f, 32);
1902                 case_bff(unge, _f);
1903                 case_bfw(unge, _f, 32);
1904                 case_bff(ungt, _f);
1905                 case_bfw(ungt, _f, 32);
1906                 case_bff(ltgt, _f);
1907                 case_bfw(ltgt, _f, 32);
1908                 case_bff(ord, _f);
1909                 case_bfw(ord, _f, 32);
1910                 case_bff(unord, _f);
1911                 case_bfw(unord, _f, 32);
1912                 case_fff(add, _d);
1913                 case_ffw(add, _d, 64);
1914                 case_fff(sub, _d);
1915                 case_ffw(sub, _d, 64);
1916                 case_ffw(rsb, _d, 64);
1917                 case_fff(mul, _d);
1918                 case_ffw(mul, _d, 64);
1919                 case_fff(div, _d);
1920                 case_ffw(div, _d, 64);
1921                 case_ff(abs, _d);
1922                 case_ff(neg, _d);
1923                 case_ff(sqrt, _d);
1924                 case_fr(ext, _d);
1925                 case_fr(ext, _f_d);
1926                 case_rff(lt, _d);
1927                 case_rfw(lt, _d, 64);
1928                 case_rff(le, _d);
1929                 case_rfw(le, _d, 64);
1930                 case_rff(eq, _d);
1931                 case_rfw(eq, _d, 64);
1932                 case_rff(ge, _d);
1933                 case_rfw(ge, _d, 64);
1934                 case_rff(gt, _d);
1935                 case_rfw(gt, _d, 64);
1936                 case_rff(ne, _d);
1937                 case_rfw(ne, _d, 64);
1938                 case_rff(unlt, _d);
1939                 case_rfw(unlt, _d, 64);
1940                 case_rff(unle, _d);
1941                 case_rfw(unle, _d, 64);
1942                 case_rff(uneq, _d);
1943                 case_rfw(uneq, _d, 64);
1944                 case_rff(unge, _d);
1945                 case_rfw(unge, _d, 64);
1946                 case_rff(ungt, _d);
1947                 case_rfw(ungt, _d, 64);
1948                 case_rff(ltgt, _d);
1949                 case_rfw(ltgt, _d, 64);
1950                 case_rff(ord, _d);
1951                 case_rfw(ord, _d, 64);
1952                 case_rff(unord, _d);
1953                 case_rfw(unord, _d, 64);
1954             case jit_code_movr_d:
1955                 if (jit_x87_reg_p(node->u.w)) {
1956                     if (jit_x87_reg_p(node->v.w))
1957                         x87_movr_d(rn(node->u.w), rn(node->v.w));
1958                     else
1959                         x87_from_sse_d(rn(node->u.w), rn(node->v.w));
1960                 }
1961                 else {
1962                     if (jit_sse_reg_p(node->v.w))
1963                         sse_movr_d(rn(node->u.w), rn(node->v.w));
1964                     else
1965                         sse_from_x87_d(rn(node->u.w), rn(node->v.w));
1966                 }
1967                 break;
1968             case jit_code_movi_d:
1969                 assert(node->flag & jit_flag_data);
1970                 if (jit_x87_reg_p(node->u.w))
1971                     x87_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
1972                 else
1973                     sse_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w);
1974                 break;
1975                 case_fr(ld, _d);
1976                 case_fw(ld, _d);
1977                 case_frr(ldx, _d);
1978                 case_frw(ldx, _d);
1979                 case_rf(st, _d);
1980                 case_wf(st, _d);
1981                 case_rrf(stx, _d);
1982                 case_wrf(stx, _d);
1983                 case_bff(lt, _d);
1984                 case_bfw(lt, _d, 64);
1985                 case_bff(le, _d);
1986                 case_bfw(le, _d, 64);
1987                 case_bff(eq, _d);
1988                 case_bfw(eq, _d, 64);
1989                 case_bff(ge, _d);
1990                 case_bfw(ge, _d, 64);
1991                 case_bff(gt, _d);
1992                 case_bfw(gt, _d, 64);
1993                 case_bff(ne, _d);
1994                 case_bfw(ne, _d, 64);
1995                 case_bff(unlt, _d);
1996                 case_bfw(unlt, _d, 64);
1997                 case_bff(unle, _d);
1998                 case_bfw(unle, _d, 64);
1999                 case_bff(uneq, _d);
2000                 case_bfw(uneq, _d, 64);
2001                 case_bff(unge, _d);
2002                 case_bfw(unge, _d, 64);
2003                 case_bff(ungt, _d);
2004                 case_bfw(ungt, _d, 64);
2005                 case_bff(ltgt, _d);
2006                 case_bfw(ltgt, _d, 64);
2007                 case_bff(ord, _d);
2008                 case_bfw(ord, _d, 64);
2009                 case_bff(unord, _d);
2010                 case_bfw(unord, _d, 64);
2011             case jit_code_jmpr:
2012                 jmpr(rn(node->u.w));
2013                 break;
2014             case jit_code_jmpi:
2015                 if (node->flag & jit_flag_node) {
2016                     temp = node->u.n;
2017                     assert(temp->code == jit_code_label ||
2018                            temp->code == jit_code_epilog);
2019                     if (temp->flag & jit_flag_patch)
2020                         jmpi(temp->u.w);
2021                     else {
2022                         word = jmpi_p(_jit->pc.w);
2023                         patch(word, node);
2024                     }
2025                 }
2026                 else
2027                     jmpi(node->u.w);
2028                 break;
2029             case jit_code_callr:
2030                 callr(rn(node->u.w));
2031                 break;
2032             case jit_code_calli:
2033                 if (node->flag & jit_flag_node) {
2034                     temp = node->u.n;
2035                     assert(temp->code == jit_code_label ||
2036                            temp->code == jit_code_epilog);
2037                     if (temp->flag & jit_flag_patch)
2038                         calli(temp->u.w);
2039                     else {
2040                         word = calli_p(_jit->pc.w);
2041                         patch(word, node);
2042                     }
2043                 }
2044                 else
2045                     calli(node->u.w);
2046                 break;
2047             case jit_code_prolog:
2048                 _jitc->function = _jitc->functions.ptr + node->w.w;
2049                 undo.node = node;
2050                 undo.word = _jit->pc.w;
2051 #if DEVEL_DISASSEMBLER
2052                 undo.prevw = prevw;
2053 #endif
2054                 undo.patch_offset = _jitc->patches.offset;
2055             restart_function:
2056                 _jitc->again = 0;
2057                 prolog(node);
2058                 break;
2059             case jit_code_epilog:
2060                 assert(_jitc->function == _jitc->functions.ptr + node->w.w);
2061                 if (_jitc->again) {
2062                     for (temp = undo.node->next;
2063                          temp != node; temp = temp->next) {
2064                         if (temp->code == jit_code_label ||
2065                             temp->code == jit_code_epilog)
2066                             temp->flag &= ~jit_flag_patch;
2067                     }
2068                     temp->flag &= ~jit_flag_patch;
2069                     node = undo.node;
2070                     _jit->pc.w = undo.word;
2071 #if DEVEL_DISASSEMBLER
2072                     prevw = undo.prevw;
2073 #endif
2074                     _jitc->patches.offset = undo.patch_offset;
2075                     goto restart_function;
2076                 }
2077                 if (node->link &&
2078                     (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
2079                     nop(sizeof(jit_word_t) - word);
2080                 /* remember label is defined */
2081                 node->flag |= jit_flag_patch;
2082                 node->u.w = _jit->pc.w;
2083                 epilog(node);
2084                 _jitc->function = NULL;
2085                 break;
2086             case jit_code_va_start:
2087                 vastart(rn(node->u.w));
2088                 break;
2089             case jit_code_va_arg:
2090                 vaarg(rn(node->u.w), rn(node->v.w));
2091                 break;
2092             case jit_code_va_arg_d:
2093                 vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w));
2094                 break;
2095             case jit_code_live:                 case jit_code_ellipsis:
2096             case jit_code_va_push:
2097             case jit_code_allocai:              case jit_code_allocar:
2098             case jit_code_arg:
2099             case jit_code_arg_f:                case jit_code_arg_d:
2100             case jit_code_va_end:
2101             case jit_code_ret:
2102             case jit_code_retr:                 case jit_code_reti:
2103             case jit_code_retr_f:               case jit_code_reti_f:
2104             case jit_code_retr_d:               case jit_code_reti_d:
2105             case jit_code_getarg_c:             case jit_code_getarg_uc:
2106             case jit_code_getarg_s:             case jit_code_getarg_us:
2107             case jit_code_getarg_i:
2108 #if __X64 && !__X64_32
2109             case jit_code_getarg_ui:            case jit_code_getarg_l:
2110 #endif
2111             case jit_code_getarg_f:             case jit_code_getarg_d:
2112             case jit_code_putargr:              case jit_code_putargi:
2113             case jit_code_putargr_f:            case jit_code_putargi_f:
2114             case jit_code_putargr_d:            case jit_code_putargi_d:
2115             case jit_code_pushargr:             case jit_code_pushargi:
2116             case jit_code_pushargr_f:           case jit_code_pushargi_f:
2117             case jit_code_pushargr_d:           case jit_code_pushargi_d:
2118             case jit_code_retval_c:             case jit_code_retval_uc:
2119             case jit_code_retval_s:             case jit_code_retval_us:
2120             case jit_code_retval_i:
2121 #if __X64 && !__X32
2122             case jit_code_retval_ui:            case jit_code_retval_l:
2123 #endif
2124             case jit_code_prepare:
2125             case jit_code_finishr:              case jit_code_finishi:
2126                 break;
2127             case jit_code_retval_f:
2128 #if __X32
2129                 if (jit_sse_reg_p(node->u.w)) {
2130                     fstpr(_ST1_REGNO);
2131                     sse_from_x87_f(rn(node->u.w), _ST0_REGNO);
2132                 }
2133                 else
2134                     fstpr(rn(node->u.w) + 1);
2135 #endif
2136                 break;
2137             case jit_code_retval_d:
2138 #if __X32
2139                 if (jit_sse_reg_p(node->u.w)) {
2140                     fstpr(_ST1_REGNO);
2141                     sse_from_x87_d(rn(node->u.w), _ST0_REGNO);
2142                 }
2143                 else
2144                     fstpr(rn(node->u.w) + 1);
2145 #endif
2146                 break;
2147             default:
2148                 abort();
2149         }
2150         jit_regarg_clr(node, value);
2151         assert(_jitc->regarg == 0 && _jitc->synth == 0);
2152         /* update register live state */
2153         jit_reglive(node);
2154     }
2155 #undef case_bfw
2156 #undef case_bff
2157 #undef case_ffw
2158 #undef case_rfw
2159 #undef case_rff
2160 #undef case_brw
2161 #undef case_brr
2162 #undef case_wrf
2163 #undef case_wrr
2164 #undef case_frw
2165 #undef case_rrf
2166 #undef case_rrw
2167 #undef case_frr
2168 #undef case_rrr
2169 #undef case_wf
2170 #undef case_fw
2171 #undef case_fr
2172 #undef case_rr
2173
2174     for (offset = 0; offset < _jitc->patches.offset; offset++) {
2175         node = _jitc->patches.ptr[offset].node;
2176         word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
2177         patch_at(node, _jitc->patches.ptr[offset].inst, word);
2178     }
2179
2180     jit_flush(_jit->code.ptr, _jit->pc.uc);
2181
2182     return (_jit->code.ptr);
2183 }
2184
2185 #define CODE                            1
2186 #  include "jit_x86-cpu.c"
2187 #  include "jit_x86-sse.c"
2188 #  include "jit_x86-x87.c"
2189 #undef CODE
2190
/*
 * Flush hook for generated code.  The emitter calls it with the start of
 * the code buffer (fptr) and the current emission pointer (tptr).
 * This x86 implementation performs no work and ignores both arguments.
 * NOTE(review): presumably no explicit flush is needed on x86 -- confirm.
 */
void
jit_flush(void *fptr, void *tptr)
{
    (void)fptr;
    (void)tptr;
}
2195
2196 void
2197 _emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
2198 {
2199     ldxi(rn(r0), rn(r1), i0);
2200 }
2201
2202 void
2203 _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
2204 {
2205     stxi(i0, rn(r0), rn(r1));
2206 }
2207
2208 void
2209 _emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
2210 {
2211     if (jit_x87_reg_p(r0))
2212         x87_ldxi_d(rn(r0), rn(r1), i0);
2213     else
2214         sse_ldxi_d(rn(r0), rn(r1), i0);
2215 }
2216
2217 void
2218 _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
2219 {
2220     if (jit_x87_reg_p(r1))
2221         x87_stxi_d(i0, rn(r0), rn(r1));
2222     else
2223         sse_stxi_d(i0, rn(r0), rn(r1));
2224 }
2225
2226 static void
2227 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
2228 {
2229     jit_int32_t         flag;
2230
2231     assert(node->flag & jit_flag_node);
2232     if (node->code == jit_code_movi)
2233         flag = node->v.n->flag;
2234     else
2235         flag = node->u.n->flag;
2236     assert(!(flag & jit_flag_patch));
2237     if (_jitc->patches.offset >= _jitc->patches.length) {
2238         jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
2239                     _jitc->patches.length * sizeof(jit_patch_t),
2240                     (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
2241         _jitc->patches.length += 1024;
2242     }
2243     _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
2244     _jitc->patches.ptr[_jitc->patches.offset].node = node;
2245     ++_jitc->patches.offset;
2246 }
2247
2248 static void
2249 _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2250 {
2251     x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2252     sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
2253 }
2254
2255 static void
2256 _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2257 {
2258     x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2259     sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
2260 }
2261
2262 static void
2263 _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2264 {
2265     sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
2266     x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
2267 }
2268
2269 static void
2270 _x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2271 {
2272     sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
2273     x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
2274 }