add a thp-based huge page alloc fallback
[pcsx_rearmed.git] / deps / lightning / lib / jit_riscv.c
1 /*
2  * Copyright (C) 2019-2023  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 /* callee save                                    + variadic arguments
21  * align16(ra+fp+s[1-9]+s10+s11+fs[0-9]+fs10+fs11)+align16(a[0-7]) */
22 #define stack_framesize                 (208 + 64)
23
24 #define jit_arg_reg_p(i)                ((i) >= 0 && (i) < 8)
25 #define jit_arg_f_reg_p(i)              ((i) >= 0 && (i) < 8)
26
27 /*
28  * Types
29  */
30 typedef jit_pointer_t jit_va_list_t;
31
32 /*
33  * Prototypes
34  */
35 #define compute_framesize()             _compute_framesize(_jit)
36 static void _compute_framesize(jit_state_t*);
37 #if __WORDSIZE == 64
38 #  define load_const(r0, i0)            _load_const(_jit, r0, i0)
39 static void _load_const(jit_state_t*, jit_int32_t, jit_word_t);
40 static jit_word_t hash_const(jit_word_t);
41 #  define put_const(i0)                 _put_const(_jit, i0)
42 static void _put_const(jit_state_t*, jit_word_t);
43 #  define get_const(i0)                 _get_const(_jit, i0)
44 static jit_word_t _get_const(jit_state_t*, jit_word_t);
45 #endif
46 #define patch(instr, node)              _patch(_jit, instr, node)
47 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
48
49 #define PROTO                           1
50 #  include "jit_riscv-cpu.c"
51 #  include "jit_riscv-fpu.c"
52 #  include "jit_fallback.c"
53 #undef PROTO
54
55 /*
56  * Initialization
57  */
58 jit_cpu_t               jit_cpu;
59 jit_register_t          _rvs[] = {
60     { 0x00,                             "zero" },
61     { 0x01,                             "ra" },
62     { 0x02,                             "sp" },
63     { 0x03,                             "gp" },
64 #if 0           /* Pretend it does not exist, so _NOREG can be used in
65                  * a 64 bit bitmask */
66     { 0x04,                             "tp" },
67 #endif
68     { rc(gpr) | 0x05,                   "t0" },
69     { rc(gpr) | 0x06,                   "t1" },
70     { rc(gpr) | 0x07,                   "t2" },
71     { rc(gpr) | 0x1c,                   "t3" },
72     { rc(gpr) | 0x1d,                   "t4" },
73     { rc(gpr) | 0x1e,                   "t5" },
74     { rc(gpr) | 0x1f,                   "t6" },
75     { 0x08,                             "fp" },
76     { rc(sav) | rc(gpr) | 0x09,         "s1" },
77     { rc(sav) | rc(gpr) | 0x12,         "s2" },
78     { rc(sav) | rc(gpr) | 0x13,         "s3" },
79     { rc(sav) | rc(gpr) | 0x14,         "s4" },
80     { rc(sav) | rc(gpr) | 0x15,         "s5" },
81     { rc(sav) | rc(gpr) | 0x16,         "s6" },
82     { rc(sav) | rc(gpr) | 0x17,         "s7" },
83     { rc(sav) | rc(gpr) | 0x18,         "s8" },
84     { rc(sav) | rc(gpr) | 0x19,         "s9" },
85     { rc(sav) | rc(gpr) | 0x1a,         "s10" },
86     { rc(sav) | rc(gpr) | 0x1b,         "s11" },
87     { rc(arg) | rc(gpr) | 0x11,         "a7" },
88     { rc(arg) | rc(gpr) | 0x10,         "a6" },
89     { rc(arg) | rc(gpr) | 0x0f,         "a5" },
90     { rc(arg) | rc(gpr) | 0x0e,         "a4" },
91     { rc(arg) | rc(gpr) | 0x0d,         "a3" },
92     { rc(arg) | rc(gpr) | 0x0c,         "a2" },
93     { rc(arg) | rc(gpr) | 0x0b,         "a1" },
94     { rc(arg) | rc(gpr) | 0x0a,         "a0" },
95     { rc(fpr) | 0x00,                   "ft0" },
96     { rc(fpr) | 0x01,                   "ft1" },
97     { rc(fpr) | 0x02,                   "ft2" },
98     { rc(fpr) | 0x03,                   "ft3" },
99     { rc(fpr) | 0x04,                   "ft4" },
100     { rc(fpr) | 0x05,                   "ft5" },
101     { rc(fpr) | 0x06,                   "ft6" },
102     { rc(fpr) | 0x07,                   "ft7" },
103     { rc(fpr) | 0x1c,                   "ft8" },
104     { rc(fpr) | 0x1d,                   "ft9" },
105     { rc(fpr) | 0x1e,                   "ft10" },
106     { rc(fpr) | 0x1f,                   "ft11" },
107     { rc(sav) | rc(fpr) | 0x08,         "fs0" },
108     { rc(sav) | rc(fpr) | 0x09,         "fs1" },
109     { rc(sav) | rc(fpr) | 0x12,         "fs2" },
110     { rc(sav) | rc(fpr) | 0x13,         "fs3" },
111     { rc(sav) | rc(fpr) | 0x14,         "fs4" },
112     { rc(sav) | rc(fpr) | 0x15,         "fs5" },
113     { rc(sav) | rc(fpr) | 0x16,         "fs6" },
114     { rc(sav) | rc(fpr) | 0x17,         "fs7" },
115     { rc(sav) | rc(fpr) | 0x18,         "fs8" },
116     { rc(sav) | rc(fpr) | 0x19,         "fs9" },
117     { rc(sav) | rc(fpr) | 0x1a,         "fs10" },
118     { rc(sav) | rc(fpr) | 0x1b,         "fs11" },
119     { rc(arg) | rc(fpr) | 0x11,         "fa7" },
120     { rc(arg) | rc(fpr) | 0x10,         "fa6" },
121     { rc(arg) | rc(fpr) | 0x0f,         "fa5" },
122     { rc(arg) | rc(fpr) | 0x0e,         "fa4" },
123     { rc(arg) | rc(fpr) | 0x0d,         "fa3" },
124     { rc(arg) | rc(fpr) | 0x0c,         "fa2" },
125     { rc(arg) | rc(fpr) | 0x0b,         "fa1" },
126     { rc(arg) | rc(fpr) | 0x0a,         "fa0" },
127     { _NOREG,                           "<none>" },
128 };
129
130 static jit_int32_t iregs[] = {
131     _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10, _S11
132 };
133
134 static jit_int32_t fregs[] = {
135     _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7, _FS8, _FS9, _FS10, _FS11
136 };
137
138 /*
139  * Implementation
140  */
141 void
142 jit_get_cpu(void)
143 {
144     /* By default generate extra instructions for unaligned load/store. */
145     jit_cpu.unaligned = 0;
146 }
147
148 void
149 _jit_init(jit_state_t *_jit)
150 {
151     _jitc->reglen = jit_size(_rvs) - 1;
152     jit_carry = _NOREG;
153 }
154
155 void
156 _jit_prolog(jit_state_t *_jit)
157 {
158     jit_int32_t          offset;
159
160     if (_jitc->function)
161         jit_epilog();
162     assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
163     jit_regset_set_ui(&_jitc->regsav, 0);
164     offset = _jitc->functions.offset;
165     if (offset >= _jitc->functions.length) {
166         jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
167                     _jitc->functions.length * sizeof(jit_function_t),
168                     (_jitc->functions.length + 16) * sizeof(jit_function_t));
169         _jitc->functions.length += 16;
170     }
171     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
172     _jitc->function->self.size = stack_framesize;
173     _jitc->function->self.argi = _jitc->function->self.argf =
174         _jitc->function->self.alen = 0;
175     _jitc->function->self.aoff = 0;
176     _jitc->function->self.call = jit_call_default;
177     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
178               _jitc->reglen * sizeof(jit_int32_t));
179
180     /* _no_link here does not mean the jit_link() call can be removed
181      * by rewriting as:
182      * _jitc->function->prolog = jit_new_node(jit_code_prolog);
183      */
184     _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
185     jit_link(_jitc->function->prolog);
186     _jitc->function->prolog->w.w = offset;
187     _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
188     /*  u:      label value
189      *  v:      offset in blocks vector
190      *  w:      offset in functions vector
191      */
192     _jitc->function->epilog->w.w = offset;
193
194     jit_regset_new(&_jitc->function->regset);
195 }
196
197 jit_int32_t
198 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
199 {
200     assert(_jitc->function);
201     jit_check_frame();
202     switch (length) {
203         case 0: case 1:                                         break;
204         case 2:         _jitc->function->self.aoff &= -2;       break;
205         case 3: case 4: _jitc->function->self.aoff &= -4;       break;
206         default:        _jitc->function->self.aoff &= -8;       break;
207     }
208     _jitc->function->self.aoff -= length;
209     if (!_jitc->realize) {
210         jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
211         jit_dec_synth();
212     }
213     return (_jitc->function->self.aoff);
214 }
215
216 void
217 _jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
218 {
219     jit_int32_t          r0;
220     assert(_jitc->function);
221     jit_inc_synth_ww(allocar, u, v);
222     if (!_jitc->function->allocar) {
223         _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
224         _jitc->function->allocar = 1;
225     }
226     r0 = jit_get_reg(jit_class_gpr);
227     jit_negr(r0, v);
228     jit_andi(r0, r0, -16);
229     jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
230     jit_addr(u, u, r0);
231     jit_addr(JIT_SP, JIT_SP, r0);
232     jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
233     jit_unget_reg(r0);
234     jit_dec_synth();
235 }
236
237 void
238 _jit_ret(jit_state_t *_jit)
239 {
240     jit_node_t          *instr;
241     assert(_jitc->function);
242     jit_inc_synth(ret);
243     /* jump to epilog */
244     instr = jit_jmpi();
245     jit_patch_at(instr, _jitc->function->epilog);
246     jit_dec_synth();
247 }
248
249 void
250 _jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
251 {
252     jit_code_inc_synth_w(code, u);
253     jit_movr(JIT_RET, u);
254     jit_ret();
255     jit_dec_synth();
256 }
257
258 void
259 _jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
260 {
261     jit_code_inc_synth_w(code, u);
262     jit_movi(JIT_RET, u);
263     jit_ret();
264     jit_dec_synth();
265 }
266
267 void
268 _jit_retr_f(jit_state_t *_jit, jit_int32_t u)
269 {
270     jit_inc_synth_w(retr_f, u);
271     if (u != JIT_FRET)
272         jit_movr_f(JIT_FRET, u);
273     else
274         jit_live(JIT_FRET);
275     jit_ret();
276     jit_dec_synth();
277 }
278
279 void
280 _jit_reti_f(jit_state_t *_jit, jit_float32_t u)
281 {
282     jit_inc_synth_f(reti_f, u);
283     jit_movi_f(JIT_FRET, u);
284     jit_ret();
285     jit_dec_synth();
286 }
287
288 void
289 _jit_retr_d(jit_state_t *_jit, jit_int32_t u)
290 {
291     jit_inc_synth_w(retr_d, u);
292     if (u != JIT_FRET)
293         jit_movr_d(JIT_FRET, u);
294     else
295         jit_live(JIT_FRET);
296     jit_ret();
297     jit_dec_synth();
298 }
299
300 void
301 _jit_reti_d(jit_state_t *_jit, jit_float64_t u)
302 {
303     jit_inc_synth_d(reti_d, u);
304     jit_movi_d(JIT_FRET, u);
305     jit_ret();
306     jit_dec_synth();
307 }
308
309 void
310 _jit_epilog(jit_state_t *_jit)
311 {
312     assert(_jitc->function);
313     assert(_jitc->function->epilog->next == NULL);
314     jit_link(_jitc->function->epilog);
315     _jitc->function = NULL;
316 }
317
318 jit_bool_t
319 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
320 {
321     if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
322         return (jit_arg_reg_p(u->u.w));
323     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
324     return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
325 }
326
327 void
328 _jit_ellipsis(jit_state_t *_jit)
329 {
330     jit_inc_synth(ellipsis);
331     jit_check_frame();
332     if (_jitc->prepare) {
333         jit_link_prepare();
334         assert(!(_jitc->function->call.call & jit_call_varargs));
335         _jitc->function->call.call |= jit_call_varargs;
336     }
337     else {
338         jit_link_prolog();
339         assert(!(_jitc->function->self.call & jit_call_varargs));
340         _jitc->function->self.call |= jit_call_varargs;
341         _jitc->function->vagp = _jitc->function->self.argi;
342     }
343     jit_dec_synth();
344 }
345
346 void
347 _jit_va_push(jit_state_t *_jit, jit_int32_t u)
348 {
349     jit_inc_synth_w(va_push, u);
350     jit_pushargr(u);
351     jit_dec_synth();
352 }
353
354 jit_node_t *
355 _jit_arg(jit_state_t *_jit, jit_code_t code)
356 {
357     jit_node_t          *node;
358     jit_int32_t          offset;
359     assert(_jitc->function);
360     assert(!(_jitc->function->self.call & jit_call_varargs));
361 #if STRONG_TYPE_CHECKING
362     assert(code >= jit_code_arg_c && code <= jit_code_arg);
363 #endif
364     if (jit_arg_reg_p(_jitc->function->self.argi))
365         offset = _jitc->function->self.argi++;
366     else {
367         offset = _jitc->function->self.size;
368         _jitc->function->self.size += sizeof(jit_word_t);
369         jit_check_frame();
370     }
371     node = jit_new_node_ww(code, offset,
372                            ++_jitc->function->self.argn);
373     jit_link_prolog();
374     return (node);
375 }
376
377 jit_node_t *
378 _jit_arg_f(jit_state_t *_jit)
379 {
380     jit_node_t          *node;
381     jit_int32_t          offset;
382     assert(_jitc->function);
383     assert(!(_jitc->function->self.call & jit_call_varargs));
384     if (jit_arg_f_reg_p(_jitc->function->self.argf))
385         offset = _jitc->function->self.argf++;
386     else if (jit_arg_reg_p(_jitc->function->self.argi)) {
387         offset = _jitc->function->self.argi++;
388         offset += 8;
389     }
390     else {
391         offset = _jitc->function->self.size;
392         _jitc->function->self.size += sizeof(jit_word_t);
393         jit_check_frame();
394     }
395     node = jit_new_node_ww(jit_code_arg_f, offset,
396                            ++_jitc->function->self.argn);
397     jit_link_prolog();
398     return (node);
399 }
400
401 jit_node_t *
402 _jit_arg_d(jit_state_t *_jit)
403 {
404     jit_node_t          *node;
405     jit_int32_t          offset;
406     assert(_jitc->function);
407     assert(!(_jitc->function->self.call & jit_call_varargs));
408     if (jit_arg_f_reg_p(_jitc->function->self.argf))
409         offset = _jitc->function->self.argf++;
410     else if (jit_arg_reg_p(_jitc->function->self.argi)) {
411         offset = _jitc->function->self.argi++;
412         offset += 8;
413     }
414     else {
415         offset = _jitc->function->self.size;
416         _jitc->function->self.size += sizeof(jit_word_t);
417         jit_check_frame();
418     }
419     node = jit_new_node_ww(jit_code_arg_d, offset,
420                            ++_jitc->function->self.argn);
421     jit_link_prolog();
422     return (node);
423 }
424
425 void
426 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
427 {
428     assert_arg_type(v->code, jit_code_arg_c);
429     jit_inc_synth_wp(getarg_c, u, v);
430     if (jit_arg_reg_p(v->u.w))
431         jit_extr_c(u, JIT_RA0 - v->u.w);
432     else {
433         jit_node_t      *node = jit_ldxi_c(u, JIT_FP, v->u.w);
434         jit_link_alist(node);
435     }
436     jit_dec_synth();
437 }
438
439 void
440 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
441 {
442     assert_arg_type(v->code, jit_code_arg_c);
443     jit_inc_synth_wp(getarg_uc, u, v);
444     if (jit_arg_reg_p(v->u.w))
445         jit_extr_uc(u, JIT_RA0 - v->u.w);
446     else {
447         jit_node_t      *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
448         jit_link_alist(node);
449     }
450     jit_dec_synth();
451 }
452
453 void
454 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
455 {
456     assert_arg_type(v->code, jit_code_arg_s);
457     jit_inc_synth_wp(getarg_s, u, v);
458     if (jit_arg_reg_p(v->u.w))
459         jit_extr_s(u, JIT_RA0 - v->u.w);
460     else {
461         jit_node_t      *node = jit_ldxi_s(u, JIT_FP, v->u.w);
462         jit_link_alist(node);
463     }
464     jit_dec_synth();
465 }
466
467 void
468 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
469 {
470     assert_arg_type(v->code, jit_code_arg_s);
471     jit_inc_synth_wp(getarg_us, u, v);
472     if (jit_arg_reg_p(v->u.w))
473         jit_extr_us(u, JIT_RA0 - v->u.w);
474     else {
475         jit_node_t      *node = jit_ldxi_us(u, JIT_FP, v->u.w);
476         jit_link_alist(node);
477     }
478     jit_dec_synth();
479 }
480
481 void
482 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
483 {
484     assert_arg_type(v->code, jit_code_arg_i);
485     jit_inc_synth_wp(getarg_i, u, v);
486     if (jit_arg_reg_p(v->u.w))
487         jit_extr_i(u, JIT_RA0 - v->u.w);
488     else {
489         jit_node_t      *node = jit_ldxi_i(u, JIT_FP, v->u.w);
490         jit_link_alist(node);
491     }
492     jit_dec_synth();
493 }
494
495 void
496 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
497 {
498     assert_arg_type(v->code, jit_code_arg_i);
499     jit_inc_synth_wp(getarg_ui, u, v);
500     if (jit_arg_reg_p(v->u.w))
501         jit_extr_ui(u, JIT_RA0 - v->u.w);
502     else {
503         jit_node_t      *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
504         jit_link_alist(node);
505     }
506     jit_dec_synth();
507 }
508
509 void
510 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
511 {
512     assert_arg_type(v->code, jit_code_arg_l);
513     jit_inc_synth_wp(getarg_l, u, v);
514     if (jit_arg_reg_p(v->u.w))
515         jit_movr(u, JIT_RA0 - v->u.w);
516     else {
517         jit_node_t      *node = jit_ldxi_l(u, JIT_FP, v->u.w);
518         jit_link_alist(node);
519     }
520     jit_dec_synth();
521 }
522
523 void
524 _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
525 {
526     assert_putarg_type(code, v->code);
527     jit_code_inc_synth_wp(code, u, v);
528     if (jit_arg_reg_p(v->u.w))
529         jit_movr(JIT_RA0 - v->u.w, u);
530     else {
531         jit_node_t      *node = jit_stxi(v->u.w, JIT_FP, u);
532         jit_link_alist(node);
533     }
534     jit_dec_synth();
535 }
536
537 void
538 _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
539 {
540     jit_int32_t         regno;
541     assert_putarg_type(code, v->code);
542     jit_code_inc_synth_wp(code, u, v);
543     if (jit_arg_reg_p(v->u.w))
544         jit_movi(JIT_RA0 - v->u.w, u);
545     else {
546         jit_node_t      *node;
547         regno = jit_get_reg(jit_class_gpr);
548         jit_movi(regno, u);
549         node = jit_stxi(v->u.w, JIT_FP, regno);
550         jit_link_alist(node);
551         jit_unget_reg(regno);
552     }
553     jit_dec_synth();
554 }
555
556 void
557 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
558 {
559     assert(v->code == jit_code_arg_f);
560     jit_inc_synth_wp(getarg_f, u, v);
561     if (jit_arg_f_reg_p(v->u.w))
562         jit_movr_f(u, JIT_FA0 - v->u.w);
563     else if (jit_arg_reg_p(v->u.w - 8))
564         jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
565     else {
566         jit_node_t      *node = jit_ldxi_f(u, JIT_FP, v->u.w);
567         jit_link_alist(node);
568     }
569     jit_dec_synth();
570 }
571
572 void
573 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
574 {
575     assert(v->code == jit_code_arg_f);
576     jit_inc_synth_wp(putargr_f, u, v);
577     if (jit_arg_f_reg_p(v->u.w))
578         jit_movr_f(JIT_FA0 - v->u.w, u);
579     else if (jit_arg_reg_p(v->u.w - 8))
580         jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
581     else {
582         jit_node_t      *node = jit_stxi_f(v->u.w, JIT_FP, u);
583         jit_link_alist(node);
584     }
585     jit_dec_synth();
586 }
587
588 void
589 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
590 {
591     jit_int32_t         regno;
592     assert(v->code == jit_code_arg_f);
593     jit_inc_synth_fp(putargi_f, u, v);
594     if (jit_arg_f_reg_p(v->u.w))
595         jit_movi_f(JIT_FA0 - v->u.w, u);
596     else if (jit_arg_reg_p(v->u.w - 8))
597         jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u);
598     else {
599         jit_node_t      *node;
600         regno = jit_get_reg(jit_class_fpr);
601         jit_movi_f(regno, u);
602         node = jit_stxi_f(v->u.w, JIT_FP, regno);
603         jit_link_alist(node);
604         jit_unget_reg(regno);
605     }
606     jit_dec_synth();
607 }
608
609 void
610 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
611 {
612     assert(v->code == jit_code_arg_d);
613     jit_inc_synth_wp(getarg_d, u, v);
614     if (jit_arg_f_reg_p(v->u.w))
615         jit_movr_d(u, JIT_FA0 - v->u.w);
616     else if (jit_arg_reg_p(v->u.w - 8))
617         jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
618     else {
619         jit_node_t      *node = jit_ldxi_d(u, JIT_FP, v->u.w);
620         jit_link_alist(node);
621     }
622     jit_dec_synth();
623 }
624
625 void
626 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
627 {
628     assert(v->code == jit_code_arg_d);
629     jit_inc_synth_wp(putargr_d, u, v);
630     if (jit_arg_reg_p(v->u.w))
631         jit_movr_d(JIT_FA0 - v->u.w, u);
632     else if (jit_arg_reg_p(v->u.w - 8))
633         jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
634     else {
635         jit_node_t      *node = jit_stxi_d(v->u.w, JIT_FP, u);
636         jit_link_alist(node);
637     }
638     jit_dec_synth();
639 }
640
641 void
642 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
643 {
644     jit_int32_t         regno;
645     assert(v->code == jit_code_arg_d);
646     jit_inc_synth_dp(putargi_d, u, v);
647     if (jit_arg_reg_p(v->u.w))
648         jit_movi_d(JIT_FA0 - v->u.w, u);
649     else if (jit_arg_reg_p(v->u.w - 8))
650         jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u);
651     else {
652         jit_node_t      *node;
653         regno = jit_get_reg(jit_class_fpr);
654         jit_movi_d(regno, u);
655         node = jit_stxi_d(v->u.w, JIT_FP, regno);
656         jit_link_alist(node);
657         jit_unget_reg(regno);
658     }
659     jit_dec_synth();
660 }
661
662 void
663 _jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
664 {
665     assert(_jitc->function);
666     jit_code_inc_synth_w(code, u);
667     jit_link_prepare();
668     if (jit_arg_reg_p(_jitc->function->call.argi)) {
669         jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
670         ++_jitc->function->call.argi;
671     }
672     else {
673         jit_stxi(_jitc->function->call.size, JIT_SP, u);
674         _jitc->function->call.size += sizeof(jit_word_t);
675         jit_check_frame();
676     }
677     jit_dec_synth();
678 }
679
680 void
681 _jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
682 {
683     jit_int32_t          regno;
684     assert(_jitc->function);
685     jit_code_inc_synth_w(code, u);
686     jit_link_prepare();
687     if (jit_arg_reg_p(_jitc->function->call.argi)) {
688         jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
689         ++_jitc->function->call.argi;
690     }
691     else {
692         regno = jit_get_reg(jit_class_gpr);
693         jit_movi(regno, u);
694         jit_stxi(_jitc->function->call.size, JIT_SP, regno);
695         jit_unget_reg(regno);
696         _jitc->function->call.size += sizeof(jit_word_t);
697         jit_check_frame();
698     }
699     jit_dec_synth();
700 }
701
702 void
703 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
704 {
705     assert(_jitc->function);
706     jit_inc_synth_w(pushargr_f, u);
707     jit_link_prepare();
708     if (jit_arg_f_reg_p(_jitc->function->call.argf) &&
709         !(_jitc->function->call.call & jit_call_varargs)) {
710         jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u);
711         ++_jitc->function->call.argf;
712     }
713     else if (jit_arg_reg_p(_jitc->function->call.argi)) {
714         jit_movr_f_w(JIT_RA0 - _jitc->function->call.argi, u);
715         ++_jitc->function->call.argi;
716     }
717     else {
718         jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
719         _jitc->function->call.size += sizeof(jit_word_t);
720         jit_check_frame();
721     }
722     jit_dec_synth();
723 }
724
725 void
726 _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
727 {
728     jit_int32_t         regno;
729     assert(_jitc->function);
730     jit_inc_synth_f(pushargi_f, u);
731     jit_link_prepare();
732     if (jit_arg_f_reg_p(_jitc->function->call.argf) &&
733         !(_jitc->function->call.call & jit_call_varargs)) {
734         jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u);
735         ++_jitc->function->call.argf;
736     }
737     else if (jit_arg_reg_p(_jitc->function->call.argi)) {
738         jit_movi_f_w(JIT_RA0 - _jitc->function->call.argi, u);
739         ++_jitc->function->call.argi;
740     }
741     else {
742         regno = jit_get_reg(jit_class_fpr);
743         jit_movi_f(regno, u);
744         jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
745         jit_unget_reg(regno);
746         _jitc->function->call.size += sizeof(jit_word_t);
747         jit_check_frame();
748     }
749     jit_dec_synth();
750 }
751
752 void
753 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
754 {
755     assert(_jitc->function);
756     jit_inc_synth_w(pushargr_d, u);
757     jit_link_prepare();
758     if (jit_arg_f_reg_p(_jitc->function->call.argf) &&
759         !(_jitc->function->call.call & jit_call_varargs)) {
760         jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
761         ++_jitc->function->call.argf;
762     }
763     else if (jit_arg_reg_p(_jitc->function->call.argi)) {
764         jit_movr_d_w(JIT_RA0 - _jitc->function->call.argi, u);
765         ++_jitc->function->call.argi;
766     }
767     else {
768         jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
769         _jitc->function->call.size += sizeof(jit_word_t);
770         jit_check_frame();
771     }
772     jit_dec_synth();
773 }
774
775 void
776 _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
777 {
778     jit_int32_t         regno;
779     assert(_jitc->function);
780     jit_inc_synth_d(pushargi_d, u);
781     jit_link_prepare();
782     if (jit_arg_f_reg_p(_jitc->function->call.argf) &&
783         !(_jitc->function->call.call & jit_call_varargs)) {
784         jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
785         ++_jitc->function->call.argf;
786     }
787     else if (jit_arg_reg_p(_jitc->function->call.argi)) {
788         jit_movi_d_w(JIT_RA0 - _jitc->function->call.argi, u);
789         ++_jitc->function->call.argi;
790     }
791     else {
792         regno = jit_get_reg(jit_class_fpr);
793         jit_movi_d(regno, u);
794         jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
795         jit_unget_reg(regno);
796         _jitc->function->call.size += sizeof(jit_word_t);
797         jit_check_frame();
798     }
799     jit_dec_synth();
800 }
801
802 jit_bool_t
803 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
804 {
805     jit_int32_t         spec;
806     spec = jit_class(_rvs[regno].spec);
807     if (spec & jit_class_arg) {
808         regno = JIT_RA0 - regno;
809         if (regno >= 0 && regno < node->v.w)
810             return (1);
811         if (spec & jit_class_fpr) {
812             regno = JIT_FA0 - regno;
813             if (regno >= 0 && regno < node->w.w)
814                 return (1);
815         }
816     }
817
818     return (0);
819 }
820
821 void
822 _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
823 {
824     jit_node_t          *node;
825     assert(_jitc->function);
826     jit_check_frame();
827     jit_inc_synth_w(finishr, r0);
828     if (_jitc->function->self.alen < _jitc->function->call.size)
829         _jitc->function->self.alen = _jitc->function->call.size;
830     node = jit_callr(r0);
831     node->v.w = _jitc->function->self.argi;
832     node->w.w = _jitc->function->call.argf;
833     _jitc->function->call.argi = _jitc->function->call.argf =
834         _jitc->function->call.size = 0;
835     _jitc->prepare = 0;
836     jit_dec_synth();
837 }
838
839 jit_node_t *
840 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
841 {
842     jit_node_t          *node;
843     assert(_jitc->function);
844     jit_check_frame();
845     jit_inc_synth_w(finishi, (jit_word_t)i0);
846     if (_jitc->function->self.alen < _jitc->function->call.size)
847         _jitc->function->self.alen = _jitc->function->call.size;
848     node = jit_calli(i0);
849     node->v.w = _jitc->function->call.argi;
850     node->w.w = _jitc->function->call.argf;
851     _jitc->function->call.argi = _jitc->function->call.argf =
852         _jitc->function->call.size = 0;
853     _jitc->prepare = 0;
854     jit_dec_synth();
855     return (node);
856 }
857
858 void
859 _jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
860 {
861     jit_inc_synth_w(retval_c, r0);
862     jit_extr_c(r0, JIT_RET);
863     jit_dec_synth();
864 }
865
866 void
867 _jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
868 {
869     jit_inc_synth_w(retval_uc, r0);
870     jit_extr_uc(r0, JIT_RET);
871     jit_dec_synth();
872 }
873
874 void
875 _jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
876 {
877     jit_inc_synth_w(retval_s, r0);
878     jit_extr_s(r0, JIT_RET);
879     jit_dec_synth();
880 }
881
882 void
883 _jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
884 {
885     jit_inc_synth_w(retval_us, r0);
886     jit_extr_us(r0, JIT_RET);
887     jit_dec_synth();
888 }
889
890 void
891 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
892 {
893     jit_inc_synth_w(retval_i, r0);
894     jit_extr_i(r0, JIT_RET);
895     jit_dec_synth();
896 }
897
898 void
899 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
900 {
901     jit_inc_synth_w(retval_ui, r0);
902     jit_extr_ui(r0, JIT_RET);
903     jit_dec_synth();
904 }
905
906 void
907 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
908 {
909     jit_inc_synth_w(retval_l, r0);
910     if (r0 != JIT_RET)
911         jit_movr(r0, JIT_RET);
912     jit_dec_synth();
913 }
914
915 void
916 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
917 {
918     jit_inc_synth_w(retval_f, r0);
919     if (r0 != JIT_FRET)
920         jit_movr_f(r0, JIT_FRET);
921     jit_dec_synth();
922 }
923
924 void
925 _jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
926 {
927     jit_inc_synth_w(retval_d, r0);
928     if (r0 != JIT_FRET)
929         jit_movr_d(r0, JIT_FRET);
930     jit_dec_synth();
931 }
932
933 jit_pointer_t
934 _emit_code(jit_state_t *_jit)
935 {
936     jit_node_t          *node;
937     jit_node_t          *temp;
938     jit_word_t           word;
939     jit_word_t           value;
940     jit_int32_t          offset;
941     struct {
942         jit_node_t      *node;
943         jit_uint8_t     *data;
944         jit_word_t       word;
945         jit_function_t   func;
946 #if DEVEL_DISASSEMBLER
947         jit_word_t       prevw;
948 #endif
949         jit_int32_t      const_offset;
950         jit_int32_t      patch_offset;
951     } undo;
952 #if DEVEL_DISASSEMBLER
953     jit_word_t           prevw;
954 #endif
955
956 #if __WORDSIZE == 64
957     if (!_jitc->consts.hash.table) {
958         jit_alloc((jit_pointer_t *)&_jitc->consts.hash.table,
959                   16 * sizeof(jit_const_t *));
960         _jitc->consts.hash.size = 16;
961         jit_alloc((jit_pointer_t *)&_jitc->consts.pool.ptr,
962                   sizeof(jit_const_t *));
963         jit_alloc((jit_pointer_t *)_jitc->consts.pool.ptr,
964                   1024 * sizeof(jit_const_t));
965         _jitc->consts.pool.length = 1;
966     }
967     /* Reset table if starting over jit generation */
968     else
969         memset(_jitc->consts.hash.table, 0,
970                _jitc->consts.hash.size * sizeof(jit_word_t));
971     for (offset = 0; offset < _jitc->consts.pool.length; offset++) {
972         jit_int32_t      i;
973         jit_const_t     *list = _jitc->consts.pool.ptr[offset];
974         for (i = 0; i < 1023; ++i, ++list)
975             list->next = list + 1;
976         if (offset + 1 < _jitc->consts.pool.length)
977             list->next = _jitc->consts.pool.ptr[offset + 1];
978         else
979             list->next = NULL;
980     }
981     _jitc->consts.pool.list = _jitc->consts.pool.ptr[0];
982     _jitc->consts.hash.count = 0;
983     if (!_jitc->consts.vector.instrs) {
984         jit_alloc((jit_pointer_t *)&_jitc->consts.vector.instrs,
985                   16 * sizeof(jit_word_t));
986         jit_alloc((jit_pointer_t *)&_jitc->consts.vector.values,
987                   16 * sizeof(jit_word_t));
988         _jitc->consts.vector.length = 16;
989     }
990     _jitc->consts.vector.offset = 0;
991 #endif
992
993     _jitc->function = NULL;
994
995     jit_reglive_setup();
996
997     undo.word = 0;
998     undo.node = NULL;
999     undo.const_offset = undo.patch_offset = 0;
1000 #  define assert_data(node)             /**/
1001 #define case_rr(name, type)                                             \
1002             case jit_code_##name##r##type:                              \
1003                 name##r##type(rn(node->u.w), rn(node->v.w));            \
1004                 break
1005 #define case_rw(name, type)                                             \
1006             case jit_code_##name##i##type:                              \
1007                 name##i##type(rn(node->u.w), node->v.w);                \
1008                 break
1009 #define case_wr(name, type)                                             \
1010             case jit_code_##name##i##type:                              \
1011                 name##i##type(node->u.w, rn(node->v.w));                \
1012                 break
1013 #define case_rrr(name, type)                                            \
1014             case jit_code_##name##r##type:                              \
1015                 name##r##type(rn(node->u.w),                            \
1016                               rn(node->v.w), rn(node->w.w));            \
1017                 break
1018 #define case_rrrr(name, type)                                           \
1019             case jit_code_##name##r##type:                              \
1020                 name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
1021                               rn(node->v.w), rn(node->w.w));            \
1022                 break
1023 #define case_rqr(name, type)                                            \
1024             case jit_code_##name##r##type:                              \
1025                 name##r##type(rn(node->u.w), rn(node->v.q.l),           \
1026                               rn(node->v.q.h), rn(node->w.w));          \
1027             case jit_code_##name##i##type:                              \
1028                 break;
1029 #define case_rrw(name, type)                                            \
1030             case jit_code_##name##i##type:                              \
1031                 name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
1032                 break
1033 #define case_rrrw(name, type)                                           \
1034             case jit_code_##name##i##type:                              \
1035                 name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
1036                               rn(node->v.w), node->w.w);                \
1037                 break
1038 #define case_rrf(name)                                                  \
1039             case jit_code_##name##i_f:                                  \
1040                 assert_data(node);                                      \
1041                 name##i_f(rn(node->u.w), rn(node->v.w), node->w.f);     \
1042                 break
1043 #define case_rrd(name)                                                  \
1044             case jit_code_##name##i_d:                                  \
1045                 assert_data(node);                                      \
1046                 name##i_d(rn(node->u.w), rn(node->v.w), node->w.d);     \
1047                 break
1048 #define case_wrr(name, type)                                            \
1049             case jit_code_##name##i##type:                              \
1050                 name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
1051                 break
1052 #define case_brr(name, type)                                            \
1053             case jit_code_##name##r##type:                              \
1054                 temp = node->u.n;                                       \
1055                 assert(temp->code == jit_code_label ||                  \
1056                        temp->code == jit_code_epilog);                  \
1057                 if (temp->flag & jit_flag_patch)                        \
1058                     name##r##type(temp->u.w, rn(node->v.w),             \
1059                                   rn(node->w.w));                       \
1060                 else {                                                  \
1061                     word = name##r##type(_jit->pc.w,                    \
1062                                          rn(node->v.w), rn(node->w.w)); \
1063                     patch(word, node);                                  \
1064                 }                                                       \
1065                 break
1066 #define case_brw(name, type)                                            \
1067             case jit_code_##name##i##type:                              \
1068                 temp = node->u.n;                                       \
1069                 assert(temp->code == jit_code_label ||                  \
1070                        temp->code == jit_code_epilog);                  \
1071                 if (temp->flag & jit_flag_patch)                        \
1072                     name##i##type(temp->u.w,                            \
1073                                   rn(node->v.w), node->w.w);            \
1074                 else {                                                  \
1075                     word = name##i##type(_jit->pc.w,                    \
1076                                          rn(node->v.w), node->w.w);     \
1077                     patch(word, node);                                  \
1078                 }                                                       \
1079                 break;
1080 #define case_brf(name)                                                  \
1081             case jit_code_##name##i_f:                                  \
1082                 temp = node->u.n;                                       \
1083                 assert(temp->code == jit_code_label ||                  \
1084                        temp->code == jit_code_epilog);                  \
1085                 if (temp->flag & jit_flag_patch)                        \
1086                     name##i_f(temp->u.w, rn(node->v.w), node->w.f);     \
1087                 else {                                                  \
1088                     word = name##i_f(_jit->pc.w, rn(node->v.w),         \
1089                                 node->w.f);                             \
1090                     patch(word, node);                                  \
1091                 }                                                       \
1092                 break
1093 #define case_brd(name)                                                  \
1094             case jit_code_##name##i_d:                                  \
1095                 temp = node->u.n;                                       \
1096                 assert(temp->code == jit_code_label ||                  \
1097                        temp->code == jit_code_epilog);                  \
1098                 if (temp->flag & jit_flag_patch)                        \
1099                     name##i_d(temp->u.w, rn(node->v.w), node->w.d);     \
1100                 else {                                                  \
1101                     word = name##i_d(_jit->pc.w, rn(node->v.w),         \
1102                                 node->w.d);                             \
1103                     patch(word, node);                                  \
1104                 }                                                       \
1105                 break
1106 #if DEVEL_DISASSEMBLER
1107     prevw = _jit->pc.w;
1108 #endif
1109     for (node = _jitc->head; node; node = node->next) {
1110         if (_jit->pc.uc >= _jitc->code.end)
1111             return (NULL);
1112
1113 #if DEVEL_DISASSEMBLER
1114         node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
1115         prevw = _jit->pc.w;
1116 #endif
1117         value = jit_classify(node->code);
1118         jit_regarg_set(node, value);
1119         switch (node->code) {
1120             case jit_code_align:
1121                 /* Must align to a power of two */
1122                 assert(!(node->u.w & (node->u.w - 1)));
1123                 if ((word = _jit->pc.w & (node->u.w - 1)))
1124                     nop(node->u.w - word);
1125                 break;
1126             case jit_code_skip:
1127                 nop((node->u.w + 3) & ~3);
1128                 break;
1129             case jit_code_note:         case jit_code_name:
1130                 node->u.w = _jit->pc.w;
1131                 break;
1132             case jit_code_label:
1133                 /* remember label is defined */
1134                 node->flag |= jit_flag_patch;
1135                 node->u.w = _jit->pc.w;
1136                 break;
1137                 case_rrr(add,);
1138                 case_rrw(add,);
1139                 case_rrr(addc,);
1140                 case_rrw(addc,);
1141                 case_rrr(addx,);
1142                 case_rrw(addx,);
1143                 case_rrr(sub,);
1144                 case_rrw(sub,);
1145                 case_rrr(subc,);
1146                 case_rrw(subc,);
1147                 case_rrr(subx,);
1148                 case_rrw(subx,);
1149                 case_rrw(rsb,);
1150                 case_rrr(mul,);
1151                 case_rrw(mul,);
1152                 case_rrr(hmul,);
1153                 case_rrw(hmul,);
1154                 case_rrr(hmul, _u);
1155                 case_rrw(hmul, _u);
1156                 case_rrrr(qmul,);
1157                 case_rrrw(qmul,);
1158                 case_rrrr(qmul, _u);
1159                 case_rrrw(qmul, _u);
1160                 case_rrr(div,);
1161                 case_rrw(div,);
1162                 case_rrr(div, _u);
1163                 case_rrw(div, _u);
1164                 case_rrrr(qdiv,);
1165                 case_rrrw(qdiv,);
1166                 case_rrrr(qdiv, _u);
1167                 case_rrrw(qdiv, _u);
1168                 case_rrr(rem,);
1169                 case_rrw(rem,);
1170                 case_rrr(rem, _u);
1171                 case_rrw(rem, _u);
1172                 case_rrr(lsh,);
1173                 case_rrw(lsh,);
1174                 case_rrr(rsh,);
1175                 case_rrw(rsh,);
1176 #define qlshr(r0, r1, r2, r3)   fallback_qlshr(r0, r1, r2, r3)
1177 #define qlshi(r0, r1, r2, i0)   fallback_qlshi(r0, r1, r2, i0)
1178 #define qlshr_u(r0, r1, r2, r3) fallback_qlshr_u(r0, r1, r2, r3)
1179 #define qlshi_u(r0, r1, r2, i0) fallback_qlshi_u(r0, r1, r2, i0)
1180                 case_rrrr(qlsh,);
1181                 case_rrrw(qlsh,);
1182                 case_rrrr(qlsh, _u);
1183                 case_rrrw(qlsh, _u);
1184                 case_rrr(rsh, _u);
1185                 case_rrw(rsh, _u);
1186 #define qrshr(r0, r1, r2, r3)   fallback_qrshr(r0, r1, r2, r3)
1187 #define qrshi(r0, r1, r2, i0)   fallback_qrshi(r0, r1, r2, i0)
1188 #define qrshr_u(r0, r1, r2, r3) fallback_qrshr_u(r0, r1, r2, r3)
1189 #define qrshi_u(r0, r1, r2, i0) fallback_qrshi_u(r0, r1, r2, i0)
1190                 case_rrrr(qrsh,);
1191                 case_rrrw(qrsh,);
1192                 case_rrrr(qrsh, _u);
1193                 case_rrrw(qrsh, _u);
1194 #define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2)
1195 #define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0)
1196 #define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2)
1197 #define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0)
1198                 case_rrr(lrot,);
1199                 case_rrw(lrot,);
1200                 case_rrr(rrot,);
1201                 case_rrw(rrot,);
1202                 case_rr(neg,);
1203                 case_rr(com,);
1204 #define clor(r0, r1)    fallback_clo(r0, r1)
1205 #define clzr(r0, r1)    fallback_clz(r0, r1)
1206 #define ctor(r0, r1)    fallback_cto(r0, r1)
1207 #define ctzr(r0, r1)    fallback_ctz(r0, r1)
1208 #define rbitr(r0, r1)   fallback_rbit(r0, r1)
1209 #define popcntr(r0, r1) fallback_popcnt(r0, r1)
1210                 case_rr(clo,);
1211                 case_rr(clz,);
1212                 case_rr(cto,);
1213                 case_rr(ctz,);
1214                 case_rr(rbit,);
1215                 case_rr(popcnt,);
1216                 case_rrr(and,);
1217                 case_rrw(and,);
1218                 case_rrr(or,);
1219                 case_rrw(or,);
1220                 case_rrr(xor,);
1221                 case_rrw(xor,);
1222                 case_rr(trunc, _f_i);
1223                 case_rr(trunc, _d_i);
1224                 case_rr(trunc, _f_l);
1225                 case_rr(trunc, _d_l);
1226                 case_rr(ld, _c);
1227                 case_rw(ld, _c);
1228                 case_rr(ld, _uc);
1229                 case_rw(ld, _uc);
1230                 case_rr(ld, _s);
1231                 case_rw(ld, _s);
1232                 case_rr(ld, _us);
1233                 case_rw(ld, _us);
1234                 case_rr(ld, _i);
1235                 case_rw(ld, _i);
1236                 case_rr(ld, _ui);
1237                 case_rw(ld, _ui);
1238                 case_rr(ld, _l);
1239                 case_rw(ld, _l);
1240                 case_rrr(ldx, _c);
1241                 case_rrw(ldx, _c);
1242                 case_rrr(ldx, _uc);
1243                 case_rrw(ldx, _uc);
1244                 case_rrr(ldx, _s);
1245                 case_rrw(ldx, _s);
1246                 case_rrr(ldx, _us);
1247                 case_rrw(ldx, _us);
1248                 case_rrr(ldx, _i);
1249                 case_rrw(ldx, _i);
1250                 case_rrr(ldx, _ui);
1251                 case_rrw(ldx, _ui);
1252                 case_rrr(ldx, _l);
1253                 case_rrw(ldx, _l);
1254             case jit_code_unldr:
1255                 unldr(rn(node->u.w), rn(node->v.w), node->w.w);
1256                 break;
1257             case jit_code_unldi:
1258                 unldi(rn(node->u.w), node->v.w, node->w.w);
1259                 break;
1260             case jit_code_unldr_u:
1261                 unldr_u(rn(node->u.w), rn(node->v.w), node->w.w);
1262                 break;
1263             case jit_code_unldi_u:
1264                 unldi_u(rn(node->u.w), node->v.w, node->w.w);
1265                 break;
1266                 case_rr(st, _c);
1267                 case_wr(st, _c);
1268                 case_rr(st, _s);
1269                 case_wr(st, _s);
1270                 case_rr(st, _i);
1271                 case_wr(st, _i);
1272                 case_rr(st, _l);
1273                 case_wr(st, _l);
1274                 case_rrr(stx, _c);
1275                 case_wrr(stx, _c);
1276                 case_rrr(stx, _s);
1277                 case_wrr(stx, _s);
1278                 case_rrr(stx, _i);
1279                 case_wrr(stx, _i);
1280                 case_rrr(stx, _l);
1281                 case_wrr(stx, _l);
1282             case jit_code_unstr:
1283                 unstr(rn(node->u.w), rn(node->v.w), node->w.w);
1284                 break;
1285             case jit_code_unsti:
1286                 unsti(node->u.w, rn(node->v.w), node->w.w);
1287                 break;
1288                 case_rr(hton, _us);
1289                 case_rr(hton, _ui);
1290                 case_rr(hton, _ul);
1291                 case_rr(bswap, _us);
1292                 case_rr(bswap, _ui);
1293                 case_rr(bswap, _ul);
1294 #define extr(r0, r1, i0, i1)    fallback_ext(r0, r1, i0, i1)
1295 #define extr_u(r0, r1, i0, i1)  fallback_ext_u(r0, r1, i0, i1)
1296 #define depr(r0, r1, i0, i1)    fallback_dep(r0, r1, i0, i1)
1297             case jit_code_extr:
1298                 extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1299                 break;
1300             case jit_code_extr_u:
1301                 extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1302                 break;
1303             case jit_code_depr:
1304                 depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
1305                 break;
1306             case jit_code_depi:
1307                 depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
1308                 break;
1309                 case_rr(ext, _c);
1310                 case_rr(ext, _uc);
1311                 case_rr(ext, _s);
1312                 case_rr(ext, _us);
1313                 case_rr(ext, _i);
1314                 case_rr(ext, _ui);
1315             case jit_code_casr:
1316                 casr(rn(node->u.w), rn(node->v.w),
1317                      rn(node->w.q.l), rn(node->w.q.h));
1318                 break;
1319             case jit_code_casi:
1320                 casi(rn(node->u.w), node->v.w,
1321                      rn(node->w.q.l), rn(node->w.q.h));
1322                 break;
1323                 case_rrr(movn,);
1324                 case_rrr(movz,);
1325                 case_rr(mov,);
1326             case jit_code_movi:
1327                 if (node->flag & jit_flag_node) {
1328                     temp = node->v.n;
1329                     if (temp->code == jit_code_data ||
1330                         (temp->code == jit_code_label &&
1331                          (temp->flag & jit_flag_patch)))
1332                         movi(rn(node->u.w), temp->u.w);
1333                     else {
1334                         assert(temp->code == jit_code_label ||
1335                                temp->code == jit_code_epilog);
1336                         word = movi_p(rn(node->u.w), temp->u.w);
1337                         patch(word, node);
1338                     }
1339                 }
1340                 else
1341                     movi(rn(node->u.w), node->v.w);
1342                 break;
1343                 case_rrr(lt,);
1344                 case_rrw(lt,);
1345                 case_rrr(lt, _u);
1346                 case_rrw(lt, _u);
1347                 case_rrr(le,);
1348                 case_rrw(le,);
1349                 case_rrr(le, _u);
1350                 case_rrw(le, _u);
1351                 case_rrr(eq,);
1352                 case_rrw(eq,);
1353                 case_rrr(ge,);
1354                 case_rrw(ge,);
1355                 case_rrr(ge, _u);
1356                 case_rrw(ge, _u);
1357                 case_rrr(gt,);
1358                 case_rrw(gt,);
1359                 case_rrr(gt, _u);
1360                 case_rrw(gt, _u);
1361                 case_rrr(ne,);
1362                 case_rrw(ne,);
1363                 case_brr(blt,);
1364                 case_brw(blt,);
1365                 case_brr(blt, _u);
1366                 case_brw(blt, _u);
1367                 case_brr(ble,);
1368                 case_brw(ble,);
1369                 case_brr(ble, _u);
1370                 case_brw(ble, _u);
1371                 case_brr(beq,);
1372                 case_brw(beq,);
1373                 case_brr(bge,);
1374                 case_brw(bge,);
1375                 case_brr(bge, _u);
1376                 case_brw(bge, _u);
1377                 case_brr(bgt,);
1378                 case_brw(bgt,);
1379                 case_brr(bgt, _u);
1380                 case_brw(bgt, _u);
1381                 case_brr(bne,);
1382                 case_brw(bne,);
1383                 case_brr(boadd,);
1384                 case_brw(boadd,);
1385                 case_brr(boadd, _u);
1386                 case_brw(boadd, _u);
1387                 case_brr(bxadd,);
1388                 case_brw(bxadd,);
1389                 case_brr(bxadd, _u);
1390                 case_brw(bxadd, _u);
1391                 case_brr(bosub,);
1392                 case_brw(bosub,);
1393                 case_brr(bosub, _u);
1394                 case_brw(bosub, _u);
1395                 case_brr(bxsub,);
1396                 case_brw(bxsub,);
1397                 case_brr(bxsub, _u);
1398                 case_brw(bxsub, _u);
1399                 case_brr(bms,);
1400                 case_brw(bms,);
1401                 case_brr(bmc,);
1402                 case_brw(bmc,);
1403                 case_rrr(add, _f);
1404                 case_rrf(add);
1405                 case_rrr(sub, _f);
1406                 case_rrf(sub);
1407                 case_rrf(rsb);
1408                 case_rrr(mul, _f);
1409                 case_rrf(mul);
1410                 case_rrr(div, _f);
1411                 case_rrf(div);
1412                 case_rr(abs, _f);
1413                 case_rr(neg, _f);
1414                 case_rr(sqrt, _f);
1415                 case_rqr(fma, _f);
1416                 case_rqr(fms, _f);
1417                 case_rqr(fnma, _f);
1418                 case_rqr(fnms, _f);
1419                 case_rr(ext, _f);
1420                 case_rr(ld, _f);
1421                 case_rw(ld, _f);
1422                 case_rrr(ldx, _f);
1423                 case_rrw(ldx, _f);
1424             case jit_code_unldr_x:
1425                 unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
1426                 break;
1427             case jit_code_unldi_x:
1428                 unldi_x(rn(node->u.w), node->v.w, node->w.w);
1429                 break;
1430                 case_rr(st, _f);
1431                 case_wr(st, _f);
1432                 case_rrr(stx, _f);
1433                 case_wrr(stx, _f);
1434             case jit_code_unstr_x:
1435                 unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
1436                 break;
1437             case jit_code_unsti_x:
1438                 unsti_x(node->u.w, rn(node->v.w), node->w.w);
1439                 break;
1440                 case_rr(mov, _f);
1441             case jit_code_movi_f:
1442                 assert_data(node);
1443                 movi_f(rn(node->u.w), node->v.f);
1444                 break;
1445                 case_rr(ext, _d_f);
1446                 case_rrr(lt, _f);
1447                 case_rrf(lt);
1448                 case_rrr(le, _f);
1449                 case_rrf(le);
1450                 case_rrr(eq, _f);
1451                 case_rrf(eq);
1452                 case_rrr(ge, _f);
1453                 case_rrf(ge);
1454                 case_rrr(gt, _f);
1455                 case_rrf(gt);
1456                 case_rrr(ne, _f);
1457                 case_rrf(ne);
1458                 case_rrr(unlt, _f);
1459                 case_rrf(unlt);
1460                 case_rrr(unle, _f);
1461                 case_rrf(unle);
1462                 case_rrr(uneq, _f);
1463                 case_rrf(uneq);
1464                 case_rrr(unge, _f);
1465                 case_rrf(unge);
1466                 case_rrr(ungt, _f);
1467                 case_rrf(ungt);
1468                 case_rrr(ltgt, _f);
1469                 case_rrf(ltgt);
1470                 case_rrr(ord, _f);
1471                 case_rrf(ord);
1472                 case_rrr(unord, _f);
1473                 case_rrf(unord);
1474                 case_brr(blt, _f);
1475                 case_brf(blt);
1476                 case_brr(ble, _f);
1477                 case_brf(ble);
1478                 case_brr(beq, _f);
1479                 case_brf(beq);
1480                 case_brr(bge, _f);
1481                 case_brf(bge);
1482                 case_brr(bgt, _f);
1483                 case_brf(bgt);
1484                 case_brr(bne, _f);
1485                 case_brf(bne);
1486                 case_brr(bunlt, _f);
1487                 case_brf(bunlt);
1488                 case_brr(bunle, _f);
1489                 case_brf(bunle);
1490                 case_brr(buneq, _f);
1491                 case_brf(buneq);
1492                 case_brr(bunge, _f);
1493                 case_brf(bunge);
1494                 case_brr(bungt, _f);
1495                 case_brf(bungt);
1496                 case_brr(bltgt, _f);
1497                 case_brf(bltgt);
1498                 case_brr(bord, _f);
1499                 case_brf(bord);
1500                 case_brr(bunord, _f);
1501                 case_brf(bunord);
1502                 case_rrr(add, _d);
1503                 case_rrd(add);
1504                 case_rrr(sub, _d);
1505                 case_rrd(sub);
1506                 case_rrd(rsb);
1507                 case_rrr(mul, _d);
1508                 case_rrd(mul);
1509                 case_rrr(div, _d);
1510                 case_rrd(div);
1511                 case_rr(abs, _d);
1512                 case_rr(neg, _d);
1513                 case_rr(sqrt, _d);
1514                 case_rqr(fma, _d);
1515                 case_rqr(fms, _d);
1516                 case_rqr(fnma, _d);
1517                 case_rqr(fnms, _d);
1518                 case_rr(ext, _d);
1519                 case_rr(ld, _d);
1520                 case_rw(ld, _d);
1521                 case_rrr(ldx, _d);
1522                 case_rrw(ldx, _d);
1523                 case_rr(st, _d);
1524                 case_wr(st, _d);
1525                 case_rrr(stx, _d);
1526                 case_wrr(stx, _d);
1527                 case_rr(mov, _d);
1528             case jit_code_movi_d:
1529                 assert_data(node);
1530                 movi_d(rn(node->u.w), node->v.d);
1531                 break;
1532                 case_rr(ext, _f_d);
1533                 case_rrr(lt, _d);
1534                 case_rrd(lt);
1535                 case_rrr(le, _d);
1536                 case_rrd(le);
1537                 case_rrr(eq, _d);
1538                 case_rrd(eq);
1539                 case_rrr(ge, _d);
1540                 case_rrd(ge);
1541                 case_rrr(gt, _d);
1542                 case_rrd(gt);
1543                 case_rrr(ne, _d);
1544                 case_rrd(ne);
1545                 case_rrr(unlt, _d);
1546                 case_rrd(unlt);
1547                 case_rrr(unle, _d);
1548                 case_rrd(unle);
1549                 case_rrr(uneq, _d);
1550                 case_rrd(uneq);
1551                 case_rrr(unge, _d);
1552                 case_rrd(unge);
1553                 case_rrr(ungt, _d);
1554                 case_rrd(ungt);
1555                 case_rrr(ltgt, _d);
1556                 case_rrd(ltgt);
1557                 case_rrr(ord, _d);
1558                 case_rrd(ord);
1559                 case_rrr(unord, _d);
1560                 case_rrd(unord);
1561                 case_brr(blt, _d);
1562                 case_brd(blt);
1563                 case_brr(ble, _d);
1564                 case_brd(ble);
1565                 case_brr(beq, _d);
1566                 case_brd(beq);
1567                 case_brr(bge, _d);
1568                 case_brd(bge);
1569                 case_brr(bgt, _d);
1570                 case_brd(bgt);
1571                 case_brr(bne, _d);
1572                 case_brd(bne);
1573                 case_brr(bunlt, _d);
1574                 case_brd(bunlt);
1575                 case_brr(bunle, _d);
1576                 case_brd(bunle);
1577                 case_brr(buneq, _d);
1578                 case_brd(buneq);
1579                 case_brr(bunge, _d);
1580                 case_brd(bunge);
1581                 case_brr(bungt, _d);
1582                 case_brd(bungt);
1583                 case_brr(bltgt, _d);
1584                 case_brd(bltgt);
1585                 case_brr(bord, _d);
1586                 case_brd(bord);
1587                 case_brr(bunord, _d);
1588                 case_brd(bunord);
1589             case jit_code_jmpr:
1590                 jit_check_frame();
1591                 jmpr(rn(node->u.w));
1592                 break;
1593             case jit_code_jmpi:
1594                 if (node->flag & jit_flag_node) {
1595                     temp = node->u.n;
1596                     assert(temp->code == jit_code_label ||
1597                            temp->code == jit_code_epilog);
1598                     if (temp->flag & jit_flag_patch)
1599                         jmpi(temp->u.w);
1600                     else {
1601                         word = _jit->code.length -
1602                             (_jit->pc.uc - _jit->code.ptr);
1603                         if (simm20_p(word))
1604                             word = jmpi(_jit->pc.w);
1605                         else
1606                         word = jmpi_p(_jit->pc.w);
1607                         patch(word, node);
1608                     }
1609                 }
1610                 else {
1611                     jit_check_frame();
1612                     jmpi(node->u.w);
1613                 }
1614                 break;
1615             case jit_code_callr:
1616                 jit_check_frame();
1617                 callr(rn(node->u.w));
1618                 break;
1619             case jit_code_calli:
1620                 if (node->flag & jit_flag_node) {
1621                     temp = node->u.n;
1622                     assert(temp->code == jit_code_label ||
1623                            temp->code == jit_code_epilog);
1624                     if (temp->flag & jit_flag_patch)
1625                         calli(temp->u.w);
1626                     else {
1627                         word = _jit->code.length -
1628                             (_jit->pc.uc - _jit->code.ptr);
1629                         if (simm20_p(word))
1630                             word = calli(_jit->pc.w);
1631                         else
1632                             word = calli_p(_jit->pc.w);
1633                         patch(word, node);
1634                     }
1635                 }
1636                 else {
1637                     jit_check_frame();
1638                     calli(node->u.w);
1639                 }
1640                 break;
1641             case jit_code_prolog:
1642                 _jitc->function = _jitc->functions.ptr + node->w.w;
1643                 undo.node = node;
1644                 undo.word = _jit->pc.w;
1645                 memcpy(&undo.func, _jitc->function, sizeof(undo.func));
1646 #if DEVEL_DISASSEMBLER
1647                 undo.prevw = prevw;
1648 #endif
1649                 undo.const_offset = _jitc->consts.vector.offset;
1650                 undo.patch_offset = _jitc->patches.offset;
1651             restart_function:
1652                 compute_framesize();
1653                 patch_alist(0);
1654                 _jitc->again = 0;
1655                 prolog(node);
1656                 break;
1657             case jit_code_epilog:
1658                 assert(_jitc->function == _jitc->functions.ptr + node->w.w);
1659                 if (_jitc->again) {
1660                     for (temp = undo.node->next;
1661                          temp != node; temp = temp->next) {
1662                         if (temp->code == jit_code_label ||
1663                             temp->code == jit_code_epilog)
1664                             temp->flag &= ~jit_flag_patch;
1665                     }
1666                     temp->flag &= ~jit_flag_patch;
1667                     node = undo.node;
1668                     _jit->pc.w = undo.word;
1669                     /* undo.func.self.aoff and undo.func.regset should not
1670                      * be undone, as they will be further updated, and are
1671                      * the reason of the undo. */
1672                     undo.func.self.aoff = _jitc->function->frame +
1673                         _jitc->function->self.aoff;
1674                     undo.func.need_frame = _jitc->function->need_frame;
1675                     jit_regset_set(&undo.func.regset, &_jitc->function->regset);
1676                     /* allocar information also does not need to be undone */
1677                     undo.func.aoffoff = _jitc->function->aoffoff;
1678                     undo.func.allocar = _jitc->function->allocar;
1679                     /* this will be recomputed but undo anyway to have it
1680                      * better self documented.*/
1681                     undo.func.need_stack = _jitc->function->need_stack;
1682                     memcpy(_jitc->function, &undo.func, sizeof(undo.func));
1683 #if DEVEL_DISASSEMBLER
1684                     prevw = undo.prevw;
1685 #endif
1686                     _jitc->patches.offset = undo.patch_offset;
1687                     _jitc->consts.vector.offset = undo.const_offset;
1688                     patch_alist(1);
1689                     goto restart_function;
1690                 }
1691                 /* remember label is defined */
1692                 node->flag |= jit_flag_patch;
1693                 node->u.w = _jit->pc.w;
1694                 epilog(node);
1695                 _jitc->function = NULL;
1696                 break;
1697             case jit_code_movr_w_f:
1698                 movr_w_f(rn(node->u.w), rn(node->v.w));
1699                 break;
1700             case jit_code_movr_f_w:
1701                 movr_f_w(rn(node->u.w), rn(node->v.w));
1702                 break;
1703             case jit_code_movi_f_w:
1704                 assert_data(node);
1705                 movi_f_w(rn(node->u.w), node->v.f);
1706                 break;
1707             case jit_code_movi_w_f:
1708                 movi_w_f(rn(node->u.w), node->v.w);
1709                 break;
1710             case jit_code_movr_w_d:
1711                 movr_w_d(rn(node->u.w), rn(node->v.w));
1712                 break;
1713             case jit_code_movr_d_w:
1714                 movr_d_w(rn(node->u.w), rn(node->v.w));
1715                 break;
1716             case jit_code_movi_d_w:
1717                 assert_data(node);
1718                 movi_d_w(rn(node->u.w), node->v.d);
1719                 break;
1720             case jit_code_movi_w_d:
1721                 movi_w_d(rn(node->u.w), node->v.w);
1722                 break;
1723             case jit_code_va_start:
1724                 vastart(rn(node->u.w));
1725                 break;
1726             case jit_code_va_arg:
1727                 vaarg(rn(node->u.w), rn(node->v.w));
1728                 break;
1729             case jit_code_va_arg_d:
1730                 vaarg_d(rn(node->u.w), rn(node->v.w));
1731                 break;
1732             case jit_code_live:                 case jit_code_ellipsis:
1733             case jit_code_va_push:
1734             case jit_code_allocai:              case jit_code_allocar:
1735             case jit_code_arg_c:                case jit_code_arg_s:
1736             case jit_code_arg_i:
1737             case jit_code_arg_l:
1738             case jit_code_arg_f:                case jit_code_arg_d:
1739             case jit_code_va_end:
1740             case jit_code_ret:
1741             case jit_code_retr_c:               case jit_code_reti_c:
1742             case jit_code_retr_uc:              case jit_code_reti_uc:
1743             case jit_code_retr_s:               case jit_code_reti_s:
1744             case jit_code_retr_us:              case jit_code_reti_us:
1745             case jit_code_retr_i:               case jit_code_reti_i:
1746             case jit_code_retr_ui:              case jit_code_reti_ui:
1747             case jit_code_retr_l:               case jit_code_reti_l:
1748             case jit_code_retr_f:               case jit_code_reti_f:
1749             case jit_code_retr_d:               case jit_code_reti_d:
1750             case jit_code_getarg_c:             case jit_code_getarg_uc:
1751             case jit_code_getarg_s:             case jit_code_getarg_us:
1752             case jit_code_getarg_i:             case jit_code_getarg_ui:
1753             case jit_code_getarg_l:
1754             case jit_code_getarg_f:             case jit_code_getarg_d:
1755             case jit_code_putargr_c:            case jit_code_putargi_c:
1756             case jit_code_putargr_uc:           case jit_code_putargi_uc:
1757             case jit_code_putargr_s:            case jit_code_putargi_s:
1758             case jit_code_putargr_us:           case jit_code_putargi_us:
1759             case jit_code_putargr_i:            case jit_code_putargi_i:
1760             case jit_code_putargr_ui:           case jit_code_putargi_ui:
1761             case jit_code_putargr_l:            case jit_code_putargi_l:
1762             case jit_code_putargr_f:            case jit_code_putargi_f:
1763             case jit_code_putargr_d:            case jit_code_putargi_d:
1764             case jit_code_pushargr_c:           case jit_code_pushargi_c:
1765             case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
1766             case jit_code_pushargr_s:           case jit_code_pushargi_s:
1767             case jit_code_pushargr_us:          case jit_code_pushargi_us:
1768             case jit_code_pushargr_i:           case jit_code_pushargi_i:
1769             case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
1770             case jit_code_pushargr_l:           case jit_code_pushargi_l:
1771             case jit_code_pushargr_f:           case jit_code_pushargi_f:
1772             case jit_code_pushargr_d:           case jit_code_pushargi_d:
1773             case jit_code_retval_c:             case jit_code_retval_uc:
1774             case jit_code_retval_s:             case jit_code_retval_us:
1775             case jit_code_retval_i:
1776             case jit_code_retval_ui:            case jit_code_retval_l:
1777             case jit_code_retval_f:             case jit_code_retval_d:
1778             case jit_code_prepare:
1779             case jit_code_finishr:              case jit_code_finishi:
1780             case jit_code_negi_f:               case jit_code_absi_f:
1781             case jit_code_sqrti_f:              case jit_code_negi_d:
1782             case jit_code_absi_d:               case jit_code_sqrti_d:
1783                 break;
1784             case jit_code_negi:
1785                 negi(rn(node->u.w), node->v.w);
1786                 break;
1787             case jit_code_comi:
1788                 comi(rn(node->u.w), node->v.w);
1789                 break;
1790             case jit_code_exti_c:
1791                 exti_c(rn(node->u.w), node->v.w);
1792                 break;
1793             case jit_code_exti_uc:
1794                 exti_uc(rn(node->u.w), node->v.w);
1795                 break;
1796             case jit_code_exti_s:
1797                 exti_s(rn(node->u.w), node->v.w);
1798                 break;
1799             case jit_code_exti_us:
1800                 exti_us(rn(node->u.w), node->v.w);
1801                 break;
1802             case jit_code_bswapi_us:
1803                 bswapi_us(rn(node->u.w), node->v.w);
1804                 break;
1805             case jit_code_bswapi_ui:
1806                 bswapi_ui(rn(node->u.w), node->v.w);
1807                 break;
1808             case jit_code_htoni_us:
1809                 htoni_us(rn(node->u.w), node->v.w);
1810                 break;
1811             case jit_code_htoni_ui:
1812                 htoni_ui(rn(node->u.w), node->v.w);
1813                 break;
1814 #if __WORDSIZE == 64
1815             case jit_code_exti_i:
1816                 exti_i(rn(node->u.w), node->v.w);
1817                 break;
1818             case jit_code_exti_ui:
1819                 exti_ui(rn(node->u.w), node->v.w);
1820                 break;
1821             case jit_code_bswapi_ul:
1822                 bswapi_ul(rn(node->u.w), node->v.w);
1823                 break;
1824             case jit_code_htoni_ul:
1825                 htoni_ul(rn(node->u.w), node->v.w);
1826                 break;
1827 #endif
1828             case jit_code_cloi:
1829                 cloi(rn(node->u.w), node->v.w);
1830                 break;
1831             case jit_code_clzi:
1832                 clzi(rn(node->u.w), node->v.w);
1833                 break;
1834             case jit_code_ctoi:
1835                 ctoi(rn(node->u.w), node->v.w);
1836                 break;
1837             case jit_code_ctzi:
1838                 ctzi(rn(node->u.w), node->v.w);
1839                 break;
1840             case jit_code_rbiti:
1841                 rbiti(rn(node->u.w), node->v.w);
1842                 break;
1843             case jit_code_popcnti:
1844                 popcnti(rn(node->u.w), node->v.w);
1845                 break;
1846             case jit_code_exti:
1847                 exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
1848                 break;
1849             case jit_code_exti_u:
1850                 exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
1851                 break;
1852             default:
1853                 abort();
1854         }
1855         if (jit_carry != _NOREG) {
1856             switch (node->code) {
1857                 case jit_code_note:
1858                 case jit_code_addcr:            case jit_code_addci:
1859                 case jit_code_addxr:            case jit_code_addxi:
1860                 case jit_code_subcr:            case jit_code_subci:
1861                 case jit_code_subxr:            case jit_code_subxi:
1862                     break;
1863                 default:
1864                     jit_unget_reg(jit_carry);
1865                     jit_carry = _NOREG;
1866                     break;
1867             }
1868         }
1869         jit_regarg_clr(node, value);
1870         assert(_jitc->regarg == 0 ||
1871                (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry)));
1872         assert(_jitc->synth == 0);
1873         /* update register live state */
1874         jit_reglive(node);
1875     }
1876 #undef case_brw
1877 #undef case_brr
1878 #undef case_wrr
1879 #undef case_rrw
1880 #undef case_rrr
1881 #undef case_wr
1882 #undef case_rw
1883 #undef case_rr
1884
1885 #if __WORDSIZE == 64
1886     /* Record all constants to be patched */
1887     for (offset = 0; offset < _jitc->patches.offset; offset++) {
1888         node = _jitc->patches.ptr[offset].node;
1889         value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
1890         put_const(value);
1891     }
1892     /* Record all direct constants */
1893     for (offset = 0; offset < _jitc->consts.vector.offset; offset++)
1894         put_const(_jitc->consts.vector.values[offset]);
1895     /* Now actually inject constants at the end of code buffer */
1896     if (_jitc->consts.hash.count) {
1897         jit_const_t     *entry;
1898         /* Insert nop if aligned at 4 bytes */
1899         if (_jit->pc.w % sizeof(jit_word_t))
1900             nop(_jit->pc.w % sizeof(jit_word_t));
1901         for (offset = 0; offset < _jitc->consts.hash.size; offset++) {
1902             entry = _jitc->consts.hash.table[offset];
1903             for (; entry; entry = entry->next) {
1904                 /* Make sure to not write out of bounds */
1905                 if (_jit->pc.uc >= _jitc->code.end)
1906                     return (NULL);
1907                 entry->address = _jit->pc.w;
1908                 *_jit->pc.ul++ = entry->value;
1909             }
1910         }
1911     }
1912 #endif
1913
1914     for (offset = 0; offset < _jitc->patches.offset; offset++) {
1915         node = _jitc->patches.ptr[offset].node;
1916         word = _jitc->patches.ptr[offset].inst;
1917         value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
1918         patch_at(word, value);
1919     }
1920
1921 #if __WORDSIZE == 64
1922     /* Patch direct complex constants */
1923     if (_jitc->consts.vector.instrs) {
1924         for (offset = 0; offset < _jitc->consts.vector.offset; offset++)
1925             patch_at(_jitc->consts.vector.instrs[offset],
1926                      _jitc->consts.vector.values[offset]);
1927         jit_free((jit_pointer_t *)&_jitc->consts.vector.instrs);
1928         jit_free((jit_pointer_t *)&_jitc->consts.vector.values);
1929     }
1930
1931     /* Hash table no longer need */
1932     if (_jitc->consts.hash.table) {
1933         jit_free((jit_pointer_t *)&_jitc->consts.hash.table);
1934         for (offset = 0; offset < _jitc->consts.pool.length; offset++)
1935             jit_free((jit_pointer_t *)_jitc->consts.pool.ptr + offset);
1936         jit_free((jit_pointer_t *)&_jitc->consts.pool.ptr);
1937     }
1938 #endif
1939
1940     jit_flush(_jit->code.ptr, _jit->pc.uc);
1941
1942     return (_jit->code.ptr);
1943 }
1944
1945 #define CODE                            1
1946 #  include "jit_riscv-cpu.c"
1947 #  include "jit_riscv-fpu.c"
1948 #  include "jit_fallback.c"
1949 #undef CODE
1950
1951 static void
1952 _load_const(jit_state_t *_jit, jit_int32_t reg, jit_word_t value)
1953 {
1954     if (_jitc->consts.vector.offset >= _jitc->consts.vector.length) {
1955         jit_word_t      new_size = _jitc->consts.vector.length *
1956                                    2 * sizeof(jit_word_t);
1957         jit_realloc((jit_pointer_t *)&_jitc->consts.vector.instrs,
1958                     _jitc->consts.vector.length * sizeof(jit_word_t), new_size);
1959         jit_realloc((jit_pointer_t *)&_jitc->consts.vector.values,
1960                     _jitc->consts.vector.length * sizeof(jit_word_t), new_size);
1961         _jitc->consts.vector.length *= 2;
1962     }
1963     _jitc->consts.vector.instrs[_jitc->consts.vector.offset] = _jit->pc.w;
1964     _jitc->consts.vector.values[_jitc->consts.vector.offset] = value;
1965     ++_jitc->consts.vector.offset;
1966     /* Resolve later the pc relative address */
1967     put_const(value);
1968     AUIPC(reg, 0);
1969     ADDI(reg, reg, 0);
1970     LD(reg, reg, 0);
1971 }
1972
1973 static jit_word_t
1974 hash_const(jit_word_t value)
1975 {
1976     const jit_uint8_t   *ptr;
1977     jit_word_t           i, key;
1978     for (i = key = 0, ptr = (jit_uint8_t *)&value; i < 4; ++i)
1979         key = (key << (key & 1)) ^ ptr[i];
1980     return (key);
1981
1982 }
1983
1984 static void
1985 _put_const(jit_state_t *_jit, jit_word_t value)
1986 {
1987     jit_word_t           key;
1988     jit_const_t         *entry;
1989
1990     /* Check if already inserted in table */
1991     key = hash_const(value) % _jitc->consts.hash.size;
1992     for (entry = _jitc->consts.hash.table[key]; entry; entry = entry->next) {
1993         if (entry->value == value)
1994             return;
1995     }
1996
1997     /* Check if need to increase pool size */
1998     if (_jitc->consts.pool.list->next == NULL) {
1999         jit_const_t     *list;
2000         jit_word_t       offset;
2001         jit_word_t       new_size = (_jitc->consts.pool.length + 1) *
2002                                     sizeof(jit_const_t*);
2003         jit_realloc((jit_pointer_t *)&_jitc->consts.pool.ptr,
2004                     _jitc->consts.pool.length * sizeof(jit_const_t*), new_size);
2005         jit_alloc((jit_pointer_t *)
2006                   _jitc->consts.pool.ptr + _jitc->consts.pool.length,
2007                   1024 * sizeof(jit_const_t));
2008         list = _jitc->consts.pool.ptr[_jitc->consts.pool.length];
2009         _jitc->consts.pool.list->next = list;
2010         for (offset = 0; offset < 1023; ++offset, ++list)
2011             list->next = list + 1;
2012         list->next = NULL;
2013         ++_jitc->consts.pool.length;
2014     }
2015
2016     /* Rehash if more than 75% used table */
2017     if (_jitc->consts.hash.count > (_jitc->consts.hash.size / 4) * 3) {
2018         jit_word_t        i, k;
2019         jit_const_t      *next;
2020         jit_const_t     **table;
2021         jit_alloc((jit_pointer_t *)&table,
2022                   _jitc->consts.hash.size * 2 * sizeof(jit_const_t *));
2023         for (i = 0; i < _jitc->consts.hash.size; ++i) {
2024             for (entry = _jitc->consts.hash.table[i]; entry; entry = next) {
2025                 next = entry->next;
2026                 k = hash_const(entry->value) % (_jitc->consts.hash.size * 2);
2027                 entry->next = table[k];
2028                 table[k] = entry;
2029             }
2030         }
2031         jit_free((jit_pointer_t *)&_jitc->consts.hash.table);
2032         _jitc->consts.hash.size *= 2;
2033         _jitc->consts.hash.table = table;
2034     }
2035
2036     /* Insert in hash */
2037     entry = _jitc->consts.pool.list;
2038     _jitc->consts.pool.list =  entry->next;
2039     ++_jitc->consts.hash.count;
2040     entry->value = value;
2041     entry->next = _jitc->consts.hash.table[key];
2042     _jitc->consts.hash.table[key] = entry;
2043 }
2044
2045 static jit_word_t
2046 _get_const(jit_state_t *_jit, jit_word_t value)
2047 {
2048     jit_word_t           key;
2049     jit_const_t         *entry;
2050     key = hash_const(value) % _jitc->consts.hash.size;
2051     for (entry = _jitc->consts.hash.table[key]; entry; entry = entry->next) {
2052         if (entry->value == value)
2053             return (entry->address);
2054     }
2055     /* Only the final patch should call get_const() */
2056     abort();
2057 }
2058
2059 void
2060 jit_flush(void *fptr, void *tptr)
2061 {
2062 #if defined(__GNUC__)
2063     jit_word_t          f, t, s;
2064
2065     s = sysconf(_SC_PAGE_SIZE);
2066     f = (jit_word_t)fptr & -s;
2067     t = (((jit_word_t)tptr) + s - 1) & -s;
2068     __clear_cache((void *)f, (void *)t);
2069 #endif
2070 }
2071
2072 void
2073 _emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2074 {
2075     ldxi(rn(r0), rn(r1), i0);
2076 }
2077
2078 void
2079 _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2080 {
2081     stxi(i0, rn(r0), rn(r1));
2082 }
2083
2084 void
2085 _emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2086 {
2087     ldxi_d(rn(r0), rn(r1), i0);
2088 }
2089
2090 void
2091 _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2092 {
2093     stxi_d(i0, rn(r0), rn(r1));
2094 }
2095
2096 #if __WORDSIZE != 64
2097 # error "only 64 bit ports tested"
2098 #endif
2099 static void
2100 _compute_framesize(jit_state_t *_jit)
2101 {
2102     jit_int32_t         reg;
2103     _jitc->framesize = 16;      /* ra+fp */
2104     for (reg = 0; reg < jit_size(iregs); reg++)
2105         if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
2106             _jitc->framesize += sizeof(jit_word_t);
2107
2108     for (reg = 0; reg < jit_size(fregs); reg++)
2109         if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
2110             _jitc->framesize += sizeof(jit_float64_t);
2111
2112     /* Space to store variadic arguments */
2113     if (_jitc->function->self.call & jit_call_varargs)
2114         _jitc->framesize += (8 - _jitc->function->vagp) * 8;
2115
2116     /* Make sure functions called have a 16 byte aligned stack */
2117     _jitc->framesize = (_jitc->framesize + 15) & -16;
2118 }
2119
2120 static void
2121 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
2122 {
2123     jit_int32_t          flag;
2124
2125     assert(node->flag & jit_flag_node);
2126     if (node->code == jit_code_movi)
2127         flag = node->v.n->flag;
2128     else
2129         flag = node->u.n->flag;
2130     assert(!(flag & jit_flag_patch));
2131     if (_jitc->patches.offset >= _jitc->patches.length) {
2132         jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
2133                     _jitc->patches.length * sizeof(jit_patch_t),
2134                     (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
2135         _jitc->patches.length += 1024;
2136     }
2137     _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
2138     _jitc->patches.ptr[_jitc->patches.offset].node = node;
2139     ++_jitc->patches.offset;
2140 }