X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Fjit_riscv.c;h=27b0c5adada6b32a0e97378e15900f7373c9e101;hb=d481fb64f2aac7a36532142cda11fa43f5ca792f;hp=55b2391490f6120f91c63a12741c4a0d408d0373;hpb=70575e81838e2c8d842dd28c3fc7fbb91b395061;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/jit_riscv.c b/deps/lightning/lib/jit_riscv.c index 55b23914..27b0c5ad 100644 --- a/deps/lightning/lib/jit_riscv.c +++ b/deps/lightning/lib/jit_riscv.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Free Software Foundation, Inc. + * Copyright (C) 2019-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -17,6 +17,10 @@ * Paulo Cesar Pereira de Andrade */ +/* callee save + variadic arguments + * align16(ra+fp+s[1-9]+s10+s11+fs[0-9]+fs10+fs11)+align16(a[0-7]) */ +#define stack_framesize (208 + 64) + #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8) #define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) @@ -28,17 +32,30 @@ typedef jit_pointer_t jit_va_list_t; /* * Prototypes */ +#define compute_framesize() _compute_framesize(_jit) +static void _compute_framesize(jit_state_t*); +#if __WORDSIZE == 64 +# define load_const(r0, i0) _load_const(_jit, r0, i0) +static void _load_const(jit_state_t*, jit_int32_t, jit_word_t); +static jit_word_t hash_const(jit_word_t); +# define put_const(i0) _put_const(_jit, i0) +static void _put_const(jit_state_t*, jit_word_t); +# define get_const(i0) _get_const(_jit, i0) +static jit_word_t _get_const(jit_state_t*, jit_word_t); +#endif #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" +# include "jit_fallback.c" #undef PROTO /* * Initialization */ +jit_cpu_t jit_cpu; jit_register_t _rvs[] = { { 0x00, "zero" }, { 0x01, "ra" }, @@ -110,12 +127,22 @@ jit_register_t _rvs[] = { { _NOREG, "" }, }; +static jit_int32_t iregs[] = { + _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10, _S11 +}; + +static jit_int32_t fregs[] = { + _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7, _FS8, _FS9, _FS10, _FS11 +}; + /* * Implementation */ void jit_get_cpu(void) { + /* By default generate extra instructions for unaligned load/store. */ + jit_cpu.unaligned = 0; } void @@ -171,6 +198,7 @@ jit_int32_t _jit_allocai(jit_state_t *_jit, jit_int32_t length) { assert(_jitc->function); + jit_check_frame(); switch (length) { case 0: case 1: break; case 2: _jitc->function->self.aoff &= -2; break; @@ -219,20 +247,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -292,16 +318,17 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); - return (jit_arg_f_reg_p(u->u.w)); + return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8)); } void _jit_ellipsis(jit_state_t *_jit) { jit_inc_synth(ellipsis); + jit_check_frame(); if (_jitc->prepare) { jit_link_prepare(); assert(!(_jitc->function->call.call & jit_call_varargs)); @@ -325,19 +352,23 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -359,6 +390,7 @@ _jit_arg_f(jit_state_t *_jit) else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_f, offset, ++_jitc->function->self.argn); @@ -382,6 +414,7 @@ _jit_arg_d(jit_state_t *_jit) else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_d, offset, ++_jitc->function->self.argn); @@ -392,111 +425,129 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, JIT_RA0 - v->u.w); - else - jit_ldxi_c(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_c(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, JIT_RA0 - v->u.w); - else - jit_ldxi_uc(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_uc(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, JIT_RA0 - v->u.w); - else - jit_ldxi_s(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_s(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, JIT_RA0 - v->u.w); - else - jit_ldxi_us(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_us(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_i(u, JIT_RA0 - v->u.w); - else - jit_ldxi_i(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_i(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, JIT_RA0 - v->u.w); - else - jit_ldxi_ui(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_ui(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); - else - jit_ldxi_l(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_l(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(JIT_RA0 - v->u.w, u); - else - jit_stxi(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(v->u.w, JIT_FP, regno); + node = jit_stxi(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -511,8 +562,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_f(u, JIT_FA0 - v->u.w); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8)); - else - jit_ldxi_f(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_f(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -525,8 +578,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_f(JIT_FA0 - v->u.w, u); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u); - else - jit_stxi_f(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi_f(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -538,18 +593,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) jit_inc_synth_fp(putargi_f, u, v); if (jit_arg_f_reg_p(v->u.w)) jit_movi_f(JIT_FA0 - v->u.w, u); - else if (jit_arg_reg_p(v->u.w - 8)) { - union { - jit_float32_t f; - jit_int32_t i; - } uu; - uu.f = u; - jit_movi(JIT_RA0 - (v->u.w - 8), uu.i); - } + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); - jit_stxi_f(v->u.w, JIT_FP, regno); + node = jit_stxi_f(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -564,8 +615,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_d(u, JIT_FA0 - v->u.w); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8)); - else - jit_ldxi_d(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_d(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -578,8 +631,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_d(JIT_FA0 - v->u.w, u); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u); - else - jit_stxi_d(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi_d(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -591,28 +646,24 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) jit_inc_synth_dp(putargi_d, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi_d(JIT_FA0 - v->u.w, u); - else if (jit_arg_reg_p(v->u.w - 8)) { - union { - jit_float64_t d; - jit_int64_t w; - } uu; - uu.d = u; - jit_movi(JIT_RA0 - (v->u.w - 8), uu.w); - } + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); - jit_stxi_d(v->u.w, JIT_FP, regno); + node = jit_stxi_d(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(JIT_RA0 - _jitc->function->call.argi, u); @@ -621,16 +672,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) else { jit_stxi(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); @@ -642,6 +694,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) jit_stxi(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -664,6 +717,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) else { jit_stxi_f(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -690,6 +744,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -712,6 +767,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) else { jit_stxi_d(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -738,6 +794,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -766,6 +823,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) { jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishr, r0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; @@ -783,6 +841,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) { jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishi, (jit_word_t)i0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; @@ -883,6 +942,7 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -893,6 +953,43 @@ _emit_code(jit_state_t *_jit) jit_word_t prevw; #endif +#if __WORDSIZE == 64 + if (!_jitc->consts.hash.table) { + jit_alloc((jit_pointer_t *)&_jitc->consts.hash.table, + 16 * sizeof(jit_const_t *)); + _jitc->consts.hash.size = 16; + jit_alloc((jit_pointer_t *)&_jitc->consts.pool.ptr, + sizeof(jit_const_t *)); + jit_alloc((jit_pointer_t *)_jitc->consts.pool.ptr, + 1024 * sizeof(jit_const_t)); + _jitc->consts.pool.length = 1; + } + /* Reset table if starting over jit generation */ + else + memset(_jitc->consts.hash.table, 0, + _jitc->consts.hash.size * sizeof(jit_word_t)); + for (offset = 0; offset < _jitc->consts.pool.length; offset++) { + jit_int32_t i; + jit_const_t *list = _jitc->consts.pool.ptr[offset]; + for (i = 0; i < 1023; ++i, ++list) + list->next = list + 1; + if (offset + 1 < _jitc->consts.pool.length) + list->next = _jitc->consts.pool.ptr[offset + 1]; + else + list->next = NULL; + } + _jitc->consts.pool.list = _jitc->consts.pool.ptr[0]; + _jitc->consts.hash.count = 0; + if (!_jitc->consts.vector.instrs) { + jit_alloc((jit_pointer_t *)&_jitc->consts.vector.instrs, + 16 * sizeof(jit_word_t)); + jit_alloc((jit_pointer_t *)&_jitc->consts.vector.values, + 16 * sizeof(jit_word_t)); + _jitc->consts.vector.length = 16; + } + _jitc->consts.vector.offset = 0; +#endif + _jitc->function = NULL; jit_reglive_setup(); @@ -923,6 +1020,12 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rqr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.q.l), \ + rn(node->v.q.h), rn(node->w.w)); \ + case jit_code_##name##i##type: \ + break; #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ @@ -1015,11 +1118,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if (node->u.w == sizeof(jit_word_t) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); + /* Must align to a power of two */ + assert(!(node->u.w & (node->u.w - 1))); + if ((word = _jit->pc.w & (node->u.w - 1))) + nop(node->u.w - word); + break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; @@ -1044,6 +1149,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); + case_rrr(hmul,); + case_rrw(hmul,); + case_rrr(hmul, _u); + case_rrw(hmul, _u); case_rrrr(qmul,); case_rrrw(qmul,); case_rrrr(qmul, _u); @@ -1064,10 +1173,46 @@ _emit_code(jit_state_t *_jit) case_rrw(lsh,); case_rrr(rsh,); case_rrw(rsh,); +#define qlshr(r0, r1, r2, r3) fallback_qlshr(r0, r1, r2, r3) +#define qlshi(r0, r1, r2, i0) fallback_qlshi(r0, r1, r2, i0) +#define qlshr_u(r0, r1, r2, r3) fallback_qlshr_u(r0, r1, r2, r3) +#define qlshi_u(r0, r1, r2, i0) fallback_qlshi_u(r0, r1, r2, i0) + case_rrrr(qlsh,); + case_rrrw(qlsh,); + case_rrrr(qlsh, _u); + case_rrrw(qlsh, _u); case_rrr(rsh, _u); case_rrw(rsh, _u); +#define qrshr(r0, r1, r2, r3) fallback_qrshr(r0, r1, r2, r3) +#define qrshi(r0, r1, r2, i0) fallback_qrshi(r0, r1, r2, i0) +#define qrshr_u(r0, r1, r2, r3) fallback_qrshr_u(r0, r1, r2, r3) +#define qrshi_u(r0, r1, r2, i0) fallback_qrshi_u(r0, r1, r2, i0) + case_rrrr(qrsh,); + case_rrrw(qrsh,); + case_rrrr(qrsh, _u); + case_rrrw(qrsh, _u); +#define lrotr(r0,r1,r2) fallback_lrotr(r0,r1,r2) +#define lroti(r0,r1,i0) fallback_lroti(r0,r1,i0) +#define rrotr(r0,r1,r2) fallback_rrotr(r0,r1,r2) +#define rroti(r0,r1,i0) fallback_rroti(r0,r1,i0) + case_rrr(lrot,); + case_rrw(lrot,); + case_rrr(rrot,); + case_rrw(rrot,); case_rr(neg,); case_rr(com,); +#define clor(r0, r1) fallback_clo(r0, r1) +#define clzr(r0, r1) fallback_clz(r0, r1) +#define ctor(r0, r1) fallback_cto(r0, r1) +#define ctzr(r0, r1) fallback_ctz(r0, r1) +#define rbitr(r0, r1) fallback_rbit(r0, r1) +#define popcntr(r0, r1) fallback_popcnt(r0, r1) + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); + case_rr(rbit,); + case_rr(popcnt,); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -1106,6 +1251,18 @@ _emit_code(jit_state_t *_jit) case_rrw(ldx, _ui); case_rrr(ldx, _l); case_rrw(ldx, _l); + case jit_code_unldr: + unldr(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi: + unldi(rn(node->u.w), node->v.w, node->w.w); + break; + case jit_code_unldr_u: + unldr_u(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi_u: + unldi_u(rn(node->u.w), node->v.w, node->w.w); + break; case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); @@ -1122,15 +1279,49 @@ _emit_code(jit_state_t *_jit) case_wrr(stx, _i); case_rrr(stx, _l); case_wrr(stx, _l); + case jit_code_unstr: + unstr(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unsti: + unsti(node->u.w, rn(node->v.w), node->w.w); + break; case_rr(hton, _us); case_rr(hton, _ui); case_rr(hton, _ul); + case_rr(bswap, _us); + case_rr(bswap, _ui); + case_rr(bswap, _ul); +#define extr(r0, r1, i0, i1) fallback_ext(r0, r1, i0, i1) +#define extr_u(r0, r1, i0, i1) fallback_ext_u(r0, r1, i0, i1) +#define depr(r0, r1, i0, i1) fallback_dep(r0, r1, i0, i1) + case jit_code_extr: + extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_extr_u: + extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_depr: + depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h); + break; + case jit_code_depi: + depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; + case_rrr(movn,); + case_rrr(movz,); case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1221,15 +1412,31 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _f); case_rr(neg, _f); case_rr(sqrt, _f); + case_rqr(fma, _f); + case_rqr(fms, _f); + case_rqr(fnma, _f); + case_rqr(fnms, _f); case_rr(ext, _f); case_rr(ld, _f); case_rw(ld, _f); case_rrr(ldx, _f); case_rrw(ldx, _f); + case jit_code_unldr_x: + unldr_x(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unldi_x: + unldi_x(rn(node->u.w), node->v.w, node->w.w); + break; case_rr(st, _f); case_wr(st, _f); case_rrr(stx, _f); case_wrr(stx, _f); + case jit_code_unstr_x: + unstr_x(rn(node->u.w), rn(node->v.w), node->w.w); + break; + case jit_code_unsti_x: + unsti_x(node->u.w, rn(node->v.w), node->w.w); + break; case_rr(mov, _f); case jit_code_movi_f: assert_data(node); @@ -1304,6 +1511,10 @@ _emit_code(jit_state_t *_jit) case_rr(abs, _d); case_rr(neg, _d); case_rr(sqrt, _d); + case_rqr(fma, _d); + case_rqr(fms, _d); + case_rqr(fnma, _d); + case_rqr(fnms, _d); case_rr(ext, _d); case_rr(ld, _d); case_rw(ld, _d); @@ -1376,6 +1587,7 @@ _emit_code(jit_state_t *_jit) case_brr(bunord, _d); case_brd(bunord); case jit_code_jmpr: + jit_check_frame(); jmpr(rn(node->u.w)); break; case jit_code_jmpi: @@ -1386,14 +1598,22 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (simm20_p(word)) + word = jmpi(_jit->pc.w); + else word = jmpi_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); jmpi(node->u.w); + } break; case jit_code_callr: + jit_check_frame(); callr(rn(node->u.w)); break; case jit_code_calli: @@ -1404,22 +1624,33 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) calli(temp->u.w); else { - word = calli_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (simm20_p(word)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); calli(node->u.w); + } break; case jit_code_prolog: _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif + undo.const_offset = _jitc->consts.vector.offset; undo.patch_offset = _jitc->patches.offset; restart_function: + compute_framesize(); + patch_alist(0); _jitc->again = 0; prolog(node); break; @@ -1435,10 +1666,26 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + undo.func.need_frame = _jitc->function->need_frame; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + /* this will be recomputed but undo anyway to have it + * better self documented.*/ + undo.func.need_stack = _jitc->function->need_stack; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif _jitc->patches.offset = undo.patch_offset; + _jitc->consts.vector.offset = undo.const_offset; + patch_alist(1); goto restart_function; } /* remember label is defined */ @@ -1457,6 +1704,9 @@ _emit_code(jit_state_t *_jit) assert_data(node); movi_f_w(rn(node->u.w), node->v.f); break; + case jit_code_movi_w_f: + movi_w_f(rn(node->u.w), node->v.w); + break; case jit_code_movr_w_d: movr_w_d(rn(node->u.w), rn(node->v.w)); break; @@ -1467,6 +1717,9 @@ _emit_code(jit_state_t *_jit) assert_data(node); movi_d_w(rn(node->u.w), node->v.d); break; + case jit_code_movi_w_d: + movi_w_d(rn(node->u.w), node->v.w); + break; case jit_code_va_start: vastart(rn(node->u.w)); break; @@ -1479,11 +1732,19 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: + case jit_code_arg_l: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1491,10 +1752,22 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1504,6 +1777,77 @@ _emit_code(jit_state_t *_jit) case jit_code_retval_f: case jit_code_retval_d: case jit_code_prepare: case jit_code_finishr: case jit_code_finishi: + case jit_code_negi_f: case jit_code_absi_f: + case jit_code_sqrti_f: case jit_code_negi_d: + case jit_code_absi_d: case jit_code_sqrti_d: + break; + case jit_code_negi: + negi(rn(node->u.w), node->v.w); + break; + case jit_code_comi: + comi(rn(node->u.w), node->v.w); + break; + case jit_code_exti_c: + exti_c(rn(node->u.w), node->v.w); + break; + case jit_code_exti_uc: + exti_uc(rn(node->u.w), node->v.w); + break; + case jit_code_exti_s: + exti_s(rn(node->u.w), node->v.w); + break; + case jit_code_exti_us: + exti_us(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_us: + bswapi_us(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_ui: + bswapi_ui(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_us: + htoni_us(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_ui: + htoni_ui(rn(node->u.w), node->v.w); + break; +#if __WORDSIZE == 64 + case jit_code_exti_i: + exti_i(rn(node->u.w), node->v.w); + break; + case jit_code_exti_ui: + exti_ui(rn(node->u.w), node->v.w); + break; + case jit_code_bswapi_ul: + bswapi_ul(rn(node->u.w), node->v.w); + break; + case jit_code_htoni_ul: + htoni_ul(rn(node->u.w), node->v.w); + break; +#endif + case jit_code_cloi: + cloi(rn(node->u.w), node->v.w); + break; + case jit_code_clzi: + clzi(rn(node->u.w), node->v.w); + break; + case jit_code_ctoi: + ctoi(rn(node->u.w), node->v.w); + break; + case jit_code_ctzi: + ctzi(rn(node->u.w), node->v.w); + break; + case jit_code_rbiti: + rbiti(rn(node->u.w), node->v.w); + break; + case jit_code_popcnti: + popcnti(rn(node->u.w), node->v.w); + break; + case jit_code_exti: + exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); + break; + case jit_code_exti_u: + exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h); break; default: abort(); @@ -1538,6 +1882,35 @@ _emit_code(jit_state_t *_jit) #undef case_rw #undef case_rr +#if __WORDSIZE == 64 + /* Record all constants to be patched */ + for (offset = 0; offset < _jitc->patches.offset; offset++) { + node = _jitc->patches.ptr[offset].node; + value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; + put_const(value); + } + /* Record all direct constants */ + for (offset = 0; offset < _jitc->consts.vector.offset; offset++) + put_const(_jitc->consts.vector.values[offset]); + /* Now actually inject constants at the end of code buffer */ + if (_jitc->consts.hash.count) { + jit_const_t *entry; + /* Insert nop if aligned at 4 bytes */ + if (_jit->pc.w % sizeof(jit_word_t)) + nop(_jit->pc.w % sizeof(jit_word_t)); + for (offset = 0; offset < _jitc->consts.hash.size; offset++) { + entry = _jitc->consts.hash.table[offset]; + for (; entry; entry = entry->next) { + /* Make sure to not write out of bounds */ + if (_jit->pc.uc >= _jitc->code.end) + return (NULL); + entry->address = _jit->pc.w; + *_jit->pc.ul++ = entry->value; + } + } + } +#endif + for (offset = 0; offset < _jitc->patches.offset; offset++) { node = _jitc->patches.ptr[offset].node; word = _jitc->patches.ptr[offset].inst; @@ -1545,6 +1918,25 @@ _emit_code(jit_state_t *_jit) patch_at(word, value); } +#if __WORDSIZE == 64 + /* Patch direct complex constants */ + if (_jitc->consts.vector.instrs) { + for (offset = 0; offset < _jitc->consts.vector.offset; offset++) + patch_at(_jitc->consts.vector.instrs[offset], + _jitc->consts.vector.values[offset]); + jit_free((jit_pointer_t *)&_jitc->consts.vector.instrs); + jit_free((jit_pointer_t *)&_jitc->consts.vector.values); + } + + /* Hash table no longer need */ + if (_jitc->consts.hash.table) { + jit_free((jit_pointer_t *)&_jitc->consts.hash.table); + for (offset = 0; offset < _jitc->consts.pool.length; offset++) + jit_free((jit_pointer_t *)_jitc->consts.pool.ptr + offset); + jit_free((jit_pointer_t *)&_jitc->consts.pool.ptr); + } +#endif + jit_flush(_jit->code.ptr, _jit->pc.uc); return (_jit->code.ptr); @@ -1553,8 +1945,117 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" +# include "jit_fallback.c" #undef CODE +static void +_load_const(jit_state_t *_jit, jit_int32_t reg, jit_word_t value) +{ + if (_jitc->consts.vector.offset >= _jitc->consts.vector.length) { + jit_word_t new_size = _jitc->consts.vector.length * + 2 * sizeof(jit_word_t); + jit_realloc((jit_pointer_t *)&_jitc->consts.vector.instrs, + _jitc->consts.vector.length * sizeof(jit_word_t), new_size); + jit_realloc((jit_pointer_t *)&_jitc->consts.vector.values, + _jitc->consts.vector.length * sizeof(jit_word_t), new_size); + _jitc->consts.vector.length *= 2; + } + _jitc->consts.vector.instrs[_jitc->consts.vector.offset] = _jit->pc.w; + _jitc->consts.vector.values[_jitc->consts.vector.offset] = value; + ++_jitc->consts.vector.offset; + /* Resolve later the pc relative address */ + put_const(value); + AUIPC(reg, 0); + ADDI(reg, reg, 0); + LD(reg, reg, 0); +} + +static jit_word_t +hash_const(jit_word_t value) +{ + const jit_uint8_t *ptr; + jit_word_t i, key; + for (i = key = 0, ptr = (jit_uint8_t *)&value; i < 4; ++i) + key = (key << (key & 1)) ^ ptr[i]; + return (key); + +} + +static void +_put_const(jit_state_t *_jit, jit_word_t value) +{ + jit_word_t key; + jit_const_t *entry; + + /* Check if already inserted in table */ + key = hash_const(value) % _jitc->consts.hash.size; + for (entry = _jitc->consts.hash.table[key]; entry; entry = entry->next) { + if (entry->value == value) + return; + } + + /* Check if need to increase pool size */ + if (_jitc->consts.pool.list->next == NULL) { + jit_const_t *list; + jit_word_t offset; + jit_word_t new_size = (_jitc->consts.pool.length + 1) * + sizeof(jit_const_t*); + jit_realloc((jit_pointer_t *)&_jitc->consts.pool.ptr, + _jitc->consts.pool.length * sizeof(jit_const_t*), new_size); + jit_alloc((jit_pointer_t *) + _jitc->consts.pool.ptr + _jitc->consts.pool.length, + 1024 * sizeof(jit_const_t)); + list = _jitc->consts.pool.ptr[_jitc->consts.pool.length]; + _jitc->consts.pool.list->next = list; + for (offset = 0; offset < 1023; ++offset, ++list) + list->next = list + 1; + list->next = NULL; + ++_jitc->consts.pool.length; + } + + /* Rehash if more than 75% used table */ + if (_jitc->consts.hash.count > (_jitc->consts.hash.size / 4) * 3) { + jit_word_t i, k; + jit_const_t *next; + jit_const_t **table; + jit_alloc((jit_pointer_t *)&table, + _jitc->consts.hash.size * 2 * sizeof(jit_const_t *)); + for (i = 0; i < _jitc->consts.hash.size; ++i) { + for (entry = _jitc->consts.hash.table[i]; entry; entry = next) { + next = entry->next; + k = hash_const(entry->value) % (_jitc->consts.hash.size * 2); + entry->next = table[k]; + table[k] = entry; + } + } + jit_free((jit_pointer_t *)&_jitc->consts.hash.table); + _jitc->consts.hash.size *= 2; + _jitc->consts.hash.table = table; + } + + /* Insert in hash */ + entry = _jitc->consts.pool.list; + _jitc->consts.pool.list = entry->next; + ++_jitc->consts.hash.count; + entry->value = value; + entry->next = _jitc->consts.hash.table[key]; + _jitc->consts.hash.table[key] = entry; +} + +static jit_word_t +_get_const(jit_state_t *_jit, jit_word_t value) +{ + jit_word_t key; + jit_const_t *entry; + key = hash_const(value) % _jitc->consts.hash.size; + for (entry = _jitc->consts.hash.table[key]; entry; entry = entry->next) { + if (entry->value == value) + return (entry->address); + } + /* Only the final patch should call get_const() */ + abort(); +} + void jit_flush(void *fptr, void *tptr) { @@ -1592,6 +2093,30 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) stxi_d(i0, rn(r0), rn(r1)); } +#if __WORDSIZE != 64 +# error "only 64 bit ports tested" +#endif +static void +_compute_framesize(jit_state_t *_jit) +{ + jit_int32_t reg; + _jitc->framesize = 16; /* ra+fp */ + for (reg = 0; reg < jit_size(iregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) + _jitc->framesize += sizeof(jit_word_t); + + for (reg = 0; reg < jit_size(fregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) + _jitc->framesize += sizeof(jit_float64_t); + + /* Space to store variadic arguments */ + if (_jitc->function->self.call & jit_call_varargs) + _jitc->framesize += (8 - _jitc->function->vagp) * 8; + + /* Make sure functions called have a 16 byte aligned stack */ + _jitc->framesize = (_jitc->framesize + 15) & -16; +} + static void _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) {