X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flightning%2Flib%2Flightning.c;h=39c4fc2e49859dd9d3fca8c7f390377e92902494;hb=2b1b10dd333f8b525b90a15468824aae0ff4eb2f;hp=b3c245eba8ed5fe5bd0c34180837b8bd06b62553;hpb=c0c162422385a60ea7c8fa1dfe439e83e0a13d88;p=pcsx_rearmed.git diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c index b3c245eb..39c4fc2e 100644 --- a/deps/lightning/lib/lightning.c +++ b/deps/lightning/lib/lightning.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -67,6 +67,9 @@ _jit_dataset(jit_state_t *_jit); #define block_update_set(block, target) _block_update_set(_jit, block, target) static jit_bool_t _block_update_set(jit_state_t*, jit_block_t*, jit_block_t*); +#define propagate_backward(block) _propagate_backward(_jit, block) +static void _propagate_backward(jit_state_t*, jit_block_t*); + #define check_block_again() _check_block_again(_jit) static jit_bool_t _check_block_again(jit_state_t*); @@ -224,8 +227,25 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec) for (regno = 0; regno < _jitc->reglen; regno++) { if ((jit_class(_rvs[regno].spec) & spec) == spec && !jit_regset_tstbit(&_jitc->regarg, regno) && - !jit_regset_tstbit(&_jitc->reglive, regno)) + !jit_regset_tstbit(&_jitc->reglive, regno)) { + if (jit_regset_tstbit(&_jitc->regmask, regno)) { + /* search further, attempting to find a truly known + * free register, not just one in unknown state. 
*/ + jit_int32_t regfree; + + for (regfree = regno + 1; + regfree < _jitc->reglen; regfree++) { + if ((jit_class(_rvs[regfree].spec) & spec) == spec && + !jit_regset_tstbit(&_jitc->regarg, regfree) && + !jit_regset_tstbit(&_jitc->reglive, regfree) && + !jit_regset_tstbit(&_jitc->regmask, regfree)) { + regno = regfree; + break; + } + } + } goto regarg; + } } /* search for a register matching spec that is not an argument @@ -871,6 +891,7 @@ jit_new_state(void) jit_regset_new(&_jitc->regsav); jit_regset_new(&_jitc->reglive); jit_regset_new(&_jitc->regmask); + jit_regset_new(&_jitc->explive); jit_init(); @@ -1150,6 +1171,21 @@ _jit_new_node_qww(jit_state_t *_jit, jit_code_t code, return (link_node(node)); } +jit_node_t * +_jit_new_node_wqw(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_int32_t l, + jit_int32_t h, jit_word_t w) +{ + jit_node_t *node = new_node(code); + assert(!_jitc->realize); + assert(l != h); + node->u.w = u; + node->v.q.l = l; + node->v.q.h = h; + node->w.w = w; + return (link_node(node)); +} + jit_node_t * _jit_new_node_wwq(jit_state_t *_jit, jit_code_t code, jit_word_t u, jit_word_t v, @@ -1176,6 +1212,21 @@ _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code, return (link_node(node)); } +jit_node_t * +_jit_new_node_wqf(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_int32_t l, + jit_int32_t h, jit_float32_t w) +{ + jit_node_t *node = new_node(code); + assert(!_jitc->realize); + assert(l != h); + node->u.w = u; + node->v.q.l = l; + node->v.q.h = h; + node->w.f = w; + return (link_node(node)); +} + jit_node_t * _jit_new_node_wwd(jit_state_t *_jit, jit_code_t code, jit_word_t u, jit_word_t v, jit_float64_t w) @@ -1188,6 +1239,21 @@ _jit_new_node_wwd(jit_state_t *_jit, jit_code_t code, return (link_node(node)); } +jit_node_t * +_jit_new_node_wqd(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_int32_t l, + jit_int32_t h, jit_float64_t w) +{ + jit_node_t *node = new_node(code); + assert(!_jitc->realize); + assert(l != h); + 
node->u.w = u; + node->v.q.l = l; + node->v.q.h = h; + node->w.d = w; + return (link_node(node)); +} + jit_node_t * _jit_new_node_pww(jit_state_t *_jit, jit_code_t code, jit_pointer_t u, jit_word_t v, jit_word_t w) @@ -1332,14 +1398,36 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) mask = 0; break; case jit_code_live: case jit_code_va_end: - case jit_code_retr: case jit_code_retr_f: case jit_code_retr_d: - case jit_code_pushargr: case jit_code_pushargr_f: + case jit_code_retr_c: case jit_code_retr_uc: + case jit_code_retr_s: case jit_code_retr_us: + case jit_code_retr_i: case jit_code_retr_ui: + case jit_code_retr_l: + case jit_code_retr_f: case jit_code_retr_d: + case jit_code_pushargr_c: + case jit_code_pushargr_uc: + case jit_code_pushargr_s: + case jit_code_pushargr_us: + case jit_code_pushargr_i: + case jit_code_pushargr_ui: + case jit_code_pushargr_l: + case jit_code_pushargr_f: case jit_code_pushargr_d: case jit_code_finishr: /* synthesized will set jit_cc_a0_jmp */ mask = jit_cc_a0_reg; break; - case jit_code_align: case jit_code_reti: case jit_code_pushargi: - case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */ + case jit_code_align: case jit_code_skip: + case jit_code_reti_c: case jit_code_reti_uc: + case jit_code_reti_s: case jit_code_reti_us: + case jit_code_reti_i: case jit_code_reti_ui: + case jit_code_reti_l: + case jit_code_pushargi_c: + case jit_code_pushargi_uc: + case jit_code_pushargi_s: + case jit_code_pushargi_us: + case jit_code_pushargi_i: + case jit_code_pushargi_ui: + case jit_code_pushargi_l: + case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */ mask = jit_cc_a0_int; break; case jit_code_reti_f: case jit_code_pushargi_f: @@ -1351,7 +1439,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_allocai: mask = jit_cc_a0_int|jit_cc_a1_int; break; - case jit_code_arg: case jit_code_arg_f: case jit_code_arg_d: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: + 
case jit_code_arg_f: case jit_code_arg_d: mask = jit_cc_a0_int|jit_cc_a0_arg; break; case jit_code_calli: case jit_code_jmpi: @@ -1375,11 +1465,17 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_getarg_f: case jit_code_getarg_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg; break; - case jit_code_putargr: case jit_code_putargr_f: - case jit_code_putargr_d: + case jit_code_putargr_c:case jit_code_putargr_uc: + case jit_code_putargr_s:case jit_code_putargr_us: + case jit_code_putargr_i:case jit_code_putargr_ui: + case jit_code_putargr_l: + case jit_code_putargr_f:case jit_code_putargr_d: mask = jit_cc_a0_reg|jit_cc_a1_arg; break; - case jit_code_putargi: + case jit_code_putargi_c:case jit_code_putargi_uc: + case jit_code_putargi_s:case jit_code_putargi_us: + case jit_code_putargi_i:case jit_code_putargi_ui: + case jit_code_putargi_l: mask = jit_cc_a0_int|jit_cc_a1_arg; break; case jit_code_putargi_f: @@ -1392,12 +1488,34 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_ldi_s: case jit_code_ldi_us: case jit_code_ldi_i: case jit_code_ldi_ui: case jit_code_ldi_l: case jit_code_ldi_f: case jit_code_ldi_d: + case jit_code_negi: case jit_code_comi: + case jit_code_exti_c: case jit_code_exti_uc: + case jit_code_exti_s: case jit_code_exti_us: + case jit_code_exti_i: case jit_code_exti_ui: + case jit_code_bswapi_us:case jit_code_bswapi_ui: + case jit_code_bswapi_ul: + case jit_code_htoni_us: case jit_code_htoni_ui: + case jit_code_htoni_ul: + case jit_code_cloi: case jit_code_clzi: + case jit_code_ctoi: case jit_code_ctzi: + case jit_code_rbiti: case jit_code_popcnti: + case jit_code_movi_w_f: case jit_code_movi_w_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int; break; + case jit_code_unldi: case jit_code_unldi_u: case jit_code_unldi_x: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|jit_cc_a2_int; + break; + case jit_code_movi_ww_d: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|jit_cc_a2_int; + break; case jit_code_movi_f: 
case jit_code_movi_f_w: + case jit_code_negi_f: case jit_code_absi_f: + case jit_code_sqrti_f: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_flt; break; case jit_code_movi_d: case jit_code_movi_d_w: + case jit_code_negi_d: case jit_code_absi_d: + case jit_code_sqrti_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_dbl; break; case jit_code_movi_d_ww: @@ -1418,7 +1536,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_extr_f: case jit_code_extr_d_f: case jit_code_ldr_f: case jit_code_negr_d: case jit_code_absr_d: case jit_code_sqrtr_d: case jit_code_movr_d: case jit_code_extr_d: case jit_code_extr_f_d: - case jit_code_ldr_d: + case jit_code_ldr_d: case jit_code_rbitr: case jit_code_popcntr: + case jit_code_clor: case jit_code_clzr: + case jit_code_ctor: case jit_code_ctzr: case jit_code_movr_w_f: case jit_code_movr_f_w: case jit_code_movr_w_d: case jit_code_movr_d_w: case jit_code_va_arg: case jit_code_va_arg_d: @@ -1431,20 +1551,25 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_addi: case jit_code_addxi: case jit_code_addci: case jit_code_subi: case jit_code_subxi: case jit_code_subci: case jit_code_rsbi: - case jit_code_muli: case jit_code_divi: case jit_code_divi_u: + case jit_code_muli: case jit_code_hmuli: case jit_code_hmuli_u: + case jit_code_divi: case jit_code_divi_u: case jit_code_remi: case jit_code_remi_u: case jit_code_andi: case jit_code_ori: case jit_code_xori: case jit_code_lshi: - case jit_code_rshi: case jit_code_rshi_u: case jit_code_lti: + case jit_code_rshi: case jit_code_rshi_u: case jit_code_lroti: + case jit_code_rroti: case jit_code_lti: case jit_code_lti_u: case jit_code_lei: case jit_code_lei_u: case jit_code_eqi: case jit_code_gei: case jit_code_gei_u: case jit_code_gti: case jit_code_gti_u: case jit_code_nei: case jit_code_ldxi_c: case jit_code_ldxi_uc: case jit_code_ldxi_s: case jit_code_ldxi_us: case jit_code_ldxi_i: case jit_code_ldxi_ui: case jit_code_ldxi_l: case jit_code_ldxi_f: case 
jit_code_ldxi_d: + case jit_code_unldr: case jit_code_unldr_u: case jit_code_unldr_x: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_int; break; case jit_code_qmuli: case jit_code_qmuli_u: case jit_code_qdivi: case jit_code_qdivi_u: + case jit_code_qlshi: case jit_code_qlshi_u: + case jit_code_qrshi: case jit_code_qrshi_u: mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg| jit_cc_a1_reg|jit_cc_a2_int; break; @@ -1468,10 +1593,12 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) break; case jit_code_addr: case jit_code_addxr: case jit_code_addcr: case jit_code_subr: case jit_code_subxr: case jit_code_subcr: - case jit_code_mulr: case jit_code_divr: case jit_code_divr_u: + case jit_code_mulr: case jit_code_hmulr: case jit_code_hmulr_u: + case jit_code_divr: case jit_code_divr_u: case jit_code_remr: case jit_code_remr_u: case jit_code_andr: case jit_code_orr: case jit_code_xorr: case jit_code_lshr: - case jit_code_rshr: case jit_code_rshr_u: case jit_code_ltr: + case jit_code_rshr: case jit_code_rshr_u: case jit_code_lrotr: + case jit_code_rrotr: case jit_code_ltr: case jit_code_ltr_u: case jit_code_ler: case jit_code_ler_u: case jit_code_eqr: case jit_code_ger: case jit_code_ger_u: case jit_code_gtr: case jit_code_gtr_u: case jit_code_ner: @@ -1495,6 +1622,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) break; case jit_code_qmulr: case jit_code_qmulr_u: case jit_code_qdivr: case jit_code_qdivr_u: + case jit_code_qlshr: case jit_code_qlshr_u: + case jit_code_qrshr: case jit_code_qrshr_u: mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg| jit_cc_a1_reg|jit_cc_a2_reg; break; @@ -1502,6 +1631,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_sti_l: case jit_code_sti_f: case jit_code_sti_d: mask = jit_cc_a0_int|jit_cc_a1_reg; break; + case jit_code_unsti: case jit_code_unsti_x: + mask = jit_cc_a0_int|jit_cc_a1_reg|jit_cc_a2_int; + break; case jit_code_blti: case jit_code_blti_u: case jit_code_blei: case jit_code_blei_u: case 
jit_code_beqi: case jit_code_bgei: case jit_code_bgei_u: case jit_code_bgti: case jit_code_bgti_u: @@ -1528,6 +1660,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_str_l: case jit_code_str_f: case jit_code_str_d: mask = jit_cc_a0_reg|jit_cc_a1_reg; break; + case jit_code_unstr: case jit_code_unstr_x: + mask = jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_int; + break; case jit_code_stxi_c: case jit_code_stxi_s: case jit_code_stxi_i: case jit_code_stxi_l: case jit_code_stxi_f: case jit_code_stxi_d: mask = jit_cc_a0_int|jit_cc_a1_reg|jit_cc_a2_reg; @@ -1573,6 +1708,29 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int| jit_cc_a2_reg|jit_cc_a2_rlh; break; + case jit_code_extr: case jit_code_extr_u: case jit_code_depr: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_rlh; + break; + case jit_code_exti: case jit_code_exti_u: case jit_code_depi: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|jit_cc_a2_rlh; + break; + case jit_code_fmar_f: case jit_code_fmar_d: + case jit_code_fmsr_f: case jit_code_fmsr_d: + case jit_code_fnmar_f: case jit_code_fnmar_d: + case jit_code_fnmsr_f: case jit_code_fnmsr_d: + mask = jit_cc_a0_reg|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_reg; + break; + case jit_code_fmai_f: case jit_code_fmsi_f: + case jit_code_fnmai_f: case jit_code_fnmsi_f: + mask = jit_cc_a0_reg|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_flt; + break; + case jit_code_fmai_d: case jit_code_fmsi_d: + case jit_code_fnmai_d: case jit_code_fnmsi_d: + mask = jit_cc_a0_reg|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl; + break; default: abort(); } @@ -1583,8 +1741,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) void _jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address) { +#ifndef NDEBUG jit_int32_t mask; - +#endif switch (instr->code) { case jit_code_movi: case jit_code_ldi_c: case jit_code_ldi_uc: case jit_code_ldi_s: case 
jit_code_ldi_us: case jit_code_ldi_i: @@ -1597,7 +1756,9 @@ _jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address) instr->u.p = address; break; default: +#ifndef NDEBUG mask = jit_classify(instr->code); +#endif assert((mask & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp); instr->u.p = address; } @@ -1606,8 +1767,9 @@ _jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address) void _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label) { +#ifndef NDEBUG jit_int32_t mask; - +#endif assert(!(instr->flag & jit_flag_node)); instr->flag |= jit_flag_node; switch (instr->code) { @@ -1624,7 +1786,9 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label) instr->u.n = label; break; default: +#ifndef NDEBUG mask = jit_classify(instr->code); +#endif assert((mask & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp); assert(label->code == jit_code_label); instr->u.n = label; @@ -1645,8 +1809,14 @@ _do_setup(jit_state_t *_jit) * at the start of a basic block */ for (offset = 0; offset < _jitc->blocks.offset; offset++) { block = _jitc->blocks.ptr + offset; - if (!block->label || block->label->code == jit_code_epilog) + if (!block->label) continue; + if (block->label->code == jit_code_epilog) { + jit_regset_setbit(&block->reglive, JIT_RET); + jit_regset_setbit(&block->reglive, JIT_FRET); + jit_regset_com(&block->regmask, &block->reglive); + continue; + } jit_setup(block); } } @@ -1670,6 +1840,21 @@ _block_update_set(jit_state_t *_jit, return (0); } +static void +_propagate_backward(jit_state_t *_jit, jit_block_t *block) +{ + jit_block_t *prev; + jit_word_t offset; + + for (offset = block->label->v.w - 1; + offset >= 0; --offset) { + prev = _jitc->blocks.ptr + offset; + if (!block_update_set(prev, block) || + !(prev->label->flag & jit_flag_head)) + break; + } +} + static jit_bool_t _check_block_again(jit_state_t *_jit) { @@ -1708,12 +1893,11 @@ _check_block_again(jit_state_t *_jit) block = NULL; target = 
_jitc->blocks.ptr + node->v.w; - /* Update if previous block pass through */ - if (block && block->again && block_update_set(target, block)) + if (block && target->again && block_update_set(block, target)) { + propagate_backward(block); todo = 1; + } block = target; - if (!block->again) - continue; } /* If not the first jmpi */ else if (block) { @@ -1724,14 +1908,16 @@ _check_block_again(jit_state_t *_jit) label = node->u.n; /* Mark predecessor needs updating due to target change */ target = _jitc->blocks.ptr + label->v.w; - if (target->again && block_update_set(block, target)) + if (target->again && block_update_set(block, target)) { + propagate_backward(block); todo = 1; + } } } } while (todo); - return (1); + return (todo); } static void @@ -1762,6 +1948,7 @@ _jit_optimize(jit_state_t *_jit) jit_node_t *node; jit_block_t *block; jit_word_t offset; + jit_regset_t regmask; todo = 0; _jitc->function = NULL; @@ -1772,19 +1959,31 @@ _jit_optimize(jit_state_t *_jit) do_setup(); do_follow(1); - patch_registers(); - if (simplify()) - todo = 1; + jit_regset_set_ui(&regmask, 0); + for (offset = 0; offset < _jitc->reglen; offset++) { + if ((jit_class(_rvs[offset].spec) & (jit_class_gpr|jit_class_fpr)) && + (jit_class(_rvs[offset].spec) & jit_class_sav) == jit_class_sav) + jit_regset_setbit(&regmask, offset); + } - /* Figure out labels that are only reached with a jump - * and is required to do a simple redundant_store removal - * on jit_beqi below */ + /* Figure out labels that are only reached with a jump */ jump = 1; for (node = _jitc->head; node; node = node->next) { switch (node->code) { case jit_code_label: - if (!jump) + if (!jump) { node->flag |= jit_flag_head; + if (!node->link) { + /* Block is dead code or only reachable with an + * indirect jumps. In such condition, must assume + * all callee save registers are live. 
*/ + block = _jitc->blocks.ptr + node->v.w; + jit_regset_ior(&block->reglive, + &block->reglive, &regmask); + /* Cleanup regmask */ + block_update_set(block, block); + } + } break; case jit_code_jmpi: case jit_code_jmpr: case jit_code_epilog: @@ -1798,6 +1997,10 @@ _jit_optimize(jit_state_t *_jit) } } + patch_registers(); + if (simplify()) + todo = 1; + for (node = _jitc->head; node; node = node->next) { mask = jit_classify(node->code); if (mask & jit_cc_a0_reg) @@ -1913,6 +2116,10 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) case jit_code_label: case jit_code_prolog: case jit_code_epilog: block = _jitc->blocks.ptr + node->v.w; jit_regset_set(&_jitc->reglive, &block->reglive); + jit_regset_set_ui(&_jitc->explive, 0); + break; + case jit_code_live: + jit_regset_setbit(&_jitc->explive, node->u.w); break; case jit_code_callr: value = jit_regno(node->u.w); @@ -1960,13 +2167,22 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) } } } - if ((value & jit_cc_a1_reg) && !(node->v.w & jit_regno_patch)) { - if (value & jit_cc_a1_chg) { - jit_regset_clrbit(&_jitc->reglive, node->v.w); - jit_regset_setbit(&_jitc->regmask, node->v.w); + if (value & jit_cc_a1_reg) { + if (value & jit_cc_a1_rlh) { + /* Assume registers are not changed */ + if (!(node->v.q.l & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->v.q.l); + if (!(node->v.q.h & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->v.q.h); + } + else if (!(node->v.w & jit_regno_patch)) { + if (value & jit_cc_a1_chg) { + jit_regset_clrbit(&_jitc->reglive, node->v.w); + jit_regset_setbit(&_jitc->regmask, node->v.w); + } + else + jit_regset_setbit(&_jitc->reglive, node->v.w); } - else - jit_regset_setbit(&_jitc->reglive, node->v.w); } if (value & jit_cc_a2_reg) { if (value & jit_cc_a2_rlh) { @@ -2014,8 +2230,14 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) else jit_regset_setbit(&_jitc->regarg, jit_regno(node->u.w)); } - if (value & jit_cc_a1_reg) - 
jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w)); + if (value & jit_cc_a1_reg) { + if (value & jit_cc_a1_rlh) { + jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.q.l)); + jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.q.h)); + } + else + jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w)); + } if (value & jit_cc_a2_reg) { if (value & jit_cc_a2_rlh) { jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.l)); @@ -2024,6 +2246,19 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) else jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w)); } + /* Prevent incorrect detection of running out of registers + * if will need to patch jump, and all registers have been + * used in the current block. */ + if (node->code == jit_code_jmpi && (node->flag & jit_flag_node)) { + jit_node_t *label = node->u.n; + jit_block_t *block = _jitc->blocks.ptr + label->v.w; + jit_regset_set(&_jitc->reglive, &block->reglive); + jit_regset_set(&_jitc->regmask, &block->regmask); + if (jit_regset_set_p(&_jitc->explive)) { + jit_regset_ior(&_jitc->reglive, &block->reglive, &_jitc->explive); + jit_regset_xor(&_jitc->regmask, &_jitc->regmask, &_jitc->explive); + } + } } void @@ -2040,8 +2275,14 @@ _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) else jit_regset_clrbit(&_jitc->regarg, jit_regno(node->u.w)); } - if (value & jit_cc_a1_reg) - jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w)); + if (value & jit_cc_a1_reg) { + if (value & jit_cc_a1_rlh) { + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.q.l)); + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.q.h)); + } + else + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w)); + } if (value & jit_cc_a2_reg) { if (value & jit_cc_a2_rlh) { jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l)); @@ -2206,7 +2447,9 @@ _jit_emit(jit_state_t *_jit) jit_pointer_t code; jit_node_t *node; size_t length; +#ifndef NDEBUG int result; +#endif #if defined(__sgi) int mmap_fd; 
#endif @@ -2225,7 +2468,7 @@ _jit_emit(jit_state_t *_jit) #else if (!_jit->user_code) { mmap_prot = PROT_READ | PROT_WRITE; -#if !__OpenBSD__ +#if !(__OpenBSD__ || __APPLE__) mmap_prot |= PROT_EXEC; #endif #if __NetBSD__ @@ -2249,8 +2492,10 @@ _jit_emit(jit_state_t *_jit) for (;;) { #if __NetBSD__ - result = mprotect(_jit->code.ptr, _jit->code.length, - PROT_READ | PROT_WRITE); +#ifndef NDEBUG + result = +#endif + mprotect(_jit->code.ptr, _jit->code.length, PROT_READ | PROT_WRITE); assert(result == 0); #endif if ((code = emit_code()) == NULL) { @@ -2288,8 +2533,7 @@ _jit_emit(jit_state_t *_jit) # endif #else _jit->code.ptr = mmap(NULL, length, - PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + mmap_prot, mmap_flags, mmap_fd, 0); #endif assert(_jit->code.ptr != MAP_FAILED); @@ -2316,17 +2560,22 @@ _jit_emit(jit_state_t *_jit) jit_free((jit_pointer_t *)&_jitc->data.ptr); #if HAVE_MMAP else { - result = mprotect(_jit->data.ptr, - _jit->data.length, PROT_READ); +# ifndef NDEBUG + result = +# endif + mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); assert(result == 0); } if (!_jit->user_code) { - length = _jit->pc.uc - _jit->code.ptr; + _jit->code.protect = _jit->pc.uc - _jit->code.ptr; # if __riscv && __WORDSIZE == 64 /* FIXME should start adding consts at a page boundary */ - length -= _jitc->consts.hash.count * sizeof(jit_word_t); + _jit->code.protect -= _jitc->consts.hash.count * sizeof(jit_word_t); +# endif +# ifndef NDEBUG + result = + mprotect(_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_EXEC); # endif - result = mprotect(_jit->code.ptr, length, PROT_READ | PROT_EXEC); assert(result == 0); } #endif /* HAVE_MMAP */ @@ -2336,6 +2585,42 @@ fail: return (NULL); } +void +_jit_protect(jit_state_t *_jit) +{ +#if !HAVE_MMAP + assert (_jit->user_code); +#else +# ifndef NDEBUG + int result; +# endif + if (_jit->user_code) return; +# ifndef NDEBUG + result = +#endif + mprotect (_jit->code.ptr, _jit->code.protect, PROT_READ | 
PROT_EXEC); + assert (result == 0); +#endif +} + +void +_jit_unprotect(jit_state_t *_jit) +{ +#if !HAVE_MMAP + assert (_jit->user_code); +#else +# ifndef NDEBUG + int result; +# endif + if (_jit->user_code) return; +# ifndef NDEBUG + result = +# endif + mprotect (_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_WRITE); + assert (result == 0); +#endif +} + void _jit_frame(jit_state_t *_jit, jit_int32_t frame) { @@ -2401,21 +2686,51 @@ _jit_setup(jit_state_t *_jit, jit_block_t *block) /* Check argument registers in reverse order to properly * handle registers that are both, argument and result */ value = jit_classify(node->code); - if ((value & jit_cc_a2_reg) && - !(node->w.w & jit_regno_patch) && - jit_regset_tstbit(&block->regmask, node->w.w)) { + if (value & jit_cc_a2_reg) { live = !(value & jit_cc_a2_chg); - jit_regset_clrbit(&block->regmask, node->w.w); - if (live) - jit_regset_setbit(&block->reglive, node->w.w); + if (value & jit_cc_a2_rlh) { + /* Assume will not modify a pair in second argument */ + assert(live); + if (!(node->w.q.l & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->w.q.l)) { + jit_regset_clrbit(&block->regmask, node->w.q.l); + } + if (!(node->w.q.h & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->w.q.h)) { + jit_regset_clrbit(&block->regmask, node->w.q.h); + } + } + else { + if (!(node->w.w & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->w.w)) { + jit_regset_clrbit(&block->regmask, node->w.w); + if (live) + jit_regset_setbit(&block->reglive, node->w.w); + } + } } - if ((value & jit_cc_a1_reg) && - !(node->v.w & jit_regno_patch) && - jit_regset_tstbit(&block->regmask, node->v.w)) { + if (value & jit_cc_a1_reg) { live = !(value & jit_cc_a1_chg); - jit_regset_clrbit(&block->regmask, node->v.w); - if (live) - jit_regset_setbit(&block->reglive, node->v.w); + if (value & jit_cc_a1_rlh) { + /* Assume will not modify a pair in second argument */ + assert(live); + if (!(node->v.q.l & jit_regno_patch) 
&& + jit_regset_tstbit(&block->regmask, node->v.q.l)) { + jit_regset_clrbit(&block->regmask, node->v.q.l); + } + if (!(node->v.q.h & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->v.q.h)) { + jit_regset_clrbit(&block->regmask, node->v.q.h); + } + } + else { + if (!(node->v.w & jit_regno_patch) && + jit_regset_tstbit(&block->regmask, node->v.w)) { + jit_regset_clrbit(&block->regmask, node->v.w); + if (live) + jit_regset_setbit(&block->reglive, node->v.w); + } + } } if (value & jit_cc_a0_reg) { live = !(value & jit_cc_a0_chg); @@ -2520,23 +2835,34 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block) } } else { - if (value & jit_cc_a2_reg) { - if (!(node->w.w & jit_regno_patch)) { - if (jit_regset_tstbit(&regmask, node->w.w)) { - jit_regset_clrbit(&regmask, node->w.w); - if (!(value & jit_cc_a2_chg)) - jit_regset_setbit(&reglive, node->w.w); - } + if (!(node->w.w & jit_regno_patch)) { + if (jit_regset_tstbit(&regmask, node->w.w)) { + jit_regset_clrbit(&regmask, node->w.w); + if (!(value & jit_cc_a2_chg)) + jit_regset_setbit(&reglive, node->w.w); } } } } if (value & jit_cc_a1_reg) { - if (!(node->v.w & jit_regno_patch)) { - if (jit_regset_tstbit(&regmask, node->v.w)) { - jit_regset_clrbit(&regmask, node->v.w); - if (!(value & jit_cc_a1_chg)) - jit_regset_setbit(&reglive, node->v.w); + if (value & jit_cc_a1_rlh) { + if (!(node->v.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(&regmask, node->v.q.l)) + jit_regset_clrbit(&regmask, node->v.q.l); + } + if (!(node->v.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(&regmask, node->v.q.h)) + jit_regset_clrbit(&regmask, node->v.q.h); + } + } + else { + if (!(node->v.w & jit_regno_patch)) { + if (jit_regset_tstbit(&regmask, node->v.w)) { + jit_regset_clrbit(&regmask, node->v.w); + if (!(value & jit_cc_a1_chg)) + jit_regset_setbit(&reglive, node->v.w); + } } } } @@ -2696,11 +3022,24 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, } } if (value & jit_cc_a1_reg) { - 
if (!(node->v.w & jit_regno_patch)) { - if 
(jit_regset_tstbit(mask, node->v.w)) { - jit_regset_clrbit(mask, node->v.w); - if (!(value & jit_cc_a1_chg)) - jit_regset_setbit(live, node->v.w); + if (value & jit_cc_a1_rlh) { + if (!(node->v.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(mask, node->v.q.l)) + jit_regset_clrbit(mask, node->v.q.l); + } + if (!(node->v.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->v.q.h)) + jit_regset_clrbit(mask, node->v.q.h); + } + } + else { + if (!(node->v.w & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->v.w)) { + jit_regset_clrbit(mask, node->v.w); + if (!(value & jit_cc_a1_chg)) + jit_regset_setbit(live, node->v.w); + } } } } @@ -2767,6 +3106,9 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, * to jump to unknown location. */ /* Treat all callee save as live. */ jit_regset_ior(live, live, mask); + /* Prevent explicitly set as live registers to + * be used as a temporary for the jmpi. */ + jit_regset_ior(live, live, &_jitc->explive); /* Treat anything else as dead. 
*/ return; } @@ -2834,7 +3176,10 @@ _sequential_labels(jit_state_t *_jit) if ((jump = node->link)) { for (; jump; jump = link) { link = jump->link; - jump->u.n = prev; + if (jump->code == jit_code_movi) + jump->v.n = prev; + else + jump->u.n = prev; jump->link = prev->link; prev->link = jump; } @@ -2848,7 +3193,10 @@ _sequential_labels(jit_state_t *_jit) if ((jump = next->link)) { for (; jump; jump = link) { link = jump->link; - jump->u.n = node; + if (jump->code == jit_code_movi) + jump->v.n = node; + else + jump->u.n = node; jump->link = node->link; node->link = jump; } @@ -2870,36 +3218,59 @@ _split_branches(jit_state_t *_jit) jit_node_t *next; jit_node_t *label; jit_block_t *block; - + jit_block_t *blocks; + jit_word_t offset; + jit_word_t length; + + length = _jitc->blocks.length; + jit_alloc((jit_pointer_t *)&blocks, length * sizeof(jit_block_t)); + if ((node = _jitc->head) && + (node->code == jit_code_label || node->code == jit_code_prolog)) { + block = _jitc->blocks.ptr + node->v.w; + memcpy(blocks, block, sizeof(jit_block_t)); + node->v.w = 0; + offset = 1; + } + else + offset = 0; for (node = _jitc->head; node; node = next) { if ((next = node->next)) { if (next->code == jit_code_label || next->code == jit_code_prolog || - next->code == jit_code_epilog) - continue; + next->code == jit_code_epilog) { + if (offset >= length) { + jit_realloc((jit_pointer_t *)&blocks, + length * sizeof(jit_block_t), + (length + 16) * sizeof(jit_block_t)); + length += 16; + } + block = _jitc->blocks.ptr + next->v.w; + memcpy(blocks + offset, block, sizeof(jit_block_t)); + next->v.w = offset++; + } /* split block on branches */ - if (jit_classify(node->code) & jit_cc_a0_jmp) { + else if (jit_classify(node->code) & jit_cc_a0_jmp) { label = new_node(jit_code_label); label->next = next; node->next = label; - if (_jitc->blocks.offset >= _jitc->blocks.length) { - jit_word_t length; - - length = _jitc->blocks.length + 16; - jit_realloc((jit_pointer_t *)&_jitc->blocks.ptr, - 
_jitc->blocks.length * sizeof(jit_block_t), - length * sizeof(jit_block_t)); - _jitc->blocks.length = length; + if (offset >= length) { + jit_realloc((jit_pointer_t *)&blocks, + length * sizeof(jit_block_t), + (length + 16) * sizeof(jit_block_t)); + length += 16; } - block = _jitc->blocks.ptr + _jitc->blocks.offset; + block = blocks + offset; block->label = label; - label->v.w = _jitc->blocks.offset; + label->v.w = offset++; jit_regset_new(&block->reglive); jit_regset_new(&block->regmask); - ++_jitc->blocks.offset; } } } + jit_free((jit_pointer_t *)&_jitc->blocks.ptr); + _jitc->blocks.ptr = blocks; + _jitc->blocks.offset = offset; + _jitc->blocks.length = length; } static jit_bool_t @@ -2980,7 +3351,6 @@ _redundant_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) } break; case jit_code_name: case jit_code_note: - case jit_code_align: break; default: return (0); @@ -3031,7 +3401,7 @@ reverse_jump_code(jit_code_t code) case jit_code_bgti_f: return (jit_code_bunlei_f); case jit_code_bner_f: return (jit_code_beqr_f); - case jit_code_bnei_f: return (jit_code_beqr_f); + case jit_code_bnei_f: return (jit_code_beqi_f); case jit_code_bunltr_f: return (jit_code_bger_f); case jit_code_bunlti_f: return (jit_code_bgei_f); @@ -3242,11 +3612,21 @@ _simplify_movr(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node, { jit_int32_t regno; jit_int32_t right; + jit_int32_t spec; jit_value_t *value; regno = jit_regno(node->u.w); right = jit_regno(node->v.w); value = _jitc->values + regno; + + spec = jit_class(_rvs[regno].spec); + if (!(spec & (jit_class_gpr | jit_class_fpr))) { + /* reserved register */ + value->kind = 0; + ++_jitc->gen[regno]; + return (0); + } + if ((value->kind == jit_kind_register && jit_regno(value->base.q.l) == right && value->base.q.h == _jitc->gen[right]) || @@ -3284,12 +3664,18 @@ _simplify_movi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node, ++_jitc->gen[regno]; return (0); } + spec = jit_class(_rvs[regno].spec); + if (!(spec & 
(jit_class_gpr | jit_class_fpr))) { + /* reserved register */ + value->kind = 0; + ++_jitc->gen[regno]; + return (0); + } if (value->kind == kind) { if (memcmp(&node->v.w, &value->base.w, size) == 0) { del_node(prev, node); return (1); } - spec = jit_class(_rvs[regno].spec); if (kind == jit_kind_word) spec &= jit_class_gpr; else @@ -3380,7 +3766,6 @@ _simplify_stxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) /* no multiple information, so, if set to a constant, * prefer to keep that information */ if (value->kind == 0) { - value->kind = jit_kind_code; switch (node->code) { /* no information about signed/unsigned either */ case jit_code_stxi_c: value->code = jit_code_ldxi_c; break; @@ -3551,9 +3936,24 @@ _simplify(jit_state_t *_jit) } } if (info & jit_cc_a1_chg) { - regno = jit_regno(node->v.w); - _jitc->values[regno].kind = 0; - ++_jitc->gen[regno]; +#if 0 + /* Assume registers are not changed */ + if (info & jit_cc_a1_rlh) { + regno = jit_regno(node->v.q.l); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + regno = jit_regno(node->v.q.h); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + } + else { +#endif + regno = jit_regno(node->v.w); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; +#if 0 + } +#endif } if (info & jit_cc_a2_chg) { #if 0 @@ -3586,33 +3986,51 @@ _register_change_p(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, jit_int32_t regno) { jit_int32_t value; + jit_bool_t use, change; for (; node != link; node = node->next) { switch (node->code) { case jit_code_label: case jit_code_prolog: - /* lack of extra information so cannot say it is undefined */ - return (jit_reg_change); + /* lack of extra information so assume it is live */ + return (jit_reg_static); case jit_code_callr: case jit_code_calli: if (!(jit_class(_rvs[regno].spec) & jit_class_sav)) return (jit_reg_undef); break; default: value = jit_classify(node->code); + use = change = 0; + if (value & jit_cc_a0_rlh) { + if (node->u.q.l == regno || node->u.q.h 
== regno) { + if (value & jit_cc_a0_chg) + change = 1; + use = !change; + } + } + else if ((value & jit_cc_a0_reg) && node->u.w == regno) { + if (value & jit_cc_a0_chg) + change = 1; + use = !change; + } + if (!use && + (value & jit_cc_a1_reg) && node->v.w == regno) { + if (value & jit_cc_a1_chg) + change = 1; + use = !change; + } + if (!use && + (value & jit_cc_a2_reg) && node->w.w == regno) { + if (value & jit_cc_a2_chg) + change = 1; + use = !change; + } /* lack of extra information */ - if (value & (jit_cc_a0_jmp|jit_cc_a0_cnd)) - return (jit_reg_change); - else if ((value & (jit_cc_a0_reg|jit_cc_a0_chg)) == - (jit_cc_a0_reg|jit_cc_a0_chg) && - (((value & jit_cc_a0_rlh) && - (node->u.q.l == regno || node->u.q.h == regno)) || - (!(value & jit_cc_a0_rlh) && - node->u.w == regno))) - return (jit_reg_change); - else if ((value & jit_cc_a1_reg) && node->v.w == regno && - (value & jit_cc_a1_chg)) - return (jit_reg_change); - else if ((value & jit_cc_a2_reg) && node->w.w == regno && - (value & jit_cc_a2_chg)) + if (!use && (value & (jit_cc_a0_jmp|jit_cc_a0_cnd)) && + /* In case of indirect branches, always consider + * callee save registers as live. 
*/ + !(jit_class(_rvs[regno].spec) & jit_class_sav)) + change = 1; + if (!use && change) return (jit_reg_change); } } @@ -3777,8 +4195,18 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, node->u.w = patch; } } - if ((value & jit_cc_a1_reg) && node->v.w == regno) - node->v.w = patch; + if (value & jit_cc_a1_reg) { + if (value & jit_cc_a1_rlh) { + if (node->v.q.l == regno) + node->v.q.l = patch; + if (node->v.q.h == regno) + node->v.q.h = patch; + } + else { + if (node->v.w == regno) + node->v.w = patch; + } + } if (value & jit_cc_a2_reg) { if (value & jit_cc_a2_rlh) { if (node->w.q.l == regno) @@ -3819,6 +4247,96 @@ static maybe_unused void generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); #endif +#define depi(r0, i0, i1, i2) _depi(_jit, r0, i0, i1, i2) +static void _depi(jit_state_t*,jit_int32_t, jit_word_t, jit_word_t, jit_word_t); +#define negi(r0, i0) _negi(_jit, r0, i0) +static void _negi(jit_state_t*, jit_int32_t, jit_word_t); +#define comi(r0, i0) _comi(_jit, r0, i0) +static void _comi(jit_state_t*, jit_int32_t, jit_word_t); +#define exti_c(r0, i0) _exti_c(_jit, r0, i0) +static void _exti_c(jit_state_t*, jit_int32_t, jit_word_t); +#define exti_uc(r0, i0) _exti_uc(_jit, r0, i0) +static void _exti_uc(jit_state_t*, jit_int32_t, jit_word_t); +#define exti_s(r0, i0) _exti_s(_jit, r0, i0) +static void _exti_s(jit_state_t*, jit_int32_t, jit_word_t); +#define exti_us(r0, i0) _exti_us(_jit, r0, i0) +static void _exti_us(jit_state_t*, jit_int32_t, jit_word_t); +#if __WORDSIZE == 64 +#define exti_i(r0, i0) _exti_i(_jit, r0, i0) +static void _exti_i(jit_state_t*, jit_int32_t, jit_word_t); +#define exti_ui(r0, i0) _exti_ui(_jit, r0, i0) +static void _exti_ui(jit_state_t*, jit_int32_t, jit_word_t); +#endif +#define bswapi_us(r0, i0) _bswapi_us(_jit, r0, i0) +static void _bswapi_us(jit_state_t*, jit_int32_t, jit_word_t); +#define bswapi_ui(r0, i0) _bswapi_ui(_jit, r0, i0) +static void _bswapi_ui(jit_state_t*, jit_int32_t, 
jit_word_t); +#if __WORDSIZE == 64 +# define bswapi_ul(r0, i0) _bswapi_ul(_jit, r0, i0) +static void _bswapi_ul(jit_state_t*, jit_int32_t, jit_word_t); +#endif +#define htoni_us(r0, i0) _htoni_us(_jit, r0, i0) +static void _htoni_us(jit_state_t*, jit_int32_t, jit_word_t); +#define htoni_ui(r0, i0) _htoni_ui(_jit, r0, i0) +static void _htoni_ui(jit_state_t*, jit_int32_t, jit_word_t); +#if __WORDSIZE == 64 +# define htoni_ul(r0, i0) _htoni_ul(_jit, r0, i0) +static void _htoni_ul(jit_state_t*, jit_int32_t, jit_word_t); +#endif +# define movi_f_w(r0, i0) _movi_f_w(_jit, r0, i0) +static void _movi_f_w(jit_state_t*, jit_int32_t, jit_float32_t); +#if __WORDSIZE == 32 && !(defined(__mips__) && NEW_ABI) +# define movi_d_ww(r0, r1, i0) _movi_d_ww(_jit, r0, r1, i0) +static void _movi_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t); +#else +# define movi_d_w(r0, i0) _movi_d_w(_jit, r0, i0) +static void _movi_d_w(jit_state_t*, jit_int32_t, jit_float64_t); +#endif +#define cloi(r0, i0) _cloi(_jit, r0, i0) +static void _cloi(jit_state_t*, jit_int32_t, jit_word_t); +#define clzi(r0, i0) _clzi(_jit, r0, i0) +static void _clzi(jit_state_t*, jit_int32_t, jit_word_t); +#define ctoi(r0, i0) _ctoi(_jit, r0, i0) +static void _ctoi(jit_state_t*, jit_int32_t, jit_word_t); +#define ctzi(r0, i0) _ctzi(_jit, r0, i0) +static void _ctzi(jit_state_t*, jit_int32_t, jit_word_t); +#define rbiti(r0, i0) _rbiti(_jit, r0, i0) +static void _rbiti(jit_state_t*, jit_int32_t, jit_word_t); +#define popcnti(r0, i0) _popcnti(_jit, r0, i0) +static void _popcnti(jit_state_t*, jit_int32_t, jit_word_t); +#define exti(r0, i0, i1, i2) _exti(_jit, r0, i0, i1, i2) +static void _exti(jit_state_t*,jit_int32_t, jit_word_t, jit_word_t, jit_word_t); +#define exti_u(r0, i0, i1, i2) _exti_u(_jit, r0, i0, i1, i2) +static void _exti_u(jit_state_t*, + jit_int32_t, jit_word_t, jit_word_t, jit_word_t); +#define generic_unldr(r0, r1, i0) _generic_unldr(_jit, r0, r1, i0) +static void _generic_unldr(jit_state_t*, 
jit_int32_t, jit_int32_t, jit_word_t); +#define generic_unldi(r0, i0, i1) _generic_unldi(_jit, r0, i0, i1) +static void _generic_unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +#define generic_unldr_u(r0, r1, i0) _generic_unldr_u(_jit, r0, r1, i0) +static void _generic_unldr_u(jit_state_t*, + jit_int32_t, jit_int32_t, jit_word_t); +#define generic_unldi_u(r0, i0, i1) _generic_unldi_u(_jit, r0, i0, i1) +static void _generic_unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +#define generic_unstr(r0, r1, i0) _generic_unstr(_jit, r0, r1, i0) +static void _generic_unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +#define generic_unsti(i0, r0, i1) _generic_unsti(_jit, i0, r0, i1) +static void _generic_unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +#if !defined(__i386__) && !defined(__x86_64__) +# define generic_unldr_x(r0, r1, i0) _generic_unldr_x(_jit, r0, r1, i0) +static void _generic_unldr_x(jit_state_t*, + jit_int32_t, jit_int32_t, jit_word_t); +# define generic_unldi_x(r0, i0, i1) _generic_unldi_x(_jit, r0, i0, i1) +static void _generic_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t); +# define generic_unstr_x(r0, r1, i0) _generic_unstr_x(_jit, r0, r1, i0) +static void _generic_unstr_x(jit_state_t*, + jit_int32_t, jit_int32_t, jit_word_t); +# define generic_unsti_x(i0, r0, i1) _generic_unsti_x(_jit, i0, r0, i1) +static void _generic_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); +#endif +#define patch_alist(revert) _patch_alist(_jit, revert) +static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert); + #if defined(__i386__) || defined(__x86_64__) # include "jit_x86.c" #elif defined(__mips__) @@ -3841,6 +4359,8 @@ generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); # include "jit_alpha.c" #elif defined(__riscv) # include "jit_riscv.c" +#elif defined(__loongarch__) +# include "jit_loongarch.c" #endif static maybe_unused void @@ -3886,3 +4406,1101 @@ 
generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(reg); } #endif + +static void +_depi(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1, jit_word_t i2) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + depr(r0, rn(reg), i1, i2); + jit_unget_reg(reg); +} + +static void +_negi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + movi(r0, -i0); +} + +static void +_comi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + movi(r0, ~i0); +} + +static void +_exti_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + movi(r0, (jit_int8_t)i0); +} + +static void +_exti_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + movi(r0, (jit_uint8_t)i0); +} + +static void +_exti_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + movi(r0, (jit_int16_t)i0); +} + +static void +_exti_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + movi(r0, (jit_uint16_t)i0); +} + +#if __WORDSIZE == 64 +static void +_exti_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + movi(r0, (jit_int32_t)i0); +} + +static void +_exti_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + movi(r0, (jit_uint32_t)i0); +} +#endif + +static void +_bswapi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + union { + jit_uint16_t us; + jit_uint8_t v[2]; + } l, h; + l.us = i0; + h.v[0] = l.v[1]; + h.v[1] = l.v[0]; + movi(r0, h.us); +} + +static void +_bswapi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + union { + jit_uint32_t ui; + jit_uint8_t v[4]; + } l, h; + l.ui = i0; + h.v[0] = l.v[3]; + h.v[1] = l.v[2]; + h.v[2] = l.v[1]; + h.v[3] = l.v[0]; + movi(r0, h.ui); +} + +#if __WORDSIZE == 64 +static void +_bswapi_ul(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + union { + jit_uint64_t ul; + jit_uint8_t v[8]; + } l, h; + l.ul = i0; + h.v[0] = l.v[7]; + h.v[1] = l.v[6]; + h.v[2] = l.v[5]; + h.v[3] = l.v[4]; + h.v[4] = l.v[3]; + h.v[5] = l.v[2]; + h.v[6] = l.v[1]; + h.v[7] = l.v[0]; 
+ movi(r0, h.ul); +} +#endif + +static void +_htoni_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ +#if __BYTE_ORDER == __LITTLE_ENDIAN + bswapi_us(r0, i0); +#else + exti_us(r0, i0); +#endif +} + +static void +_htoni_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ +#if __BYTE_ORDER == __LITTLE_ENDIAN + bswapi_ui(r0, i0); +#else +# if __WORDSIZE == 32 + movi(r0, i0); +# else + exti_ui(r0, i0); +# endif +#endif +} + +#if __WORDSIZE == 64 +static void +_htoni_ul(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ +# if __BYTE_ORDER == __LITTLE_ENDIAN + bswapi_ul(r0, i0); +# else + movi(r0, i0); +# endif +} +#endif + +static void +_movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } data; + data.f = i0; +#if defined(__ia64__) + /* Should be used only in this case (with out0 == 120) */ + if (r0 >= 120) + r0 = _jitc->rout + (r0 - 120); +#endif + movi(r0, data.i); +} + +#if __WORDSIZE == 32 && !(defined(__mips__) && NEW_ABI) +static void +_movi_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) +{ + union { + jit_int32_t i[2]; + jit_float64_t d; + } data; + data.d = i0; + /* Mips does not change byte order of double values */ +# if __BYTE_ORDER == __LITTLE_ENDIAN || defined(__mips__) + movi(r0, data.i[0]); + movi(r1, data.i[1]); +# else + movi(r1, data.i[0]); + movi(r0, data.i[1]); +# endif +} + +#else +static void +_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) +{ + union { + jit_int64_t l; + jit_float64_t d; + } data; + data.d = i0; +# if defined(__ia64__) + /* Should be used only in this case (with out0 == 120) */ + if (r0 >= 120) + r0 = _jitc->rout + (r0 - 120); +# endif + movi(r0, data.l); +} +#endif + + void +_jit_negi_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v) +{ + jit_inc_synth_wf(negi_f, u, v); + jit_movi_f(u, v); + jit_negr_f(u, u); + jit_dec_synth(); +} + +void +_jit_absi_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v) +{ + 
jit_inc_synth_wf(absi_f, u, v); + jit_movi_f(u, v); + jit_absr_f(u, u); + jit_dec_synth(); +} + +void +_jit_sqrti_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v) +{ + jit_inc_synth_wf(sqrti_f, u, v); + jit_movi_f(u, v); + jit_sqrtr_f(u, u); + jit_dec_synth(); +} + +void +_jit_fmai_f(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x) +{ + jit_int32_t y; + jit_inc_synth_wqf(fmai_f, u, v, w, x); + if (u != v && u != w) { + jit_movi_f(u, x); + jit_fmar_f(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_f(y, x); + jit_fmar_f(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fmsi_f(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x) +{ + jit_int32_t y; + jit_inc_synth_wqf(fmsi_f, u, v, w, x); + if (u != v && u != w) { + jit_movi_f(u, x); + jit_fmsr_f(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_f(y, x); + jit_fmsr_f(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fnmai_f(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x) +{ + jit_int32_t y; + jit_inc_synth_wqf(fnmai_f, u, v, w, x); + if (u != v && u != w) { + jit_movi_f(u, x); + jit_fnmar_f(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_f(y, x); + jit_fnmar_f(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fnmsi_f(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x) +{ + jit_int32_t y; + jit_inc_synth_wqf(fnmsi_f, u, v, w, x); + if (u != v && u != w) { + jit_movi_f(u, x); + jit_fnmsr_f(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_f(y, x); + jit_fnmsr_f(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_negi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v) +{ + jit_inc_synth_wd(negi_d, u, v); + jit_movi_d(u, v); + jit_negr_d(u, u); + jit_dec_synth(); +} + +void +_jit_absi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v) +{ + 
jit_inc_synth_wd(absi_d, u, v); + jit_movi_d(u, v); + jit_absr_d(u, u); + jit_dec_synth(); +} + +void +_jit_sqrti_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v) +{ + jit_inc_synth_wd(sqrti_d, u, v); + jit_movi_d(u, v); + jit_sqrtr_d(u, u); + jit_dec_synth(); +} + +void +_jit_fmai_d(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x) +{ + jit_int32_t y; + jit_inc_synth_wqd(fmai_d, u, v, w, x); + if (u != v && u != w) { + jit_movi_d(u, x); + jit_fmar_d(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_d(y, x); + jit_fmar_d(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fmsi_d(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x) +{ + jit_int32_t y; + jit_inc_synth_wqd(fmsi_d, u, v, w, x); + if (u != v && u != w) { + jit_movi_d(u, x); + jit_fmsr_d(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_d(y, x); + jit_fmsr_d(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fnmai_d(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x) +{ + jit_int32_t y; + jit_inc_synth_wqd(fnmai_d, u, v, w, x); + if (u != v && u != w) { + jit_movi_d(u, x); + jit_fnmar_d(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_d(y, x); + jit_fnmar_d(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +void +_jit_fnmsi_d(jit_state_t *_jit, + jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x) +{ + jit_int32_t y; + jit_inc_synth_wqd(fnmsi_d, u, v, w, x); + if (u != v && u != w) { + jit_movi_d(u, x); + jit_fnmsr_d(u, v, w, u); + } + else { + y = jit_get_reg(jit_class_fpr); + jit_movi_d(y, x); + jit_fnmsr_d(u, v, w, y); + jit_unget_reg(y); + } + jit_dec_synth(); +} + +static void +_cloi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + clzi(r0, ~i0); +} + +static void +_clzi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ +#if __WORDSIZE == 64 && _WIN32 + movi(r0, (i0) ? 
__builtin_clzll(i0) : __WORDSIZE); +#else + movi(r0, (i0) ? __builtin_clzl(i0) : __WORDSIZE); +#endif +} + +static void +_ctoi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + ctzi(r0, ~i0); +} + +static void +_ctzi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ +#if __WORDSIZE == 64 && _WIN32 + movi(r0, (i0) ? __builtin_ctzll(i0) : __WORDSIZE); +#else + movi(r0, (i0) ? __builtin_ctzl(i0) : __WORDSIZE); +#endif +} + +static void +_rbiti(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t i; + union { + jit_uword_t w; + jit_uint8_t v[__WORDSIZE >> 3]; + } u, v; + static const unsigned char swap_tab[256] = { + 0, 128, 64, 192, 32, 160, 96, 224, + 16, 144, 80, 208, 48, 176, 112, 240, + 8, 136, 72, 200, 40, 168, 104, 232, + 24, 152, 88, 216 ,56, 184, 120, 248, + 4, 132, 68, 196, 36, 164, 100, 228, + 20, 148, 84, 212, 52, 180, 116, 244, + 12, 140, 76, 204, 44, 172, 108, 236, + 28, 156, 92, 220, 60, 188, 124, 252, + 2, 130, 66, 194, 34, 162, 98, 226, + 18, 146, 82, 210, 50, 178, 114, 242, + 10, 138, 74, 202, 42, 170, 106, 234, + 26, 154, 90, 218, 58, 186, 122, 250, + 6, 134, 70, 198, 38, 166, 102, 230, + 22, 150, 86, 214, 54, 182, 118, 246, + 14, 142, 78, 206, 46, 174, 110, 238, + 30, 158, 94, 222, 62, 190, 126, 254, + 1, 129, 65, 193, 33, 161, 97, 225, + 17, 145, 81, 209, 49, 177, 113, 241, + 9, 137, 73, 201, 41, 169, 105, 233, + 25, 153, 89, 217, 57, 185, 121, 249, + 5, 133, 69, 197, 37, 165, 101, 229, + 21, 149, 85, 213, 53, 181, 117, 245, + 13, 141, 77, 205, 45, 173, 109, 237, + 29, 157, 93, 221, 61, 189, 125, 253, + 3, 131, 67, 195, 35, 163, 99, 227, + 19, 147, 83, 211, 51, 179, 115, 243, + 11, 139, 75, 203, 43, 171, 107, 235, + 27, 155, 91, 219, 59, 187, 123, 251, + 7, 135, 71, 199, 39, 167, 103, 231, + 23, 151, 87, 215, 55, 183, 119, 247, + 15, 143, 79, 207, 47, 175, 111, 239, + 31, 159, 95, 223, 63, 191, 127, 255 + }; + u.w = i0; + for (i = 0; i < sizeof(jit_word_t); ++i) + v.v[i] = swap_tab[u.v[sizeof(jit_word_t) - i - 1]]; + movi(r0, 
v.w); +} + +static void +_popcnti(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ +#if __WORDSIZE == 64 && _WIN32 + movi(r0, (i0) ? __builtin_popcountll(i0) : 0); /* popcount(0) is 0, not the word size */ +#else + movi(r0, (i0) ? __builtin_popcountl(i0) : 0); /* popcount(0) is 0, not the word size */ +#endif +} + +static void _exti(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1, jit_word_t i2) +{ +#if __BYTE_ORDER == __BIG_ENDIAN + i1 = __WORDSIZE - (i1 + i2); +#endif + i0 <<= __WORDSIZE - (i1 + i2); + i0 >>= __WORDSIZE - i2; + movi(r0, i0); +} + +static void _exti_u(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1, jit_word_t i2) +{ + jit_word_t t; +#if __BYTE_ORDER == __BIG_ENDIAN + i1 = __WORDSIZE - (i1 + i2); +#endif + if (i1) + i0 >>= i1; /* drop the i1 bits below the field; the mask below keeps i2 bits */ +#if __WORDSIZE == 64 && !_WIN32 + i0 &= (1L << i2) - 1; +#else + i0 &= (1LL << i2) - 1; +#endif + movi(r0, i0); +} + +static void +_generic_unldr(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0, r2; + assert(i0 >= 1 && i0 <= sizeof(jit_word_t)); + if (i0 & (i0 - 1)) { + t0 = jit_get_reg(jit_class_gpr); + r2 = rn(t0); + movr(r2, r1); + } + switch (i0) { + case 1: + ldr_c(r0, r1); + break; + case 2: + ldr_s(r0, r1); + break; + case 3: +#if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_us(r0, r2); + ldxi_c(r2, r2, 2); + lshi(r2, r2, 16); +#else + ldr_c(r0, r2); + lshi(r0, r0, 16); + ldxi_us(r2, r2, 1); +#endif + break; +#if __WORDSIZE == 32 + default: + ldr_i(r0, r1); + break; +#else + case 4: + ldr_i(r0, r1); + break; + case 5: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r2); + ldxi_c(r2, r2, 4); + lshi(r2, r2, 32); +# else + ldr_i(r0, r2); + lshi(r0, r0, 8); + ldxi_uc(r2, r2, 4); +# endif + break; + case 6: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r2); + ldxi_s(r2, r2, 4); + lshi(r2, r2, 32); +# else + ldr_i(r0, r2); + lshi(r0, r0, 16); + ldxi_us(r2, r2, 4); +# endif + break; + case 7: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r2); + ldxi_i(r2, r2, 4); + lshi(r2, r2, 40); + 
rshi(r2, r2, 8); +# else + ldr_i(r0, r2); + lshi(r0, r0, 24); + ldxi_ui(r2, r2, 4); + rshi(r2, r2, 8); +# endif + break; + default: + ldr_l(r0, r1); + break; +#endif + } + if (i0 & (i0 - 1)) { + orr(r0, r0, r2); + jit_unget_reg(t0); + } +} + +static void +_generic_unldi(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0, r2; + assert(i1 >= 1 && i1 <= sizeof(jit_word_t)); + if (i1 & (i1 - 1)) { + t0 = jit_get_reg(jit_class_gpr); + r2 = rn(t0); + } + switch (i1) { + case 1: + ldi_c(r0, i0); + break; + case 2: + ldi_s(r0, i0); + break; + case 3: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i0); + ldi_c(r2, i0 + 2); + lshi(r2, r2, 16); +# else + ldi_c(r0, i0); + lshi(r0, r0, 16); + ldi_us(r2, i0 + 1); +# endif + break; +# if __WORDSIZE == 32 + default: + ldi_i(r0, i0); + break; +# else + case 4: + ldi_i(r0, i0); + break; + case 5: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i0); + ldi_c(r2, i0 + 4); + lshi(r2, r2, 32); +# else + ldi_i(r0, i0); + lshi(r0, r0, 8); + ldi_uc(r2, i0 + 4); +# endif + break; + case 6: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i0); + ldi_s(r2, i0 + 4); + lshi(r2, r2, 32); +# else + ldi_i(r0, i0); + lshi(r0, r0, 16); + ldi_us(r2, i0 + 4); +# endif + break; + case 7: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i0); + ldi_i(r2, i0 + 4); + lshi(r2, r2, 40); + rshi(r2, r2, 8); +# else + ldi_i(r0, i0); + lshi(r0, r0, 24); + ldi_ui(r2, i0 + 4); + rshi(r2, r2, 8); +# endif + break; + default: + ldi_l(r0, i0); + break; +# endif + } + if (i1 & (i1 - 1)) { + orr(r0, r0, r2); + jit_unget_reg(t0); + } +} + +static void +_generic_unldr_u(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t t0, r2; + assert(i0 >= 1 && i0 <= sizeof(jit_word_t)); + if (i0 & (i0 - 1)) { + t0 = jit_get_reg(jit_class_gpr); + r2 = rn(t0); + movr(r2, r1); + } + switch (i0) { + case 1: + ldr_uc(r0, r1); + break; + case 2: + ldr_us(r0, r1); + break; + case 3: +# if __BYTE_ORDER == 
__LITTLE_ENDIAN + ldr_us(r0, r2); + ldxi_uc(r2, r2, 2); + lshi(r2, r2, 16); +# else + ldr_uc(r0, r2); + lshi(r0, r0, 16); + ldxi_us(r2, r2, 1); +# endif + break; +# if __WORDSIZE == 32 + default: + ldr_i(r0, r1); + break; +# else + case 4: + ldr_ui(r0, r1); + break; + case 5: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r2); + ldxi_uc(r2, r2, 4); + lshi(r2, r2, 32); +# else + ldr_ui(r0, r2); + lshi(r0, r0, 8); + ldxi_uc(r2, r2, 4); +# endif + break; + case 6: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r2); + ldxi_us(r2, r2, 4); + lshi(r2, r2, 32); +# else + ldr_ui(r0, r2); + lshi(r0, r0, 16); + ldxi_us(r2, r2, 4); +# endif + break; + case 7: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldr_ui(r0, r2); + ldxi_ui(r2, r2, 4); + lshi(r2, r2, 40); + rshi_u(r2, r2, 8); +# else + ldr_ui(r0, r2); + lshi(r0, r0, 24); + ldxi_ui(r2, r2, 4); + rshi(r2, r2, 8); +# endif + break; + default: + ldr_l(r0, r1); + break; +# endif + } + if (i0 & (i0 - 1)) { + orr(r0, r0, r2); + jit_unget_reg(t0); + } +} + +static void +_generic_unldi_u(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + jit_int32_t t0, r2; + assert(i1 >= 1 && i1 <= sizeof(jit_word_t)); + if (i1 & (i1 - 1)) { + t0 = jit_get_reg(jit_class_gpr); + r2 = rn(t0); + } + switch (i1) { + case 1: + ldi_uc(r0, i0); + break; + case 2: + ldi_us(r0, i0); + break; + case 3: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_us(r0, i0); + ldi_uc(r2, i0 + 2); + lshi(r2, r2, 16); +# else + ldi_uc(r0, i0); + lshi(r0, r0, 16); + ldi_us(r2, i0 + 1); +# endif + break; +# if __WORDSIZE == 32 + default: + ldi_i(r0, i0); + break; +# else + case 4: + ldi_ui(r0, i0); + break; + case 5: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i0); + ldi_uc(r2, i0 + 4); + lshi(r2, r2, 32); +# else + ldi_ui(r0, i0); + lshi(r0, r0, 8); + ldi_uc(r2, i0 + 4); +# endif + break; + case 6: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i0); + ldi_us(r2, i0 + 4); + lshi(r2, r2, 32); +# else + ldi_ui(r0, i0); + lshi(r0, r0, 16); + 
ldi_us(r2, i0 + 4); +# endif + break; + case 7: +# if __BYTE_ORDER == __LITTLE_ENDIAN + ldi_ui(r0, i0); + ldi_ui(r2, i0 + 4); + lshi(r2, r2, 40); + rshi_u(r2, r2, 8); +# else + ldi_ui(r0, i0); + lshi(r0, r0, 24); + ldi_ui(r2, i0 + 4); + rshi(r2, r2, 8); +# endif + break; + default: + ldi_l(r0, i0); + break; +# endif + } + if (i1 & (i1 - 1)) { + orr(r0, r0, r2); + jit_unget_reg(t0); + } +} + +static void +_generic_unstr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_word_t t0, r2; + assert(i0 > 0 && i0 <= sizeof(jit_word_t)); + if (i0 & (i0 - 1)) { + t0 = jit_get_reg(jit_class_gpr); + r2 = rn(t0); + } + switch (i0) { + case 1: + str_c(r0, r1); + break; + case 2: + str_s(r0, r1); + break; + case 3: +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_s(r0, r1); + rshi(r2, r1, 16); + stxi_c(2, r0, r2); +# else + stxi_c(2, r0, r1); + rshi(r2, r1, 8); + str_s(r0, r2); +# endif + break; +# if __WORDSIZE == 32 + default: + str_i(r0, r1); + break; +# else + case 4: + str_i(r0, r1); + break; + case 5: +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_i(r0, r1); + rshi(r2, r1, 32); + stxi_c(4, r0, r2); +# else + stxi_c(4, r0, r1); + rshi(r2, r1, 8); + str_i(r0, r2); +# endif + break; + case 6: +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_i(r0, r1); + rshi(r2, r1, 32); + stxi_s(4, r0, r2); +# else + stxi_s(4, r0, r1); + rshi(r2, r1, 16); + str_i(r0, r2); +# endif + break; + case 7: +# if __BYTE_ORDER == __LITTLE_ENDIAN + str_i(r0, r1); + rshi(r2, r1, 32); + stxi_s(4, r0, r2); + rshi(r2, r2, 16); + stxi_c(6, r0, r2); +# else + stxi_c(6, r0, r1); + rshi(r2, r1, 8); + stxi_s(4, r0, r2); + rshi(r2, r2, 16); + str_i(r0, r2); +# endif + break; + default: + str_l(r0, r1); + break; +# endif + } + if (i0 & (i0 - 1)) + jit_unget_reg(t0); +} + +static void +_generic_unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) +{ + jit_word_t t0, r2; + assert(i1 > 0 && i1 <= sizeof(jit_word_t)); + if (i1 & (i1 - 1)) { + t0 = jit_get_reg(jit_class_gpr); + r2 = 
rn(t0); + } + switch (i1) { + case 1: + sti_c(i0, r0); + break; + case 2: + sti_s(i0, r0); + break; + case 3: +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_s(i0, r0); + rshi(r2, r0, 16); + sti_c(2 + i0, r2); +# else + sti_c(2 + i0, r0); + rshi(r2, r0, 8); + sti_s(i0, r2); +# endif + break; +# if __WORDSIZE == 32 + default: + sti_i(i0, r0); + break; +# else + case 4: + sti_i(i0, r0); + break; + case 5: +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_i(i0, r0); + rshi(r2, r0, 32); + sti_c(4 + i0, r2); +# else + sti_c(4 + i0, r0); /* i0 is an immediate address, not a base register */ + rshi(r2, r0, 8); + sti_i(i0, r2); +# endif + break; + case 6: +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_i(i0, r0); + rshi(r2, r0, 32); + sti_s(4 + i0, r2); +# else + sti_s(4 + i0, r0); + rshi(r2, r0, 16); + sti_i(i0, r2); +# endif + break; + case 7: +# if __BYTE_ORDER == __LITTLE_ENDIAN + sti_i(i0, r0); + rshi(r2, r0, 32); + sti_s(4 + i0, r2); + rshi(r2, r2, 16); + sti_c(6 + i0, r2); +# else + sti_c(6 + i0, r0); + rshi(r2, r0, 8); + sti_s(4 + i0, r2); + rshi(r2, r2, 16); + sti_i(i0, r2); +# endif + break; + default: + sti_l(i0, r0); + break; +# endif + } + if (i1 & (i1 - 1)) + jit_unget_reg(t0); +} + +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) +static void +_generic_unldr_x(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + assert(i0 == 4 || i0 == 8); + if (i0 == 4) + ldr_f(r0, r1); + else + ldr_d(r0, r1); +} + +static void +_generic_unldi_x(jit_state_t *_jit, + jit_int32_t r0, jit_word_t i0, jit_word_t i1) +{ + assert(i1 == 4 || i1 == 8); + if (i1 == 4) + ldi_f(r0, i0); + else + ldi_d(r0, i0); +} + +static void +_generic_unstr_x(jit_state_t *_jit, + jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + assert(i0 == 4 || i0 == 8); + if (i0 == 4) + str_f(r0, r1); + else + str_d(r0, r1); +} + +static void +_generic_unsti_x(jit_state_t *_jit, + jit_word_t i0, jit_int32_t r0, jit_word_t i1) +{ + assert(i1 == 4 || i1 == 8); + if (i1 == 4) + sti_f(i0, r0); + else + sti_d(i0, r0); +} +#endif + +#if 
defined(stack_framesize) +static maybe_unused void +_patch_alist(jit_state_t *_jit, jit_bool_t revert) +{ + jit_int32_t diff; + jit_node_t *node; + diff = jit_diffsize(); + if (diff) { + if (revert) + diff = -diff; + for (node = _jitc->function->alist; node; node = node->link) { + switch (node->code) { + case jit_code_ldxi_c: case jit_code_ldxi_uc: + case jit_code_ldxi_s: case jit_code_ldxi_us: + case jit_code_ldxi_i: +#if __WORDSIZE == 64 + case jit_code_ldxi_ui: case jit_code_ldxi_l: +#endif + case jit_code_ldxi_f: case jit_code_ldxi_d: + node->w.w -= diff; + break; + case jit_code_stxi_c: case jit_code_stxi_s: + case jit_code_stxi_i: +#if __WORDSIZE == 64 + case jit_code_stxi_l: +#endif + case jit_code_stxi_f: case jit_code_stxi_d: + node->u.w -= diff; + break; + default: + abort(); + } + } + } +} +#endif