git subrepo pull (merge) --force deps/lightning
[pcsx_rearmed.git] / deps / lightning / lib / lightning.c
index 507abb6..39c4fc2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2019  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -19,9 +19,7 @@
 
 #include <lightning.h>
 #include <lightning/jit_private.h>
-#ifdef _WIN32
-#  include <mman.h>
-#else
+#if HAVE_MMAP
 #  include <sys/mman.h>
 #endif
 #if defined(__sgi)
@@ -66,13 +64,28 @@ static void _del_label(jit_state_t*, jit_node_t*, jit_node_t*);
 static void
 _jit_dataset(jit_state_t *_jit);
 
+#define block_update_set(block, target)        _block_update_set(_jit, block, target)
+static jit_bool_t _block_update_set(jit_state_t*, jit_block_t*, jit_block_t*);
+
+#define propagate_backward(block)      _propagate_backward(_jit, block)
+static void _propagate_backward(jit_state_t*, jit_block_t*);
+
+#define check_block_again()            _check_block_again(_jit)
+static jit_bool_t _check_block_again(jit_state_t*);
+
+#define do_setup()                     _do_setup(_jit)
+static void _do_setup(jit_state_t*);
+
 #define jit_setup(block)               _jit_setup(_jit, block)
 static void
 _jit_setup(jit_state_t *_jit, jit_block_t *block);
 
-#define jit_follow(block, todo)                _jit_follow(_jit, block, todo)
+#define do_follow(always)              _do_follow(_jit, always)
+static void _do_follow(jit_state_t*, jit_bool_t);
+
+#define jit_follow(block)              _jit_follow(_jit, block)
 static void
-_jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo);
+_jit_follow(jit_state_t *_jit, jit_block_t *block);
 
 #define jit_update(node, live, mask)   _jit_update(_jit, node, live, mask)
 static void
@@ -107,7 +120,7 @@ static jit_bool_t
 _reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node);
 
 #define redundant_store(node, jump)    _redundant_store(_jit, node, jump)
-static void
+static jit_bool_t
 _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump);
 
 #define simplify_movr(p, n, k, s)      _simplify_movr(_jit, p, n, k, s)
@@ -133,7 +146,7 @@ static void
 _simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno);
 
 #define simplify()                     _simplify(_jit)
-static void
+static jit_bool_t
 _simplify(jit_state_t *_jit);
 
 #define jit_reg_undef                  -1
@@ -214,8 +227,25 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec)
        for (regno = 0; regno < _jitc->reglen; regno++) {
            if ((jit_class(_rvs[regno].spec) & spec) == spec &&
                !jit_regset_tstbit(&_jitc->regarg, regno) &&
-               !jit_regset_tstbit(&_jitc->reglive, regno))
+               !jit_regset_tstbit(&_jitc->reglive, regno)) {
+               if (jit_regset_tstbit(&_jitc->regmask, regno)) {
+                   /* search further, attempting to find a truly known
+                   * free register, not just one in unknown state. */
+                   jit_int32_t regfree;
+
+                   for (regfree = regno + 1;
+                        regfree < _jitc->reglen; regfree++) {
+                       if ((jit_class(_rvs[regfree].spec) & spec) == spec &&
+                           !jit_regset_tstbit(&_jitc->regarg, regfree) &&
+                           !jit_regset_tstbit(&_jitc->reglive, regfree) &&
+                           !jit_regset_tstbit(&_jitc->regmask, regfree)) {
+                           regno = regfree;
+                           break;
+                       }
+                   }
+               }
                goto regarg;
+           }
        }
 
        /* search for a register matching spec that is not an argument
@@ -861,6 +891,7 @@ jit_new_state(void)
     jit_regset_new(&_jitc->regsav);
     jit_regset_new(&_jitc->reglive);
     jit_regset_new(&_jitc->regmask);
+    jit_regset_new(&_jitc->explive);
 
     jit_init();
 
@@ -960,10 +991,12 @@ _jit_destroy_state(jit_state_t *_jit)
 #if DEVEL_DISASSEMBLER
     jit_really_clear_state();
 #endif
+#if HAVE_MMAP
     if (!_jit->user_code)
        munmap(_jit->code.ptr, _jit->code.length);
     if (!_jit->user_data)
        munmap(_jit->data.ptr, _jit->data.length);
+#endif
     jit_free((jit_pointer_t *)&_jit);
 }
 
@@ -1138,6 +1171,35 @@ _jit_new_node_qww(jit_state_t *_jit, jit_code_t code,
     return (link_node(node));
 }
 
+jit_node_t *
+_jit_new_node_wqw(jit_state_t *_jit, jit_code_t code,
+                 jit_word_t u, jit_int32_t l,
+                 jit_int32_t h, jit_word_t w)
+{
+    jit_node_t         *node = new_node(code);
+    assert(!_jitc->realize);
+    assert(l != h);
+    node->u.w = u;
+    node->v.q.l = l;
+    node->v.q.h = h;
+    node->w.w = w;
+    return (link_node(node));
+}
+
+jit_node_t *
+_jit_new_node_wwq(jit_state_t *_jit, jit_code_t code,
+                 jit_word_t u, jit_word_t v,
+                 jit_int32_t l, jit_int32_t h)
+{
+    jit_node_t         *node = new_node(code);
+    assert(!_jitc->realize);
+    node->u.w = u;
+    node->v.w = v;
+    node->w.q.l = l;
+    node->w.q.h = h;
+    return (link_node(node));
+}
+
 jit_node_t *
 _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code,
                  jit_word_t u, jit_word_t v, jit_float32_t w)
@@ -1150,6 +1212,21 @@ _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code,
     return (link_node(node));
 }
 
+jit_node_t *
+_jit_new_node_wqf(jit_state_t *_jit, jit_code_t code,
+                 jit_word_t u, jit_int32_t l,
+                 jit_int32_t h, jit_float32_t w)
+{
+    jit_node_t         *node = new_node(code);
+    assert(!_jitc->realize);
+    assert(l != h);
+    node->u.w = u;
+    node->v.q.l = l;
+    node->v.q.h = h;
+    node->w.f = w;
+    return (link_node(node));
+}
+
 jit_node_t *
 _jit_new_node_wwd(jit_state_t *_jit, jit_code_t code,
                  jit_word_t u, jit_word_t v, jit_float64_t w)
@@ -1162,6 +1239,21 @@ _jit_new_node_wwd(jit_state_t *_jit, jit_code_t code,
     return (link_node(node));
 }
 
+jit_node_t *
+_jit_new_node_wqd(jit_state_t *_jit, jit_code_t code,
+                 jit_word_t u, jit_int32_t l,
+                 jit_int32_t h, jit_float64_t w)
+{
+    jit_node_t         *node = new_node(code);
+    assert(!_jitc->realize);
+    assert(l != h);
+    node->u.w = u;
+    node->v.q.l = l;
+    node->v.q.h = h;
+    node->w.d = w;
+    return (link_node(node));
+}
+
 jit_node_t *
 _jit_new_node_pww(jit_state_t *_jit, jit_code_t code,
                  jit_pointer_t u, jit_word_t v, jit_word_t w)
@@ -1306,14 +1398,36 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
            mask = 0;
            break;
        case jit_code_live:     case jit_code_va_end:
-       case jit_code_retr:     case jit_code_retr_f:   case jit_code_retr_d:
-       case jit_code_pushargr: case jit_code_pushargr_f:
+       case jit_code_retr_c:   case jit_code_retr_uc:
+       case jit_code_retr_s:   case jit_code_retr_us:
+       case jit_code_retr_i:   case jit_code_retr_ui:
+       case jit_code_retr_l:
+       case jit_code_retr_f:   case jit_code_retr_d:
+       case jit_code_pushargr_c:
+       case jit_code_pushargr_uc:
+       case jit_code_pushargr_s:
+       case jit_code_pushargr_us:
+       case jit_code_pushargr_i:
+       case jit_code_pushargr_ui:
+       case jit_code_pushargr_l:
+       case jit_code_pushargr_f:
        case jit_code_pushargr_d:
        case jit_code_finishr:  /* synthesized will set jit_cc_a0_jmp */
            mask = jit_cc_a0_reg;
            break;
-       case jit_code_align:    case jit_code_reti:     case jit_code_pushargi:
-       case jit_code_finishi:  /* synthesized will set jit_cc_a0_jmp */
+       case jit_code_align:    case jit_code_skip:
+       case jit_code_reti_c:   case jit_code_reti_uc:
+       case jit_code_reti_s:   case jit_code_reti_us:
+       case jit_code_reti_i:   case jit_code_reti_ui:
+       case jit_code_reti_l:
+       case jit_code_pushargi_c:
+       case jit_code_pushargi_uc:
+       case jit_code_pushargi_s:
+       case jit_code_pushargi_us:
+       case jit_code_pushargi_i:
+       case jit_code_pushargi_ui:
+       case jit_code_pushargi_l:
+        case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */
            mask = jit_cc_a0_int;
            break;
        case jit_code_reti_f:   case jit_code_pushargi_f:
@@ -1325,7 +1439,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_allocai:
            mask = jit_cc_a0_int|jit_cc_a1_int;
            break;
-       case jit_code_arg:      case jit_code_arg_f:    case jit_code_arg_d:
+       case jit_code_arg_c:    case jit_code_arg_s:
+       case jit_code_arg_i:    case jit_code_arg_l:
+       case jit_code_arg_f:    case jit_code_arg_d:
            mask = jit_cc_a0_int|jit_cc_a0_arg;
            break;
        case jit_code_calli:    case jit_code_jmpi:
@@ -1349,11 +1465,17 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_getarg_f: case jit_code_getarg_d:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg;
            break;
-       case jit_code_putargr:  case jit_code_putargr_f:
-       case jit_code_putargr_d:
+       case jit_code_putargr_c:case jit_code_putargr_uc:
+       case jit_code_putargr_s:case jit_code_putargr_us:
+       case jit_code_putargr_i:case jit_code_putargr_ui:
+       case jit_code_putargr_l:
+       case jit_code_putargr_f:case jit_code_putargr_d:
            mask = jit_cc_a0_reg|jit_cc_a1_arg;
            break;
-       case jit_code_putargi:
+       case jit_code_putargi_c:case jit_code_putargi_uc:
+       case jit_code_putargi_s:case jit_code_putargi_us:
+       case jit_code_putargi_i:case jit_code_putargi_ui:
+       case jit_code_putargi_l:
            mask = jit_cc_a0_int|jit_cc_a1_arg;
            break;
        case jit_code_putargi_f:
@@ -1366,12 +1488,34 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_ldi_s:    case jit_code_ldi_us:   case jit_code_ldi_i:
        case jit_code_ldi_ui:   case jit_code_ldi_l:    case jit_code_ldi_f:
        case jit_code_ldi_d:
+       case jit_code_negi:     case jit_code_comi:
+       case jit_code_exti_c:   case jit_code_exti_uc:
+       case jit_code_exti_s:   case jit_code_exti_us:
+       case jit_code_exti_i:   case jit_code_exti_ui:
+       case jit_code_bswapi_us:case jit_code_bswapi_ui:
+       case jit_code_bswapi_ul:
+       case jit_code_htoni_us: case jit_code_htoni_ui:
+       case jit_code_htoni_ul:
+       case jit_code_cloi:     case jit_code_clzi:
+       case jit_code_ctoi:     case jit_code_ctzi:
+       case jit_code_rbiti:    case jit_code_popcnti:
+       case jit_code_movi_w_f: case jit_code_movi_w_d:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int;
            break;
+       case jit_code_unldi:    case jit_code_unldi_u:  case jit_code_unldi_x:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|jit_cc_a2_int;
+           break;
+       case jit_code_movi_ww_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|jit_cc_a2_int;
+           break;
        case jit_code_movi_f:   case jit_code_movi_f_w:
+       case jit_code_negi_f:   case jit_code_absi_f:
+       case jit_code_sqrti_f:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_flt;
            break;
        case jit_code_movi_d:   case jit_code_movi_d_w:
+       case jit_code_negi_d:   case jit_code_absi_d:
+       case jit_code_sqrti_d:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_dbl;
            break;
        case jit_code_movi_d_ww:
@@ -1384,6 +1528,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_truncr_f_i:                       case jit_code_truncr_f_l:
        case jit_code_truncr_d_i:                       case jit_code_truncr_d_l:
        case jit_code_htonr_us: case jit_code_htonr_ui: case jit_code_htonr_ul:
+       case jit_code_bswapr_us:        case jit_code_bswapr_ui:        case jit_code_bswapr_ul:
        case jit_code_ldr_c:    case jit_code_ldr_uc:
        case jit_code_ldr_s:    case jit_code_ldr_us:   case jit_code_ldr_i:
        case jit_code_ldr_ui:   case jit_code_ldr_l:    case jit_code_negr_f:
@@ -1391,7 +1536,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_extr_f:   case jit_code_extr_d_f: case jit_code_ldr_f:
        case jit_code_negr_d:   case jit_code_absr_d:   case jit_code_sqrtr_d:
        case jit_code_movr_d:   case jit_code_extr_d:   case jit_code_extr_f_d:
-       case jit_code_ldr_d:
+       case jit_code_ldr_d:    case jit_code_rbitr:    case jit_code_popcntr:
+       case jit_code_clor:     case jit_code_clzr:
+       case jit_code_ctor:     case jit_code_ctzr:
        case jit_code_movr_w_f: case jit_code_movr_f_w:
        case jit_code_movr_w_d: case jit_code_movr_d_w:
        case jit_code_va_arg:   case jit_code_va_arg_d:
@@ -1404,20 +1551,25 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_addi:     case jit_code_addxi:    case jit_code_addci:
        case jit_code_subi:     case jit_code_subxi:    case jit_code_subci:
        case jit_code_rsbi:
-       case jit_code_muli:     case jit_code_divi:     case jit_code_divi_u:
+       case jit_code_muli:     case jit_code_hmuli:    case jit_code_hmuli_u:
+       case jit_code_divi:     case jit_code_divi_u:
        case jit_code_remi:     case jit_code_remi_u:   case jit_code_andi:
        case jit_code_ori:      case jit_code_xori:     case jit_code_lshi:
-       case jit_code_rshi:     case jit_code_rshi_u:   case jit_code_lti:
+       case jit_code_rshi:     case jit_code_rshi_u:   case jit_code_lroti:
+       case jit_code_rroti:    case jit_code_lti:
        case jit_code_lti_u:    case jit_code_lei:      case jit_code_lei_u:
        case jit_code_eqi:      case jit_code_gei:      case jit_code_gei_u:
        case jit_code_gti:      case jit_code_gti_u:    case jit_code_nei:
        case jit_code_ldxi_c:   case jit_code_ldxi_uc:  case jit_code_ldxi_s:
        case jit_code_ldxi_us:  case jit_code_ldxi_i:   case jit_code_ldxi_ui:
        case jit_code_ldxi_l:   case jit_code_ldxi_f:   case jit_code_ldxi_d:
+       case jit_code_unldr:    case jit_code_unldr_u:  case jit_code_unldr_x:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_int;
            break;
        case jit_code_qmuli:    case jit_code_qmuli_u:
        case jit_code_qdivi:    case jit_code_qdivi_u:
+       case jit_code_qlshi:    case jit_code_qlshi_u:
+       case jit_code_qrshi:    case jit_code_qrshi_u:
            mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg|
                   jit_cc_a1_reg|jit_cc_a2_int;
            break;
@@ -1441,10 +1593,12 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
            break;
        case jit_code_addr:     case jit_code_addxr:    case jit_code_addcr:
        case jit_code_subr:     case jit_code_subxr:    case jit_code_subcr:
-       case jit_code_mulr:     case jit_code_divr:     case jit_code_divr_u:
+       case jit_code_mulr:     case jit_code_hmulr:    case jit_code_hmulr_u:
+       case jit_code_divr:     case jit_code_divr_u:
        case jit_code_remr:     case jit_code_remr_u:   case jit_code_andr:
        case jit_code_orr:      case jit_code_xorr:     case jit_code_lshr:
-       case jit_code_rshr:     case jit_code_rshr_u:   case jit_code_ltr:
+       case jit_code_rshr:     case jit_code_rshr_u:   case jit_code_lrotr:
+       case jit_code_rrotr:    case jit_code_ltr:
        case jit_code_ltr_u:    case jit_code_ler:      case jit_code_ler_u:
        case jit_code_eqr:      case jit_code_ger:      case jit_code_ger_u:
        case jit_code_gtr:      case jit_code_gtr_u:    case jit_code_ner:
@@ -1468,6 +1622,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
            break;
        case jit_code_qmulr:    case jit_code_qmulr_u:
        case jit_code_qdivr:    case jit_code_qdivr_u:
+       case jit_code_qlshr:    case jit_code_qlshr_u:
+       case jit_code_qrshr:    case jit_code_qrshr_u:
            mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg|
                   jit_cc_a1_reg|jit_cc_a2_reg;
            break;
@@ -1475,6 +1631,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_sti_l:    case jit_code_sti_f:    case jit_code_sti_d:
            mask = jit_cc_a0_int|jit_cc_a1_reg;
            break;
+       case jit_code_unsti:    case jit_code_unsti_x:
+           mask = jit_cc_a0_int|jit_cc_a1_reg|jit_cc_a2_int;
+           break;
        case jit_code_blti:     case jit_code_blti_u:   case jit_code_blei:
        case jit_code_blei_u:   case jit_code_beqi:     case jit_code_bgei:
        case jit_code_bgei_u:   case jit_code_bgti:     case jit_code_bgti_u:
@@ -1501,6 +1660,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_str_l:    case jit_code_str_f:    case jit_code_str_d:
            mask = jit_cc_a0_reg|jit_cc_a1_reg;
            break;
+       case jit_code_unstr:    case jit_code_unstr_x:
+           mask = jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_int;
+           break;
        case jit_code_stxi_c:   case jit_code_stxi_s:   case jit_code_stxi_i:
        case jit_code_stxi_l:   case jit_code_stxi_f:   case jit_code_stxi_d:
            mask = jit_cc_a0_int|jit_cc_a1_reg|jit_cc_a2_reg;
@@ -1535,6 +1697,40 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_bxsubr:   case jit_code_bxsubr_u:
            mask = jit_cc_a0_jmp|jit_cc_a1_reg|jit_cc_a1_chg|jit_cc_a2_reg;
            break;
+       case jit_code_movnr:    case jit_code_movzr:
+           mask = jit_cc_a0_reg|jit_cc_a0_cnd|jit_cc_a1_reg|jit_cc_a2_reg;
+           break;
+       case jit_code_casr:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|
+                  jit_cc_a2_reg|jit_cc_a2_rlh;
+           break;
+       case jit_code_casi:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|
+                  jit_cc_a2_reg|jit_cc_a2_rlh;
+           break;
+       case jit_code_extr:     case jit_code_extr_u:   case jit_code_depr:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_rlh;
+           break;
+       case jit_code_exti:     case jit_code_exti_u:   case jit_code_depi:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|jit_cc_a2_rlh;
+           break;
+       case jit_code_fmar_f:   case jit_code_fmar_d:
+       case jit_code_fmsr_f:   case jit_code_fmsr_d:
+       case jit_code_fnmar_f:  case jit_code_fnmar_d:
+       case jit_code_fnmsr_f:  case jit_code_fnmsr_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|
+                  jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_reg;
+           break;
+       case jit_code_fmai_f:   case jit_code_fmsi_f:
+       case jit_code_fnmai_f:  case jit_code_fnmsi_f:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|
+                  jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_flt;
+           break;
+       case jit_code_fmai_d:   case jit_code_fmsi_d:
+       case jit_code_fnmai_d:  case jit_code_fnmsi_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|
+                  jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl;
+           break;
        default:
            abort();
     }
@@ -1545,8 +1741,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
 void
 _jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address)
 {
+#ifndef NDEBUG
     jit_int32_t                mask;
-
+#endif
     switch (instr->code) {
        case jit_code_movi:     case jit_code_ldi_c:    case jit_code_ldi_uc:
        case jit_code_ldi_s:    case jit_code_ldi_us:   case jit_code_ldi_i:
@@ -1559,7 +1756,9 @@ _jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address)
            instr->u.p = address;
            break;
        default:
+#ifndef NDEBUG
            mask = jit_classify(instr->code);
+#endif
            assert((mask & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp);
            instr->u.p = address;
     }
@@ -1568,8 +1767,9 @@ _jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address)
 void
 _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label)
 {
+#ifndef NDEBUG
     jit_int32_t                mask;
-
+#endif
     assert(!(instr->flag & jit_flag_node));
     instr->flag |= jit_flag_node;
     switch (instr->code) {
@@ -1586,7 +1786,9 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label)
            instr->u.n = label;
            break;
        default:
+#ifndef NDEBUG
            mask = jit_classify(instr->code);
+#endif
            assert((mask & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp);
            assert(label->code == jit_code_label);
            instr->u.n = label;
@@ -1597,57 +1799,191 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label)
     label->link = instr;
 }
 
-void
-_jit_optimize(jit_state_t *_jit)
+static void
+_do_setup(jit_state_t *_jit)
 {
-    jit_bool_t          jump;
-    jit_bool_t          todo;
-    jit_int32_t                 mask;
-    jit_node_t         *node;
     jit_block_t                *block;
     jit_word_t          offset;
 
-    _jitc->function = NULL;
-
-    thread_jumps();
-    sequential_labels();
-    split_branches();
-
     /* create initial mapping of live register values
      * at the start of a basic block */
     for (offset = 0; offset < _jitc->blocks.offset; offset++) {
        block = _jitc->blocks.ptr + offset;
        if (!block->label)
            continue;
-       if (block->label->code != jit_code_epilog)
-           jit_setup(block);
+       if (block->label->code == jit_code_epilog) {
+           jit_regset_setbit(&block->reglive, JIT_RET);
+           jit_regset_setbit(&block->reglive, JIT_FRET);
+           jit_regset_com(&block->regmask, &block->reglive);
+           continue;
+       }
+       jit_setup(block);
     }
+}
+
+static jit_bool_t
+_block_update_set(jit_state_t *_jit,
+                 jit_block_t *block, jit_block_t *target)
+{
+    jit_regset_t       regmask;
+
+    jit_regset_ior(&regmask, &block->reglive, &target->reglive);
+    jit_regset_and(&regmask,  &regmask, &block->regmask);
+    if (jit_regset_set_p(&regmask)) {
+       jit_regset_ior(&block->reglive, &block->reglive, &regmask);
+       jit_regset_and(&regmask, &block->reglive, &block->regmask);
+       jit_regset_com(&regmask, &regmask);
+       jit_regset_and(&block->regmask, &block->regmask, &regmask);
+       block->again = 1;
+       return (1);
+    }
+    return (0);
+}
+
+static void
+_propagate_backward(jit_state_t *_jit, jit_block_t *block)
+{
+    jit_block_t                *prev;
+    jit_word_t          offset;
+
+    for (offset = block->label->v.w - 1;
+        offset >= 0; --offset)  {
+       prev = _jitc->blocks.ptr + offset;
+       if (!block_update_set(prev, block) ||
+           !(prev->label->flag & jit_flag_head))
+           break;
+    }
+}
+
+static jit_bool_t
+_check_block_again(jit_state_t *_jit)
+{
+    jit_int32_t                 todo;
+    jit_word_t          offset;
+    jit_node_t         *node, *label;
+    jit_block_t                *block, *target;
+
+    todo = 0;
+    for (offset = 0; offset < _jitc->blocks.offset; offset++) {
+       block = _jitc->blocks.ptr + offset;
+       if (block->again) {
+           todo = 1;
+           break;
+       }
+    }
+    /* If no block changed state */
+    if (!todo)
+       return (0);
 
-    /* set live state of registers not referenced in a block, but
-     * referenced in a jump target or normal flow */
     do {
        todo = 0;
-       for (offset = 0; offset < _jitc->blocks.offset; offset++) {
-           block = _jitc->blocks.ptr + offset;
-           if (!block->label)
+       block = NULL;
+       for (node = _jitc->head; node; node = node->next) {
+           /* Special jumps that match jit_cc_a0_jmp */
+           if (node->code == jit_code_calli || node->code == jit_code_callr)
                continue;
-           if (block->label->code != jit_code_epilog)
-               jit_follow(block, &todo);
+
+           /* Remember current label */
+           if (node->code == jit_code_label ||
+               node->code == jit_code_prolog ||
+               node->code == jit_code_epilog) {
+
+               /* If previous block does not pass through */
+               if (!(node->flag & jit_flag_head))
+                   block = NULL;
+
+               target = _jitc->blocks.ptr + node->v.w;
+               if (block && target->again && block_update_set(block, target)) {
+                   propagate_backward(block);
+                   todo = 1;
+               }
+               block = target;
+           }
+           /* If not the first jmpi */
+           else if (block) {
+               /* If a jump to dynamic address or if a jump to raw address */
+               if (!(jit_classify(node->code) & jit_cc_a0_jmp) ||
+                   !(node->flag & jit_flag_node))
+                   continue;
+               label = node->u.n;
+               /* Mark predecessor needs updating due to target change */
+               target = _jitc->blocks.ptr + label->v.w;
+               if (target->again && block_update_set(block, target)) {
+                   propagate_backward(block);
+                   todo = 1;
+               }
+           }
        }
-    } while (todo);
+    }
+    while (todo);
 
-    patch_registers();
-    simplify();
+    return (todo);
+}
 
-    /* figure out labels that are only reached with a jump
-     * and is required to do a simple redundant_store removal
-     * on jit_beqi below */
+static void
+_do_follow(jit_state_t *_jit, jit_bool_t always)
+{
+    jit_block_t                *block;
+    jit_word_t          offset;
+
+    /* set live state of registers not referenced in a block, but
+     * referenced in a jump target or normal flow */
+    for (offset = 0; offset < _jitc->blocks.offset; offset++) {
+       block = _jitc->blocks.ptr + offset;
+       if (!block->label || block->label->code == jit_code_epilog)
+           continue;
+       if (always || block->again) {
+           block->again = 0;
+           jit_follow(block);
+       }
+    }
+}
+
+void
+_jit_optimize(jit_state_t *_jit)
+{
+    jit_bool_t          jump;
+    jit_bool_t          todo;
+    jit_int32_t                 mask;
+    jit_node_t         *node;
+    jit_block_t                *block;
+    jit_word_t          offset;
+    jit_regset_t        regmask;
+
+    todo = 0;
+    _jitc->function = NULL;
+
+    thread_jumps();
+    sequential_labels();
+    split_branches();
+    do_setup();
+    do_follow(1);
+
+    jit_regset_set_ui(&regmask, 0);
+    for (offset = 0; offset < _jitc->reglen; offset++) {
+       if ((jit_class(_rvs[offset].spec) & (jit_class_gpr|jit_class_fpr)) &&
+           (jit_class(_rvs[offset].spec) & jit_class_sav) == jit_class_sav)
+           jit_regset_setbit(&regmask, offset);
+    }
+
+    /* Figure out labels that are only reached with a jump */
     jump = 1;
     for (node = _jitc->head; node; node = node->next) {
        switch (node->code) {
            case jit_code_label:
-               if (!jump)
+               if (!jump) {
                    node->flag |= jit_flag_head;
+                   if (!node->link) {
+                       /* Block is dead code or only reachable with an
+                        * indirect jumps. In such condition, must assume
+                        * all callee save registers are live. */
+                       block = _jitc->blocks.ptr + node->v.w;
+                       jit_regset_ior(&block->reglive,
+                                      &block->reglive, &regmask);
+                       /* Cleanup regmask */
+                       block_update_set(block, block);
+                   }
+               }
                break;
            case jit_code_jmpi:         case jit_code_jmpr:
            case jit_code_epilog:
@@ -1661,6 +1997,10 @@ _jit_optimize(jit_state_t *_jit)
        }
     }
 
+    patch_registers();
+    if (simplify())
+       todo = 1;
+
     for (node = _jitc->head; node; node = node->next) {
        mask = jit_classify(node->code);
        if (mask & jit_cc_a0_reg)
@@ -1669,69 +2009,98 @@ _jit_optimize(jit_state_t *_jit)
            node->v.w &= ~jit_regno_patch;
        if (mask & jit_cc_a2_reg)
            node->w.w &= ~jit_regno_patch;
-       switch (node->code) {
-           case jit_code_prolog:
-               _jitc->function = _jitc->functions.ptr + node->w.w;
-               break;
-           case jit_code_epilog:
-               _jitc->function = NULL;
-               break;
-           case jit_code_beqi:
-               redundant_store(node, 1);
-               break;
-           case jit_code_bnei:
-               redundant_store(node, 0);
-               break;
-           default:
+       if (node->code == jit_code_beqi) {
+           if (redundant_store(node, 1)) {
+               block = _jitc->blocks.ptr + ((jit_node_t *)node->u.n)->v.w;
+               block->again = 1;
+               todo = 1;
+           }
+       }
+       else if (node->code == jit_code_bnei) {
+           if (redundant_store(node, 0)) {
+               block = _jitc->blocks.ptr + ((jit_node_t *)node->u.n)->v.w;
+               block->again = 1;
+               todo = 1;
+           }
+       }
+    }
+
+    if (!todo)
+       todo = check_block_again();
+
+    /* If instructions were removed or first pass did modify the entry
+     * state of any block */
+    if (todo) {
+       do_setup();
+       todo = 0;
+       do {
+           do_follow(0);
+           /* If any block again has the entry state modified. */
+           todo = check_block_again();
+       } while (todo);
+    }
+
+    for (node = _jitc->head; node; node = node->next) {
+       mask = jit_classify(node->code);
+       if (mask & jit_cc_a0_reg)
+           node->u.w &= ~jit_regno_patch;
+       if (mask & jit_cc_a1_reg)
+           node->v.w &= ~jit_regno_patch;
+       if (mask & jit_cc_a2_reg)
+           node->w.w &= ~jit_regno_patch;
+       if  (node->code == jit_code_prolog)
+           _jitc->function = _jitc->functions.ptr + node->w.w;
+       else if(node->code == jit_code_epilog)
+           _jitc->function = NULL;
+       else {
 #if JIT_HASH_CONSTS
-               if (mask & jit_cc_a0_flt) {
-                   node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a0_dbl) {
-                   node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a1_flt) {
-                   node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a1_dbl) {
-                   node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a2_flt) {
-                   node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a2_dbl) {
-                   node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-#endif
-               if (_jitc->function) {
-                   if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
-                       (jit_cc_a0_reg|jit_cc_a0_chg)) {
-                       if (mask & jit_cc_a0_rlh) {
-                           jit_regset_setbit(&_jitc->function->regset,
-                                             jit_regno(node->u.q.l));
-                           jit_regset_setbit(&_jitc->function->regset,
-                                             jit_regno(node->u.q.h));
-                       }
-                       else
-                           jit_regset_setbit(&_jitc->function->regset,
-                                             jit_regno(node->u.w));
-                   }
-                   if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
-                       (jit_cc_a1_reg|jit_cc_a1_chg))
+           if (mask & jit_cc_a0_flt) {
+               node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a0_dbl) {
+               node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a1_flt) {
+               node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a1_dbl) {
+               node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a2_flt) {
+               node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a2_dbl) {
+               node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+#endif
+           if (_jitc->function) {
+               if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
+                   (jit_cc_a0_reg|jit_cc_a0_chg)) {
+                   if (mask & jit_cc_a0_rlh) {
+                       jit_regset_setbit(&_jitc->function->regset,
+                                         jit_regno(node->u.q.l));
                        jit_regset_setbit(&_jitc->function->regset,
-                                         jit_regno(node->v.w));
-                   if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
-                       (jit_cc_a2_reg|jit_cc_a2_chg))
+                                         jit_regno(node->u.q.h));
+                   }
+                   else
                        jit_regset_setbit(&_jitc->function->regset,
-                                         jit_regno(node->w.w));
+                                         jit_regno(node->u.w));
                }
-               break;
+               if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
+                   (jit_cc_a1_reg|jit_cc_a1_chg))
+                   jit_regset_setbit(&_jitc->function->regset,
+                                     jit_regno(node->v.w));
+               if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
+                   (jit_cc_a2_reg|jit_cc_a2_chg))
+                   jit_regset_setbit(&_jitc->function->regset,
+                                     jit_regno(node->w.w));
+           }
        }
     }
 }
@@ -1747,6 +2116,10 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node)
        case jit_code_label:    case jit_code_prolog:   case jit_code_epilog:
            block = _jitc->blocks.ptr + node->v.w;
            jit_regset_set(&_jitc->reglive, &block->reglive);
+           jit_regset_set_ui(&_jitc->explive, 0);
+           break;
+       case jit_code_live:
+           jit_regset_setbit(&_jitc->explive, node->u.w);
            break;
        case jit_code_callr:
            value = jit_regno(node->u.w);
@@ -1794,21 +2167,41 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node)
                    }
                }
            }
-           if ((value & jit_cc_a1_reg) && !(node->v.w & jit_regno_patch)) {
-               if (value & jit_cc_a1_chg) {
-                   jit_regset_clrbit(&_jitc->reglive, node->v.w);
-                   jit_regset_setbit(&_jitc->regmask, node->v.w);
+           if (value & jit_cc_a1_reg) {
+               if (value & jit_cc_a1_rlh) {
+                   /* Assume registers are not changed */
+                   if (!(node->v.q.l & jit_regno_patch))
+                       jit_regset_setbit(&_jitc->reglive, node->v.q.l);
+                   if (!(node->v.q.h & jit_regno_patch))
+                       jit_regset_setbit(&_jitc->reglive, node->v.q.h);
+               }
+               else if (!(node->v.w & jit_regno_patch)) {
+                   if (value & jit_cc_a1_chg) {
+                       jit_regset_clrbit(&_jitc->reglive, node->v.w);
+                       jit_regset_setbit(&_jitc->regmask, node->v.w);
+                   }
+                   else
+                       jit_regset_setbit(&_jitc->reglive, node->v.w);
                }
-               else
-                   jit_regset_setbit(&_jitc->reglive, node->v.w);
            }
-           if ((value & jit_cc_a2_reg) && !(node->w.w & jit_regno_patch)) {
-               if (value & jit_cc_a2_chg) {
-                   jit_regset_clrbit(&_jitc->reglive, node->w.w);
-                   jit_regset_setbit(&_jitc->regmask, node->w.w);
+           if (value & jit_cc_a2_reg) {
+               if (value & jit_cc_a2_rlh) {
+                   /* Assume registers are not changed */
+                   if (!(node->w.q.l & jit_regno_patch))
+                       jit_regset_setbit(&_jitc->reglive, node->w.q.l);
+                   if (!(node->w.q.h & jit_regno_patch))
+                       jit_regset_setbit(&_jitc->reglive, node->w.q.h);
+               }
+               else {
+                   if (!(node->w.w & jit_regno_patch)) {
+                       if (value & jit_cc_a2_chg) {
+                           jit_regset_clrbit(&_jitc->reglive, node->w.w);
+                           jit_regset_setbit(&_jitc->regmask, node->w.w);
+                       }
+                       else
+                           jit_regset_setbit(&_jitc->reglive, node->w.w);
+                   }
                }
-               else
-                   jit_regset_setbit(&_jitc->reglive, node->w.w);
            }
            if (jit_regset_set_p(&_jitc->regmask)) {
                jit_update(node->next, &_jitc->reglive, &_jitc->regmask);
@@ -1837,10 +2230,35 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
        else
            jit_regset_setbit(&_jitc->regarg, jit_regno(node->u.w));
     }
-    if (value & jit_cc_a1_reg)
-       jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w));
-    if (value & jit_cc_a2_reg)
-       jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
+    if (value & jit_cc_a1_reg) {
+       if (value & jit_cc_a1_rlh) {
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.q.l));
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.q.h));
+       }
+       else
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w));
+    }
+    if (value & jit_cc_a2_reg) {
+       if (value & jit_cc_a2_rlh) {
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.l));
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.h));
+       }
+       else
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
+    }
+    /* Prevent incorrect detection of running out of registers
+     * if will need to patch jump, and all registers have been
+     * used in the current block. */
+    if (node->code == jit_code_jmpi && (node->flag & jit_flag_node)) {
+       jit_node_t      *label = node->u.n;
+       jit_block_t     *block = _jitc->blocks.ptr + label->v.w;
+       jit_regset_set(&_jitc->reglive, &block->reglive);
+       jit_regset_set(&_jitc->regmask, &block->regmask);
+       if (jit_regset_set_p(&_jitc->explive)) {
+           jit_regset_ior(&_jitc->reglive, &block->reglive, &_jitc->explive);
+           jit_regset_xor(&_jitc->regmask, &_jitc->regmask, &_jitc->explive);
+       }
+    }
 }
 
 void
@@ -1857,10 +2275,22 @@ _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
        else
            jit_regset_clrbit(&_jitc->regarg, jit_regno(node->u.w));
     }
-    if (value & jit_cc_a1_reg)
-       jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w));
-    if (value & jit_cc_a2_reg)
-       jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w));
+    if (value & jit_cc_a1_reg) {
+       if (value & jit_cc_a1_rlh) {
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.q.l));
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.q.h));
+       }
+       else
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w));
+    }
+    if (value & jit_cc_a2_reg) {
+       if (value & jit_cc_a2_rlh) {
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l));
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.h));
+       }
+       else
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w));
+    }
 }
 
 void
@@ -1895,6 +2325,9 @@ _jit_dataset(jit_state_t *_jit)
 #endif
 
     assert(!_jitc->dataset);
+#if !HAVE_MMAP
+    assert(_jit->user_data);
+#else
     if (!_jit->user_data) {
 
        /* create read only data buffer */
@@ -1912,6 +2345,7 @@ _jit_dataset(jit_state_t *_jit)
        close(mmap_fd);
 #endif
     }
+#endif /* !HAVE_MMAP */
 
     if (!_jitc->no_data)
        jit_memcpy(_jit->data.ptr, _jitc->data.ptr, _jitc->data.offset);
@@ -2013,10 +2447,13 @@ _jit_emit(jit_state_t *_jit)
     jit_pointer_t       code;
     jit_node_t         *node;
     size_t              length;
+#ifndef NDEBUG
     int                         result;
+#endif
 #if defined(__sgi)
     int                         mmap_fd;
 #endif
+    int                         mmap_prot, mmap_flags;
 
     if (!_jitc->realize)
        jit_realize();
@@ -2026,20 +2463,41 @@ _jit_emit(jit_state_t *_jit)
 
     _jitc->emit = 1;
 
+#if !HAVE_MMAP
+    assert(_jit->user_code);
+#else
     if (!_jit->user_code) {
+       mmap_prot = PROT_READ | PROT_WRITE;
+#if !(__OpenBSD__ || __APPLE__)
+       mmap_prot |= PROT_EXEC;
+#endif
+#if __NetBSD__
+       mmap_prot = PROT_MPROTECT(mmap_prot);
+       mmap_flags = 0;
+#else
+       mmap_flags = MAP_PRIVATE;
+#endif
+       mmap_flags |= MAP_ANON;
 #if defined(__sgi)
        mmap_fd = open("/dev/zero", O_RDWR);
 #endif
        _jit->code.ptr = mmap(NULL, _jit->code.length,
-                             PROT_EXEC | PROT_READ | PROT_WRITE,
-                             MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+                             mmap_prot, mmap_flags, mmap_fd, 0);
        assert(_jit->code.ptr != MAP_FAILED);
     }
+#endif /* !HAVE_MMAP */
     _jitc->code.end = _jit->code.ptr + _jit->code.length -
        jit_get_max_instr();
     _jit->pc.uc = _jit->code.ptr;
 
     for (;;) {
+#if __NetBSD__
+#ifndef NDEBUG
+       result =
+#endif
+       mprotect(_jit->code.ptr, _jit->code.length, PROT_READ | PROT_WRITE);
+       assert(result == 0);
+#endif
        if ((code = emit_code()) == NULL) {
            _jitc->patches.offset = 0;
            for (node = _jitc->head; node; node = node->next) {
@@ -2048,6 +2506,9 @@ _jit_emit(jit_state_t *_jit)
                     node->code == jit_code_epilog))
                    node->flag &= ~jit_flag_patch;
            }
+#if !HAVE_MMAP
+           assert(_jit->user_code);
+#else
            if (_jit->user_code)
                goto fail;
 #if GET_JIT_SIZE
@@ -2072,8 +2533,7 @@ _jit_emit(jit_state_t *_jit)
 #  endif
 #else
            _jit->code.ptr = mmap(NULL, length,
-                                 PROT_EXEC | PROT_READ | PROT_WRITE,
-                                 MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+                                 mmap_prot, mmap_flags, mmap_fd, 0);
 #endif
 
            assert(_jit->code.ptr != MAP_FAILED);
@@ -2081,6 +2541,7 @@ _jit_emit(jit_state_t *_jit)
            _jitc->code.end = _jit->code.ptr + _jit->code.length -
                jit_get_max_instr();
            _jit->pc.uc = _jit->code.ptr;
+#endif /* !HAVE_MMAP */
        }
        else
            break;
@@ -2097,15 +2558,27 @@ _jit_emit(jit_state_t *_jit)
 
     if (_jit->user_data)
        jit_free((jit_pointer_t *)&_jitc->data.ptr);
+#if HAVE_MMAP
     else {
-       result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ);
+#  ifndef NDEBUG
+       result =
+#  endif
+       mprotect(_jit->data.ptr, _jit->data.length, PROT_READ);
        assert(result == 0);
     }
     if (!_jit->user_code) {
-       result = mprotect(_jit->code.ptr, _jit->code.length,
-                         PROT_READ | PROT_EXEC);
+       _jit->code.protect = _jit->pc.uc - _jit->code.ptr;
+#  if __riscv && __WORDSIZE == 64
+       /* FIXME should start adding consts at a page boundary */
+       _jit->code.protect -= _jitc->consts.hash.count * sizeof(jit_word_t);
+#  endif
+#  ifndef NDEBUG
+       result =
+       mprotect(_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_EXEC);
+#  endif
        assert(result == 0);
     }
+#endif /* HAVE_MMAP */
 
     return (_jit->code.ptr);
 fail:
@@ -2113,15 +2586,51 @@ fail:
 }
 
 void
-_jit_frame(jit_state_t *_jit, jit_int32_t frame)
+_jit_protect(jit_state_t *_jit)
 {
-    jit_trampoline(frame, 1);
+#if !HAVE_MMAP
+  assert (_jit->user_code);
+#else
+#  ifndef NDEBUG
+  int result;
+#  endif
+  if (_jit->user_code) return;
+#  ifndef NDEBUG
+  result =
+#endif
+  mprotect (_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_EXEC);
+  assert (result == 0);
+#endif
 }
 
 void
-_jit_tramp(jit_state_t *_jit, jit_int32_t frame)
+_jit_unprotect(jit_state_t *_jit)
 {
-    jit_trampoline(frame, 0);
+#if !HAVE_MMAP
+  assert (_jit->user_code);
+#else
+#  ifndef NDEBUG
+  int result;
+#  endif
+  if (_jit->user_code) return;
+#  ifndef NDEBUG
+  result =
+#  endif
+  mprotect (_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_WRITE);
+  assert (result == 0);
+#endif
+}
+
+void
+_jit_frame(jit_state_t *_jit, jit_int32_t frame)
+{
+    jit_trampoline(frame, 1);
+}
+
+void
+_jit_tramp(jit_state_t *_jit, jit_int32_t frame)
+{
+    jit_trampoline(frame, 0);
 }
 
 void
@@ -2177,21 +2686,51 @@ _jit_setup(jit_state_t *_jit, jit_block_t *block)
                /* Check argument registers in reverse order to properly
                 * handle registers that are both, argument and result */
                value = jit_classify(node->code);
-               if ((value & jit_cc_a2_reg) &&
-                   !(node->w.w & jit_regno_patch) &&
-                   jit_regset_tstbit(&block->regmask, node->w.w)) {
+               if (value & jit_cc_a2_reg) {
                    live = !(value & jit_cc_a2_chg);
-                   jit_regset_clrbit(&block->regmask, node->w.w);
-                   if (live)
-                       jit_regset_setbit(&block->reglive, node->w.w);
+                   if (value & jit_cc_a2_rlh) {
+                       /* Assume will not modify a pair in second argument */
+                       assert(live);
+                       if (!(node->w.q.l & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->w.q.l)) {
+                           jit_regset_clrbit(&block->regmask, node->w.q.l);
+                       }
+                       if (!(node->w.q.h & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->w.q.h)) {
+                           jit_regset_clrbit(&block->regmask, node->w.q.h);
+                       }
+                   }
+                   else {
+                       if (!(node->w.w & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->w.w)) {
+                           jit_regset_clrbit(&block->regmask, node->w.w);
+                           if (live)
+                               jit_regset_setbit(&block->reglive, node->w.w);
+                       }
+                   }
                }
-               if ((value & jit_cc_a1_reg) &&
-                   !(node->v.w & jit_regno_patch) &&
-                   jit_regset_tstbit(&block->regmask, node->v.w)) {
+               if (value & jit_cc_a1_reg) {
                    live = !(value & jit_cc_a1_chg);
-                   jit_regset_clrbit(&block->regmask, node->v.w);
-                   if (live)
-                       jit_regset_setbit(&block->reglive, node->v.w);
+                   if (value & jit_cc_a1_rlh) {
+                       /* Assume will not modify a pair in second argument */
+                       assert(live);
+                       if (!(node->v.q.l & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->v.q.l)) {
+                           jit_regset_clrbit(&block->regmask, node->v.q.l);
+                       }
+                       if (!(node->v.q.h & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->v.q.h)) {
+                           jit_regset_clrbit(&block->regmask, node->v.q.h);
+                       }
+                   }
+                   else {
+                       if (!(node->v.w & jit_regno_patch) &&
+                           jit_regset_tstbit(&block->regmask, node->v.w)) {
+                           jit_regset_clrbit(&block->regmask, node->v.w);
+                           if (live)
+                               jit_regset_setbit(&block->reglive, node->v.w);
+                       }
+                   }
                }
                if (value & jit_cc_a0_reg) {
                    live = !(value & jit_cc_a0_chg);
@@ -2227,7 +2766,7 @@ _jit_setup(jit_state_t *_jit, jit_block_t *block)
  * or normal flow that have a live register not used in this block.
  */
 static void
-_jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo)
+_jit_follow(jit_state_t *_jit, jit_block_t *block)
 {
     jit_node_t         *node;
     jit_block_t                *next;
@@ -2256,7 +2795,7 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo)
                    /*  Remove from unknown state bitmask. */
                    jit_regset_com(&regtemp, &regtemp);
                    jit_regset_and(&block->regmask, &block->regmask, &regtemp);
-                   *todo = 1;
+                   block->again = 1;
                }
            case jit_code_prolog:
            case jit_code_epilog:
@@ -2284,20 +2823,46 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo)
            default:
                value = jit_classify(node->code);
                if (value & jit_cc_a2_reg) {
-                   if (!(node->w.w & jit_regno_patch)) {
-                       if (jit_regset_tstbit(&regmask, node->w.w)) {
-                           jit_regset_clrbit(&regmask, node->w.w);
-                           if (!(value & jit_cc_a2_chg))
-                               jit_regset_setbit(&reglive, node->w.w);
+                   if (value & jit_cc_a2_rlh) {
+                       if (!(node->w.q.l & jit_regno_patch)) {
+                           /* Assume register is not changed */
+                           if (jit_regset_tstbit(&regmask, node->w.q.l))
+                               jit_regset_clrbit(&regmask, node->w.q.l);
+                       }
+                       if (!(node->w.q.h & jit_regno_patch)) {
+                           if (jit_regset_tstbit(&regmask, node->w.q.h))
+                               jit_regset_clrbit(&regmask, node->w.q.h);
+                       }
+                   }
+                   else {
+                       if (!(node->w.w & jit_regno_patch)) {
+                           if (jit_regset_tstbit(&regmask, node->w.w)) {
+                               jit_regset_clrbit(&regmask, node->w.w);
+                               if (!(value & jit_cc_a2_chg))
+                                   jit_regset_setbit(&reglive, node->w.w);
+                           }
                        }
                    }
                }
                if (value & jit_cc_a1_reg) {
-                   if (!(node->v.w & jit_regno_patch)) {
-                       if (jit_regset_tstbit(&regmask, node->v.w)) {
-                           jit_regset_clrbit(&regmask, node->v.w);
-                           if (!(value & jit_cc_a1_chg))
-                               jit_regset_setbit(&reglive, node->v.w);
+                   if (value & jit_cc_a1_rlh) {
+                       if (!(node->v.q.l & jit_regno_patch)) {
+                           /* Assume register is not changed */
+                           if (jit_regset_tstbit(&regmask, node->v.q.l))
+                               jit_regset_clrbit(&regmask, node->v.q.l);
+                       }
+                       if (!(node->v.q.h & jit_regno_patch)) {
+                           if (jit_regset_tstbit(&regmask, node->v.q.h))
+                               jit_regset_clrbit(&regmask, node->v.q.h);
+                       }
+                   }
+                   else {
+                       if (!(node->v.w & jit_regno_patch)) {
+                           if (jit_regset_tstbit(&regmask, node->v.w)) {
+                               jit_regset_clrbit(&regmask, node->v.w);
+                               if (!(value & jit_cc_a1_chg))
+                                   jit_regset_setbit(&reglive, node->v.w);
+                           }
                        }
                    }
                }
@@ -2343,7 +2908,7 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo)
                            jit_regset_com(&regtemp, &regtemp);
                            jit_regset_and(&block->regmask,
                                           &block->regmask, &regtemp);
-                           *todo = 1;
+                           block->again = 1;
                        }
                    }
                    else {
@@ -2356,19 +2921,19 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo)
                         * means that only JIT_Vn registers can be trusted on
                         * arrival of jmpr.
                         */
+                       jit_regset_set_ui(&regmask, 0);
                        for (regno = 0; regno < _jitc->reglen; regno++) {
                            spec = jit_class(_rvs[regno].spec);
-                           if (jit_regset_tstbit(&regmask, regno) &&
-                               (spec & (jit_class_gpr|jit_class_fpr)) &&
-                               !(spec & jit_class_sav))
-                               jit_regset_clrbit(&regmask, regno);
+                           if ((spec & (jit_class_gpr|jit_class_fpr)) &&
+                               (spec & jit_class_sav))
+                               jit_regset_setbit(&regmask, regno);
                        }
                        /*   Assume non callee save registers are live due
                         * to jump to unknown location. */
                        /* Treat all callee save as live. */
-                       jit_regset_ior(&reglive, &reglive, &regmask);
+                       jit_regset_ior(&block->reglive, &reglive, &regmask);
                        /* Treat anything else as dead. */
-                       jit_regset_set_ui(&regmask, 0);
+                       return;
                    }
                }
                break;
@@ -2435,20 +3000,46 @@ _jit_update(jit_state_t *_jit, jit_node_t *node,
            default:
                value = jit_classify(node->code);
                if (value & jit_cc_a2_reg) {
-                   if (!(node->w.w & jit_regno_patch)) {
-                       if (jit_regset_tstbit(mask, node->w.w)) {
-                           jit_regset_clrbit(mask, node->w.w);
-                           if (!(value & jit_cc_a2_chg))
-                               jit_regset_setbit(live, node->w.w);
+                   if (value & jit_cc_a2_rlh) {
+                       if (!(node->w.q.l & jit_regno_patch)) {
+                           /* Assume register is not changed */
+                           if (jit_regset_tstbit(mask, node->w.q.l))
+                               jit_regset_clrbit(mask, node->w.q.l);
+                       }
+                       if (!(node->w.q.h & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->w.q.h))
+                               jit_regset_clrbit(mask, node->w.q.h);
+                       }
+                   }
+                   else {
+                       if (!(node->w.w & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->w.w)) {
+                               jit_regset_clrbit(mask, node->w.w);
+                               if (!(value & jit_cc_a2_chg))
+                                   jit_regset_setbit(live, node->w.w);
+                           }
                        }
                    }
                }
                if (value & jit_cc_a1_reg) {
-                   if (!(node->v.w & jit_regno_patch)) {
-                       if (jit_regset_tstbit(mask, node->v.w)) {
-                           jit_regset_clrbit(mask, node->v.w);
-                           if (!(value & jit_cc_a1_chg))
-                               jit_regset_setbit(live, node->v.w);
+                   if (value & jit_cc_a1_rlh) {
+                       if (!(node->v.q.l & jit_regno_patch)) {
+                           /* Assume register is not changed */
+                           if (jit_regset_tstbit(mask, node->v.q.l))
+                               jit_regset_clrbit(mask, node->v.q.l);
+                       }
+                       if (!(node->v.q.h & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->v.q.h))
+                               jit_regset_clrbit(mask, node->v.q.h);
+                       }
+                   }
+                   else {
+                       if (!(node->v.w & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->v.w)) {
+                               jit_regset_clrbit(mask, node->v.w);
+                               if (!(value & jit_cc_a1_chg))
+                                   jit_regset_setbit(live, node->v.w);
+                           }
                        }
                    }
                }
@@ -2504,19 +3095,22 @@ _jit_update(jit_state_t *_jit, jit_node_t *node,
                         * means that only JIT_Vn registers can be trusted on
                         * arrival of jmpr.
                         */
+                       jit_regset_set_ui(mask, 0);
                        for (regno = 0; regno < _jitc->reglen; regno++) {
                            spec = jit_class(_rvs[regno].spec);
-                           if (jit_regset_tstbit(mask, regno) &&
-                               (spec & (jit_class_gpr|jit_class_fpr)) &&
-                               !(spec & jit_class_sav))
-                               jit_regset_clrbit(mask, regno);
+                           if ((spec & (jit_class_gpr|jit_class_fpr)) &&
+                               (spec & jit_class_sav))
+                               jit_regset_setbit(mask, regno);
                        }
                        /*   Assume non callee save registers are live due
                         * to jump to unknown location. */
                        /* Treat all callee save as live. */
                        jit_regset_ior(live, live, mask);
+                       /*   Prevent explicitly set as live registers to
+                        * be used as a temporary for the jmpi. */
+                       jit_regset_ior(live, live, &_jitc->explive);
                        /* Treat anything else as dead. */
-                       jit_regset_set_ui(mask, 0);
+                       return;
                    }
                }
                break;
@@ -2582,7 +3176,10 @@ _sequential_labels(jit_state_t *_jit)
                    if ((jump = node->link)) {
                        for (; jump; jump = link) {
                            link = jump->link;
-                           jump->u.n = prev;
+                           if (jump->code == jit_code_movi)
+                               jump->v.n = prev;
+                           else
+                               jump->u.n = prev;
                            jump->link = prev->link;
                            prev->link = jump;
                        }
@@ -2596,7 +3193,10 @@ _sequential_labels(jit_state_t *_jit)
                if ((jump = next->link)) {
                    for (; jump; jump = link) {
                        link = jump->link;
-                       jump->u.n = node;
+                       if (jump->code == jit_code_movi)
+                           jump->v.n = node;
+                       else
+                           jump->u.n = node;
                        jump->link = node->link;
                        node->link = jump;
                    }
@@ -2618,36 +3218,59 @@ _split_branches(jit_state_t *_jit)
     jit_node_t         *next;
     jit_node_t         *label;
     jit_block_t                *block;
-
+    jit_block_t                *blocks;
+    jit_word_t          offset;
+    jit_word_t          length;
+
+    length = _jitc->blocks.length;
+    jit_alloc((jit_pointer_t *)&blocks, length * sizeof(jit_block_t));
+    if ((node = _jitc->head) &&
+       (node->code == jit_code_label || node->code == jit_code_prolog)) {
+       block = _jitc->blocks.ptr + node->v.w;
+       memcpy(blocks, block, sizeof(jit_block_t));
+       node->v.w = 0;
+       offset = 1;
+    }
+    else
+       offset = 0;
     for (node = _jitc->head; node; node = next) {
        if ((next = node->next)) {
            if (next->code == jit_code_label ||
                next->code == jit_code_prolog ||
-               next->code == jit_code_epilog)
-               continue;
+               next->code == jit_code_epilog) {
+               if (offset >= length) {
+                   jit_realloc((jit_pointer_t *)&blocks,
+                               length * sizeof(jit_block_t),
+                               (length + 16) * sizeof(jit_block_t));
+                   length += 16;
+               }
+               block = _jitc->blocks.ptr + next->v.w;
+               memcpy(blocks + offset, block, sizeof(jit_block_t));
+               next->v.w = offset++;
+           }
            /* split block on branches */
-           if (jit_classify(node->code) & jit_cc_a0_jmp) {
+           else if (jit_classify(node->code) & jit_cc_a0_jmp) {
                label = new_node(jit_code_label);
                label->next = next;
                node->next = label;
-               if (_jitc->blocks.offset >= _jitc->blocks.length) {
-                   jit_word_t    length;
-
-                   length = _jitc->blocks.length + 16;
-                   jit_realloc((jit_pointer_t *)&_jitc->blocks.ptr,
-                               _jitc->blocks.length * sizeof(jit_block_t),
-                               length * sizeof(jit_block_t));
-                   _jitc->blocks.length = length;
+               if (offset >= length) {
+                   jit_realloc((jit_pointer_t *)&blocks,
+                               length * sizeof(jit_block_t),
+                               (length + 16) * sizeof(jit_block_t));
+                   length += 16;
                }
-               block = _jitc->blocks.ptr + _jitc->blocks.offset;
+               block = blocks + offset;
                block->label = label;
-               label->v.w = _jitc->blocks.offset;
+               label->v.w = offset++;
                jit_regset_new(&block->reglive);
                jit_regset_new(&block->regmask);
-               ++_jitc->blocks.offset;
            }
        }
     }
+    jit_free((jit_pointer_t *)&_jitc->blocks.ptr);
+    _jitc->blocks.ptr = blocks;
+    _jitc->blocks.offset = offset;
+    _jitc->blocks.length = length;
 }
 
 static jit_bool_t
@@ -2728,7 +3351,6 @@ _redundant_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
                }
                break;
            case jit_code_name:         case jit_code_note:
-           case jit_code_align:
                break;
            default:
                return (0);
@@ -2779,7 +3401,7 @@ reverse_jump_code(jit_code_t code)
        case jit_code_bgti_f:   return (jit_code_bunlei_f);
 
        case jit_code_bner_f:   return (jit_code_beqr_f);
-       case jit_code_bnei_f:   return (jit_code_beqr_f);
+       case jit_code_bnei_f:   return (jit_code_beqi_f);
 
        case jit_code_bunltr_f: return (jit_code_bger_f);
        case jit_code_bunlti_f: return (jit_code_bgei_f);
@@ -2912,7 +3534,7 @@ _reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
     return (0);
 }
 
-static void
+static jit_bool_t
 _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump)
 {
     jit_node_t         *iter;
@@ -2920,30 +3542,33 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump)
     jit_word_t          word;
     jit_int32_t                 spec;
     jit_int32_t                 regno;
+    jit_bool_t          result;
 
     if (jump) {
        prev = node->u.n;
        if (prev->code == jit_code_epilog)
-           return;
+           return (0);
        assert(prev->code == jit_code_label);
        if ((prev->flag & jit_flag_head) || node->link || prev->link != node)
            /* multiple sources */
-           return;
+           return (0);
        /* if there are sequential labels it will return below */
     }
     else
        prev = node;
+    result = 0;
     word = node->w.w;
     regno = jit_regno(node->v.w);
     for (iter = prev->next; iter; prev = iter, iter = iter->next) {
        switch (iter->code) {
            case jit_code_label:        case jit_code_prolog:
            case jit_code_epilog:
-               return;
+               return (result);
            case jit_code_movi:
                if (regno == jit_regno(iter->u.w)) {
                    if (iter->flag || iter->v.w != word)
-                       return;
+                       return (result);
+                   result = 1;
                    del_node(prev, iter);
                    iter = prev;
                }
@@ -2951,32 +3576,34 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump)
            default:
                spec = jit_classify(iter->code);
                if (spec & jit_cc_a0_jmp)
-                   return;
+                   return (result);
                if ((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
                    (jit_cc_a0_reg|jit_cc_a0_chg)) {
                    if (spec & jit_cc_a0_rlh) {
                        if (regno == jit_regno(iter->u.q.l) ||
                            regno == jit_regno(iter->u.q.h))
-                           return;
+                           return (result);
                    }
                    else {
                        if (regno == jit_regno(iter->u.w))
-                           return;
+                           return (result);
                    }
                }
                if ((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
                    (jit_cc_a1_reg|jit_cc_a1_chg)) {
                    if (regno == jit_regno(iter->v.w))
-                       return;
+                       return (result);
                }
                if ((spec & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
                    (jit_cc_a2_reg|jit_cc_a2_chg)) {
                    if (regno == jit_regno(iter->w.w))
-                       return;
+                       return (result);
                }
                break;
        }
     }
+
+    return (result);
 }
 
 static jit_bool_t
@@ -2985,11 +3612,21 @@ _simplify_movr(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node,
 {
     jit_int32_t                 regno;
     jit_int32_t                 right;
+    jit_int32_t                 spec;
     jit_value_t                *value;
 
     regno = jit_regno(node->u.w);
     right = jit_regno(node->v.w);
     value = _jitc->values + regno;
+
+    spec = jit_class(_rvs[regno].spec);
+    if (!(spec & (jit_class_gpr | jit_class_fpr))) {
+       /* reserved register */
+       value->kind = 0;
+       ++_jitc->gen[regno];
+       return (0);
+    }
+
     if ((value->kind == jit_kind_register &&
         jit_regno(value->base.q.l) == right &&
         value->base.q.h == _jitc->gen[right]) ||
@@ -3027,12 +3664,18 @@ _simplify_movi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node,
        ++_jitc->gen[regno];
        return (0);
     }
+    spec = jit_class(_rvs[regno].spec);
+    if (!(spec & (jit_class_gpr | jit_class_fpr))) {
+       /* reserved register */
+       value->kind = 0;
+       ++_jitc->gen[regno];
+       return (0);
+    }
     if (value->kind == kind) {
        if (memcmp(&node->v.w, &value->base.w, size) == 0) {
            del_node(prev, node);
            return (1);
        }
-       spec = jit_class(_rvs[regno].spec);
        if (kind == jit_kind_word)
            spec &= jit_class_gpr;
        else
@@ -3123,7 +3766,6 @@ _simplify_stxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
     /* no multiple information, so, if set to a constant,
      * prefer to keep that information */
     if (value->kind == 0) {
-       value->kind = jit_kind_code;
        switch (node->code) {
            /* no information about signed/unsigned either */
            case jit_code_stxi_c:       value->code = jit_code_ldxi_c;  break;
@@ -3164,7 +3806,7 @@ _simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
  * once to the same value, and is a common pattern of calls
  * to jit_pushargi and jit_pushargr
  */
-static void
+static jit_bool_t
 _simplify(jit_state_t *_jit)
 {
     jit_node_t         *prev;
@@ -3172,7 +3814,9 @@ _simplify(jit_state_t *_jit)
     jit_node_t         *next;
     jit_int32_t                 info;
     jit_int32_t                 regno;
+    jit_bool_t          result;
 
+    result = 0;
     for (prev = NULL, node = _jitc->head; node; prev = node, node = next) {
        next = node->next;
        switch (node->code) {
@@ -3195,6 +3839,7 @@ _simplify(jit_state_t *_jit)
                     * already holding */
                    patch_register(node->link->next, node,
                                   jit_regno_patch|regno, regno);
+                   result = 1;
                    del_node(_jitc->spill[regno], node->link);
                    del_node(prev, node);
                    node = prev;
@@ -3204,38 +3849,50 @@ _simplify(jit_state_t *_jit)
            case jit_code_movr:
                regno = jit_regno(node->u.w);
                if (simplify_movr(prev, node,
-                                 jit_kind_word, sizeof(jit_word_t)))
+                                 jit_kind_word, sizeof(jit_word_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movi:
                regno = jit_regno(node->u.w);
                if (simplify_movi(prev, node,
-                                 jit_kind_word, sizeof(jit_word_t)))
+                                 jit_kind_word, sizeof(jit_word_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movr_f:
                regno = jit_regno(node->u.w);
                if (simplify_movr(prev, node,
-                                 jit_kind_float32, sizeof(jit_float32_t)))
+                                 jit_kind_float32, sizeof(jit_float32_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movi_f:
                regno = jit_regno(node->u.w);
                if (simplify_movi(prev, node,
-                                 jit_kind_float32, sizeof(jit_float32_t)))
+                                 jit_kind_float32, sizeof(jit_float32_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movr_d:
                regno = jit_regno(node->u.w);
                if (simplify_movr(prev, node,
-                                 jit_kind_float64, sizeof(jit_float64_t)))
+                                 jit_kind_float64, sizeof(jit_float64_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movi_d:
                regno = jit_regno(node->u.w);
                if (simplify_movi(prev, node,
-                                 jit_kind_float64, sizeof(jit_float64_t)))
+                                 jit_kind_float64, sizeof(jit_float64_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_ldxi_c:       case jit_code_ldxi_uc:
            case jit_code_ldxi_s:       case jit_code_ldxi_us:
@@ -3243,15 +3900,19 @@ _simplify(jit_state_t *_jit)
            case jit_code_ldxi_l:
            case jit_code_ldxi_f:       case jit_code_ldxi_d:
                regno = jit_regno(node->u.w);
-               if (simplify_ldxi(prev, node))
+               if (simplify_ldxi(prev, node)) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_stxi_c:       case jit_code_stxi_s:
            case jit_code_stxi_i:       case jit_code_stxi_l:
            case jit_code_stxi_f:       case jit_code_stxi_d:
                regno = jit_regno(node->u.w);
-               if (simplify_stxi(prev, node))
+               if (simplify_stxi(prev, node)) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            default:
                info = jit_classify(node->code);
@@ -3275,18 +3936,49 @@ _simplify(jit_state_t *_jit)
                    }
                }
                if (info & jit_cc_a1_chg) {
-                   regno = jit_regno(node->v.w);
-                   _jitc->values[regno].kind = 0;
-                   ++_jitc->gen[regno];
+#if 0
+                   /* Assume registers are not changed */
+                   if (info & jit_cc_a1_rlh) {
+                       regno = jit_regno(node->v.q.l);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                       regno = jit_regno(node->v.q.h);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                   }
+                   else {
+#endif
+                       regno = jit_regno(node->v.w);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+#if 0
+                   }
+#endif
                }
                if (info & jit_cc_a2_chg) {
-                   regno = jit_regno(node->w.w);
-                   _jitc->values[regno].kind = 0;
-                   ++_jitc->gen[regno];
+#if 0
+                   /* Assume registers are not changed */
+                   if (info & jit_cc_a2_rlh) {
+                       regno = jit_regno(node->w.q.l);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                       regno = jit_regno(node->w.q.h);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                   }
+                   else {
+#endif
+                       regno = jit_regno(node->w.w);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+#if 0
+                   }
+#endif
                }
                break;
        }
     }
+    return (result);
 }
 
 static jit_int32_t
@@ -3294,33 +3986,51 @@ _register_change_p(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
                   jit_int32_t regno)
 {
     jit_int32_t                value;
+    jit_bool_t         use, change;
 
     for (; node != link; node = node->next) {
        switch (node->code) {
            case jit_code_label:        case jit_code_prolog:
-               /* lack of extra information so cannot say it is undefined */
-               return (jit_reg_change);
+               /* lack of extra information so assume it is live */
+               return (jit_reg_static);
            case jit_code_callr:        case jit_code_calli:
                if (!(jit_class(_rvs[regno].spec) & jit_class_sav))
                    return (jit_reg_undef);
                break;
            default:
                value = jit_classify(node->code);
+               use = change = 0;
+               if (value & jit_cc_a0_rlh) {
+                   if (node->u.q.l == regno || node->u.q.h == regno) {
+                       if (value & jit_cc_a0_chg)
+                           change = 1;
+                       use = !change;
+                   }
+               }
+               else if ((value & jit_cc_a0_reg) && node->u.w == regno) {
+                   if (value & jit_cc_a0_chg)
+                       change = 1;
+                   use = !change;
+               }
+               if (!use &&
+                   (value & jit_cc_a1_reg) && node->v.w == regno) {
+                   if (value & jit_cc_a1_chg)
+                       change = 1;
+                   use = !change;
+               }
+               if (!use &&
+                   (value & jit_cc_a2_reg) && node->w.w == regno) {
+                   if (value & jit_cc_a2_chg)
+                       change = 1;
+                   use = !change;
+               }
                /* lack of extra information */
-               if (value & jit_cc_a0_jmp)
-                   return (jit_reg_change);
-               else if ((value & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
-                        (jit_cc_a0_reg|jit_cc_a0_chg) &&
-                        (((value & jit_cc_a0_rlh) &&
-                          (node->u.q.l == regno || node->u.q.h == regno)) ||
-                         (!(value & jit_cc_a0_rlh) &&
-                          node->u.w == regno)))
-                   return (jit_reg_change);
-               else if ((value & jit_cc_a1_reg) && node->v.w == regno &&
-                        (value & jit_cc_a1_chg))
-                   return (jit_reg_change);
-               else if ((value & jit_cc_a2_reg) && node->w.w == regno &&
-                        (value & jit_cc_a2_chg))
+               if (!use && (value & (jit_cc_a0_jmp|jit_cc_a0_cnd)) &&
+                   /* In case of indirect branches, always consider
+                    * callee save registers as live. */
+                   !(jit_class(_rvs[regno].spec) & jit_class_sav))
+                   change = 1;
+               if (!use && change)
                    return (jit_reg_change);
        }
     }
@@ -3485,13 +4195,148 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
                    node->u.w = patch;
            }
        }
-       if ((value & jit_cc_a1_reg) && node->v.w == regno)
-           node->v.w = patch;
-       if ((value & jit_cc_a2_reg) && node->w.w == regno)
-           node->w.w = patch;
+       if (value & jit_cc_a1_reg) {
+           if (value & jit_cc_a1_rlh) {
+               if (node->v.q.l == regno)
+                   node->v.q.l = patch;
+               if (node->v.q.h == regno)
+                   node->v.q.h = patch;
+           }
+           else {
+               if (node->v.w == regno)
+                   node->v.w = patch;
+           }
+       }
+       if (value & jit_cc_a2_reg) {
+           if (value & jit_cc_a2_rlh) {
+               if (node->w.q.l == regno)
+                   node->w.q.l = patch;
+               if (node->w.q.h == regno)
+                   node->w.q.h = patch;
+           }
+           else {
+               if (node->w.w == regno)
+                   node->w.w = patch;
+           }
+       }
     }
 }
 
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define htonr_us(r0,r1)              bswapr_us(r0,r1)
+#  define htonr_ui(r0,r1)              bswapr_ui(r0,r1)
+#  if __WORDSIZE == 64
+#    define htonr_ul(r0,r1)            bswapr_ul(r0,r1)
+#  endif
+#else
+#  define htonr_us(r0,r1)              extr_us(r0,r1)
+#  if __WORDSIZE == 32
+#    define htonr_ui(r0,r1)            movr(r0,r1)
+#  else
+#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
+#    define htonr_ul(r0,r1)            movr(r0,r1)
+#  endif
+#endif
+
+static maybe_unused void
+generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+static maybe_unused void
+generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#if __WORDSIZE == 64
+static maybe_unused void
+generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#endif
+
+#define depi(r0, i0, i1, i2)           _depi(_jit, r0, i0, i1, i2)
+static void _depi(jit_state_t*,jit_int32_t, jit_word_t, jit_word_t, jit_word_t);
+#define negi(r0, i0)                   _negi(_jit, r0, i0)
+static void _negi(jit_state_t*, jit_int32_t, jit_word_t);
+#define comi(r0, i0)                   _comi(_jit, r0, i0)
+static void _comi(jit_state_t*, jit_int32_t, jit_word_t);
+#define exti_c(r0, i0)                 _exti_c(_jit, r0, i0)
+static void _exti_c(jit_state_t*, jit_int32_t, jit_word_t);
+#define exti_uc(r0, i0)                        _exti_uc(_jit, r0, i0)
+static void _exti_uc(jit_state_t*, jit_int32_t, jit_word_t);
+#define exti_s(r0, i0)                 _exti_s(_jit, r0, i0)
+static void _exti_s(jit_state_t*, jit_int32_t, jit_word_t);
+#define exti_us(r0, i0)                        _exti_us(_jit, r0, i0)
+static void _exti_us(jit_state_t*, jit_int32_t, jit_word_t);
+#if __WORDSIZE == 64
+#define exti_i(r0, i0)                 _exti_i(_jit, r0, i0)
+static void _exti_i(jit_state_t*, jit_int32_t, jit_word_t);
+#define exti_ui(r0, i0)                        _exti_ui(_jit, r0, i0)
+static void _exti_ui(jit_state_t*, jit_int32_t, jit_word_t);
+#endif
+#define bswapi_us(r0, i0)              _bswapi_us(_jit, r0, i0)
+static void _bswapi_us(jit_state_t*, jit_int32_t, jit_word_t);
+#define bswapi_ui(r0, i0)              _bswapi_ui(_jit, r0, i0)
+static void _bswapi_ui(jit_state_t*, jit_int32_t, jit_word_t);
+#if __WORDSIZE == 64
+#  define bswapi_ul(r0, i0)            _bswapi_ul(_jit, r0, i0)
+static void _bswapi_ul(jit_state_t*, jit_int32_t, jit_word_t);
+#endif
+#define htoni_us(r0, i0)               _htoni_us(_jit, r0, i0)
+static void _htoni_us(jit_state_t*, jit_int32_t, jit_word_t);
+#define htoni_ui(r0, i0)               _htoni_ui(_jit, r0, i0)
+static void _htoni_ui(jit_state_t*, jit_int32_t, jit_word_t);
+#if __WORDSIZE == 64
+#  define htoni_ul(r0, i0)             _htoni_ul(_jit, r0, i0)
+static void _htoni_ul(jit_state_t*, jit_int32_t, jit_word_t);
+#endif
+#  define movi_f_w(r0, i0)             _movi_f_w(_jit, r0, i0)
+static void _movi_f_w(jit_state_t*, jit_int32_t, jit_float32_t);
+#if __WORDSIZE == 32 && !(defined(__mips__) && NEW_ABI)
+#  define movi_d_ww(r0, r1, i0)                _movi_d_ww(_jit, r0, r1, i0)
+static void _movi_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
+#else
+#  define movi_d_w(r0, i0)             _movi_d_w(_jit, r0, i0)
+static void _movi_d_w(jit_state_t*, jit_int32_t, jit_float64_t);
+#endif
+#define cloi(r0, i0)                   _cloi(_jit, r0, i0)
+static void _cloi(jit_state_t*, jit_int32_t, jit_word_t);
+#define clzi(r0, i0)                   _clzi(_jit, r0, i0)
+static void _clzi(jit_state_t*, jit_int32_t, jit_word_t);
+#define ctoi(r0, i0)                   _ctoi(_jit, r0, i0)
+static void _ctoi(jit_state_t*, jit_int32_t, jit_word_t);
+#define ctzi(r0, i0)                   _ctzi(_jit, r0, i0)
+static void _ctzi(jit_state_t*, jit_int32_t, jit_word_t);
+#define rbiti(r0, i0)                  _rbiti(_jit, r0, i0)
+static void _rbiti(jit_state_t*, jit_int32_t, jit_word_t);
+#define popcnti(r0, i0)                        _popcnti(_jit, r0, i0)
+static void _popcnti(jit_state_t*, jit_int32_t, jit_word_t);
+#define exti(r0, i0, i1, i2)           _exti(_jit, r0, i0, i1, i2)
+static void _exti(jit_state_t*,jit_int32_t, jit_word_t, jit_word_t, jit_word_t);
+#define exti_u(r0, i0, i1, i2)         _exti_u(_jit, r0, i0, i1, i2)
+static void _exti_u(jit_state_t*,
+                   jit_int32_t, jit_word_t, jit_word_t, jit_word_t);
+#define generic_unldr(r0, r1, i0)      _generic_unldr(_jit, r0, r1, i0)
+static void _generic_unldr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_unldi(r0, i0, i1)      _generic_unldi(_jit, r0, i0, i1)
+static void _generic_unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+#define generic_unldr_u(r0, r1, i0)    _generic_unldr_u(_jit, r0, r1, i0)
+static void _generic_unldr_u(jit_state_t*,
+                            jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_unldi_u(r0, i0, i1)    _generic_unldi_u(_jit, r0, i0, i1)
+static void _generic_unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+#define generic_unstr(r0, r1, i0)      _generic_unstr(_jit, r0, r1, i0)
+static void _generic_unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_unsti(i0, r0, i1)      _generic_unsti(_jit, i0, r0, i1)
+static void _generic_unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#if !defined(__i386__) && !defined(__x86_64__)
+#  define generic_unldr_x(r0, r1, i0)  _generic_unldr_x(_jit, r0, r1, i0)
+static void _generic_unldr_x(jit_state_t*,
+                            jit_int32_t, jit_int32_t, jit_word_t);
+#  define generic_unldi_x(r0, i0, i1)  _generic_unldi_x(_jit, r0, i0, i1)
+static void _generic_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+#  define generic_unstr_x(r0, r1, i0)  _generic_unstr_x(_jit, r0, r1, i0)
+static void _generic_unstr_x(jit_state_t*,
+                            jit_int32_t, jit_int32_t, jit_word_t);
+#  define generic_unsti_x(i0, r0, i1)  _generic_unsti_x(_jit, i0, r0, i1)
+static void _generic_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#endif
+#define patch_alist(revert)            _patch_alist(_jit, revert)
+static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert);
+
 #if defined(__i386__) || defined(__x86_64__)
 #  include "jit_x86.c"
 #elif defined(__mips__)
@@ -3514,4 +4359,1148 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
 #  include "jit_alpha.c"
 #elif defined(__riscv)
 #  include "jit_riscv.c"
+#elif defined(__loongarch__)
+#  include "jit_loongarch.c"
+#endif
+
+static maybe_unused void
+generic_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg = jit_get_reg(jit_class_gpr);
+
+    rshi(rn(reg), r1, 8);
+    andi(r0, r1, 0xff);
+    andi(rn(reg), rn(reg), 0xff);
+    lshi(r0, r0, 8);
+    orr(r0, r0, rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+static maybe_unused void
+generic_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg = jit_get_reg(jit_class_gpr);
+
+       rshi(rn(reg), r1, 16);
+       bswapr_us(r0, r1);
+       bswapr_us(rn(reg), rn(reg));
+       lshi(r0, r0, 16);
+       orr(r0, r0, rn(reg));
+
+    jit_unget_reg(reg);
+}
+
+#if __WORDSIZE == 64
+static maybe_unused void
+generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t reg = jit_get_reg(jit_class_gpr);
+
+    rshi_u(rn(reg), r1, 32);
+    bswapr_ui(r0, r1);
+    bswapr_ui(rn(reg), rn(reg));
+    lshi(r0, r0, 32);
+    orr(r0, r0, rn(reg));
+
+    jit_unget_reg(reg);
+}
+#endif
+
+static void
+_depi(jit_state_t *_jit,
+      jit_int32_t r0, jit_word_t i0, jit_word_t i1, jit_word_t i2)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    depr(r0, rn(reg), i1, i2);
+    jit_unget_reg(reg);
+}
+
+static void
+_negi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, -i0);
+}
+
+static void
+_comi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, ~i0);
+}
+
+static void
+_exti_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, (jit_int8_t)i0);
+}
+
+static void
+_exti_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, (jit_uint8_t)i0);
+}
+
+static void
+_exti_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, (jit_int16_t)i0);
+}
+
+static void
+_exti_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, (jit_uint16_t)i0);
+}
+
+#if __WORDSIZE == 64
+static void
+_exti_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, (jit_int32_t)i0);
+}
+
+static void
+_exti_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    movi(r0, (jit_uint32_t)i0);
+}
+#endif
+
+static void
+_bswapi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    union {
+       jit_uint16_t    us;
+       jit_uint8_t     v[2];
+    } l, h;
+    l.us = i0;
+    h.v[0] = l.v[1];
+    h.v[1] = l.v[0];
+    movi(r0,  h.us);
+}
+
+static void
+_bswapi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    union {
+       jit_uint32_t    ui;
+       jit_uint8_t     v[4];
+    } l, h;
+    l.ui = i0;
+    h.v[0] = l.v[3];
+    h.v[1] = l.v[2];
+    h.v[2] = l.v[1];
+    h.v[3] = l.v[0];
+    movi(r0,  h.ui);
+}
+
+#if __WORDSIZE == 64
+static void
+_bswapi_ul(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    union {
+       jit_uint64_t    ul;
+       jit_uint8_t     v[8];
+    } l, h;
+    l.ul = i0;
+    h.v[0] = l.v[7];
+    h.v[1] = l.v[6];
+    h.v[2] = l.v[5];
+    h.v[3] = l.v[4];
+    h.v[4] = l.v[3];
+    h.v[5] = l.v[2];
+    h.v[6] = l.v[1];
+    h.v[7] = l.v[0];
+    movi(r0,  h.ul);
+}
+#endif
+
+static void
+_htoni_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    bswapi_us(r0, i0);
+#else
+    exti_us(r0, i0);
+#endif
+}
+
+static void
+_htoni_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    bswapi_ui(r0, i0);
+#else
+#  if __WORDSIZE == 32
+    movi(r0, i0);
+#  else
+    exti_ui(r0, i0);
+#  endif
+#endif
+}
+
+#if __WORDSIZE == 64
+static void
+_htoni_ul(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+    bswapi_ul(r0, i0);
+#  else
+    movi(r0, i0);
+#  endif
+}
+#endif
+
+static void
+_movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
+{
+    union {
+       jit_int32_t     i;
+       jit_float32_t   f;
+    } data;
+    data.f = i0;
+#if defined(__ia64__)
+    /* Should be used only in this case (with out0 == 120) */
+    if (r0 >= 120)
+       r0 = _jitc->rout + (r0 - 120);
+#endif
+    movi(r0, data.i);
+}
+
+#if __WORDSIZE == 32 && !(defined(__mips__) && NEW_ABI)
+static void
+_movi_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
+{
+    union {
+       jit_int32_t     i[2];
+       jit_float64_t   d;
+    } data;
+    data.d = i0;
+    /* Mips does not change byte order of double values */
+#  if __BYTE_ORDER == __LITTLE_ENDIAN || defined(__mips__)
+    movi(r0, data.i[0]);
+    movi(r1, data.i[1]);
+#  else
+    movi(r1, data.i[0]);
+    movi(r0, data.i[1]);
+#  endif
+}
+
+#else
+static void
+_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    union {
+       jit_int64_t     l;
+       jit_float64_t   d;
+    } data;
+    data.d = i0;
+#  if defined(__ia64__)
+    /* Should be used only in this case (with out0 == 120) */
+    if (r0 >= 120)
+       r0 = _jitc->rout + (r0 - 120);
+#  endif
+    movi(r0, data.l);
+}
+#endif
+
+ void
+_jit_negi_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v)
+{
+    jit_inc_synth_wf(negi_f, u, v);
+    jit_movi_f(u, v);
+    jit_negr_f(u, u);
+    jit_dec_synth();
+}
+
+void
+_jit_absi_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v)
+{
+    jit_inc_synth_wf(absi_f, u, v);
+    jit_movi_f(u, v);
+    jit_absr_f(u, u);
+    jit_dec_synth();
+}
+
+void
+_jit_sqrti_f(jit_state_t *_jit, jit_fpr_t u, jit_float32_t v)
+{
+    jit_inc_synth_wf(sqrti_f, u, v);
+    jit_movi_f(u, v);
+    jit_sqrtr_f(u, u);
+    jit_dec_synth();
+}
+
+void
+_jit_fmai_f(jit_state_t *_jit,
+           jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x)
+{
+    jit_int32_t                y;
+    jit_inc_synth_wqf(fmai_f, u, v, w, x);
+    if (u != v && u != w) {
+       jit_movi_f(u, x);
+       jit_fmar_f(u, v, w, u);
+    }
+    else {
+       y = jit_get_reg(jit_class_fpr);
+       jit_movi_f(y, x);
+       jit_fmar_f(u, v, w, y);
+       jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_fmsi_f(jit_state_t *_jit,
+           jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x)
+{
+    jit_int32_t                y;
+    jit_inc_synth_wqf(fmai_f, u, v, w, x);
+    if (u != v && u != w) {
+       jit_movi_f(u, x);
+       jit_fmsr_f(u, v, w, u);
+    }
+    else {
+       y = jit_get_reg(jit_class_fpr);
+       jit_movi_f(y, x);
+       jit_fmsr_f(u, v, w, y);
+       jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_fnmai_f(jit_state_t *_jit,
+            jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x)
+{
+    jit_int32_t                y;
+    jit_inc_synth_wqf(fmai_f, u, v, w, x);
+    if (u != v && u != w) {
+       jit_movi_f(u, x);
+       jit_fnmar_f(u, v, w, u);
+    }
+    else {
+       y = jit_get_reg(jit_class_fpr);
+       jit_movi_f(y, x);
+       jit_fnmar_f(u, v, w, y);
+       jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_fnmsi_f(jit_state_t *_jit,
+            jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float32_t x)
+{
+    jit_int32_t                y;
+    jit_inc_synth_wqf(fmai_f, u, v, w, x);
+    if (u != v && u != w) {
+       jit_movi_f(u, x);
+       jit_fnmsr_f(u, v, w, u);
+    }
+    else {
+       y = jit_get_reg(jit_class_fpr);
+       jit_movi_f(y, x);
+       jit_fnmsr_f(u, v, w, y);
+       jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_negi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v)
+{
+    jit_inc_synth_wd(negi_d, u, v);
+    jit_movi_d(u, v);
+    jit_negr_d(u, u);
+    jit_dec_synth();
+}
+
+void
+_jit_absi_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v)
+{
+    jit_inc_synth_wd(absi_d, u, v);
+    jit_movi_d(u, v);
+    jit_absr_d(u, u);
+    jit_dec_synth();
+}
+
+void
+_jit_sqrti_d(jit_state_t *_jit, jit_fpr_t u, jit_float64_t v)
+{
+    jit_inc_synth_wd(sqrti_d, u, v);
+    jit_movi_d(u, v);
+    jit_sqrtr_d(u, u);
+    jit_dec_synth();
+}
+
+void
+_jit_fmai_d(jit_state_t *_jit,
+           jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x)
+{
+    jit_int32_t                y;
+    jit_inc_synth_wqd(fmai_d, u, v, w, x);
+    if (u != v && u != w) {
+       jit_movi_d(u, x);
+       jit_fmar_d(u, v, w, u);
+    }
+    else {
+       y = jit_get_reg(jit_class_fpr);
+       jit_movi_d(y, x);
+       jit_fmar_d(u, v, w, y);
+       jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_fmsi_d(jit_state_t *_jit,
+           jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x)
+{
+    jit_int32_t                y;
+    jit_inc_synth_wqd(fmai_d, u, v, w, x);
+    if (u != v && u != w) {
+       jit_movi_d(u, x);
+       jit_fmsr_d(u, v, w, u);
+    }
+    else {
+       y = jit_get_reg(jit_class_fpr);
+       jit_movi_d(y, x);
+       jit_fmsr_d(u, v, w, y);
+       jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_fnmai_d(jit_state_t *_jit,
+            jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x)
+{
+    jit_int32_t                y;
+    jit_inc_synth_wqd(fmai_d, u, v, w, x);
+    if (u != v && u != w) {
+       jit_movi_d(u, x);
+       jit_fnmar_d(u, v, w, u);
+    }
+    else {
+       y = jit_get_reg(jit_class_fpr);
+       jit_movi_d(y, x);
+       jit_fnmar_d(u, v, w, y);
+       jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+void
+_jit_fnmsi_d(jit_state_t *_jit,
+            jit_fpr_t u, jit_fpr_t v, jit_fpr_t w, jit_float64_t x)
+{
+    jit_int32_t                y;
+    jit_inc_synth_wqd(fmai_d, u, v, w, x);
+    if (u != v && u != w) {
+       jit_movi_d(u, x);
+       jit_fnmsr_d(u, v, w, u);
+    }
+    else {
+       y = jit_get_reg(jit_class_fpr);
+       jit_movi_d(y, x);
+       jit_fnmsr_d(u, v, w, y);
+       jit_unget_reg(y);
+    }
+    jit_dec_synth();
+}
+
+static void
+_cloi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    clzi(r0, ~i0);
+}
+
+static void
+_clzi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+#if __WORDSIZE == 64 && _WIN32
+    movi(r0, (i0) ? __builtin_clzll(i0) : __WORDSIZE);
+#else
+    movi(r0, (i0) ? __builtin_clzl(i0) : __WORDSIZE);
+#endif
+}
+
+static void
+_ctoi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    ctzi(r0, ~i0);
+}
+
+static void
+_ctzi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+#if __WORDSIZE == 64 && _WIN32
+    movi(r0, (i0) ? __builtin_ctzll(i0) : __WORDSIZE);
+#else
+    movi(r0, (i0) ? __builtin_ctzl(i0) : __WORDSIZE);
+#endif
+}
+
+static void
+_rbiti(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                i;
+    union {
+       jit_uword_t     w;
+       jit_uint8_t     v[__WORDSIZE >> 3];
+    } u, v;
+    static const unsigned char swap_tab[256] = {
+        0, 128, 64, 192, 32, 160,  96, 224,
+       16, 144, 80, 208, 48, 176, 112, 240,
+        8, 136, 72, 200, 40, 168, 104, 232,
+       24, 152, 88, 216 ,56, 184, 120, 248,
+        4, 132, 68, 196, 36, 164, 100, 228,
+       20, 148, 84, 212, 52, 180, 116, 244,
+       12, 140, 76, 204, 44, 172, 108, 236,
+       28, 156, 92, 220, 60, 188, 124, 252,
+        2, 130, 66, 194, 34, 162,  98, 226,
+       18, 146, 82, 210, 50, 178, 114, 242,
+       10, 138, 74, 202, 42, 170, 106, 234,
+       26, 154, 90, 218, 58, 186, 122, 250,
+        6, 134, 70, 198, 38, 166, 102, 230,
+       22, 150, 86, 214, 54, 182, 118, 246,
+       14, 142, 78, 206, 46, 174, 110, 238,
+       30, 158, 94, 222, 62, 190, 126, 254,
+        1, 129, 65, 193, 33, 161,  97, 225,
+       17, 145, 81, 209, 49, 177, 113, 241,
+        9, 137, 73, 201, 41, 169, 105, 233,
+       25, 153, 89, 217, 57, 185, 121, 249,
+        5, 133, 69, 197, 37, 165, 101, 229,
+       21, 149, 85, 213, 53, 181, 117, 245,
+       13, 141, 77, 205, 45, 173, 109, 237,
+       29, 157, 93, 221, 61, 189, 125, 253,
+        3, 131, 67, 195, 35, 163,  99, 227,
+       19, 147, 83, 211, 51, 179, 115, 243,
+       11, 139, 75, 203, 43, 171, 107, 235,
+       27, 155, 91, 219, 59, 187, 123, 251,
+        7, 135, 71, 199, 39, 167, 103, 231,
+       23, 151, 87, 215, 55, 183, 119, 247,
+       15, 143, 79, 207, 47, 175, 111, 239,
+       31, 159, 95, 223, 63, 191, 127, 255
+    };
+    u.w = i0;
+    for (i = 0; i < sizeof(jit_word_t); ++i)
+       v.v[i] = swap_tab[u.v[sizeof(jit_word_t) - i - 1]];
+    movi(r0, v.w);
+}
+
+static void
+_popcnti(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+#if __WORDSIZE == 64 && _WIN32
+    movi(r0, (i0) ? __builtin_popcountll(i0) : __WORDSIZE);
+#else
+    movi(r0, (i0) ? __builtin_popcountl(i0) : __WORDSIZE);
+#endif
+}
+
+static void _exti(jit_state_t *_jit,
+                 jit_int32_t r0, jit_word_t i0, jit_word_t i1, jit_word_t i2)
+{
+#if __BYTE_ORDER == __BIG_ENDIAN
+    i1 = __WORDSIZE - (i1 + i2);
+#endif
+    i0 <<= __WORDSIZE - (i1 + i2);
+    i0 >>= __WORDSIZE - i2;
+    movi(r0, i0);
+}
+
+static void _exti_u(jit_state_t *_jit,
+                   jit_int32_t r0, jit_word_t i0, jit_word_t i1, jit_word_t i2)
+{
+    jit_word_t         t;
+#if __BYTE_ORDER == __BIG_ENDIAN
+    i1 = __WORDSIZE - (i1 + i2);
+#endif
+    if (i1)
+       i0 >>= __WORDSIZE - i2;
+#if __WORDSIZE == 64 && !_WIN32
+    i0 &= (1L << i2) - 1;
+#else
+    i0 &= (1LL << i2) - 1;
+#endif
+    movi(r0, i0);
+}
+
+static void
+_generic_unldr(jit_state_t *_jit,
+              jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                t0, r2;
+    assert(i0 >= 1 && i0 <= sizeof(jit_word_t));
+    if (i0 & (i0 - 1)) {
+       t0 = jit_get_reg(jit_class_gpr);
+       r2 = rn(t0);
+       movr(r2, r1);
+    }
+    switch (i0) {
+       case 1:
+           ldr_c(r0, r1);
+           break;
+       case 2:
+           ldr_s(r0, r1);
+           break;
+       case 3:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldr_us(r0, r2);
+           ldxi_c(r2, r2, 2);
+           lshi(r2, r2, 16);
+#else
+           ldr_c(r0, r2);
+           lshi(r0, r0, 16);
+           ldxi_us(r2, r2, 1);
+#endif
+           break;
+#if __WORDSIZE == 32
+       default:
+           ldr_i(r0, r1);
+           break;
+#else
+       case 4:
+           ldr_i(r0, r1);
+           break;
+       case 5:
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldr_ui(r0, r2);
+           ldxi_c(r2, r2, 4);
+           lshi(r2, r2, 32);
+#  else
+           ldr_i(r0, r2);
+           lshi(r0, r0, 8);
+           ldxi_uc(r2, r2, 4);
+#  endif
+           break;
+       case 6:
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldr_ui(r0, r2);
+           ldxi_s(r2, r2, 4);
+           lshi(r2, r2, 32);
+#  else
+           ldr_i(r0, r2);
+           lshi(r0, r0, 16);
+           ldxi_us(r2, r2, 4);
+#  endif
+           break;
+       case 7:
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldr_ui(r0, r2);
+           ldxi_i(r2, r2, 4);
+           lshi(r2, r2, 40);
+           rshi(r2, r2, 8);
+#  else
+           ldr_i(r0, r2);
+           lshi(r0, r0, 24);
+           ldxi_ui(r2, r2, 4);
+           rshi(r2, r2, 8);
+#  endif
+           break;
+       default:
+           ldr_l(r0, r1);
+           break;
+#endif
+    }
+    if (i0 & (i0 - 1)) {
+       orr(r0, r0, r2);
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_generic_unldi(jit_state_t *_jit,
+              jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    jit_int32_t                t0, r2;
+    assert(i1 >= 1 && i1 <= sizeof(jit_word_t));
+    if (i1 & (i1 - 1)) {
+       t0 = jit_get_reg(jit_class_gpr);
+       r2 = rn(t0);
+    }
+    switch (i1) {
+       case 1:
+           ldi_c(r0, i0);
+           break;
+       case 2:
+           ldi_s(r0, i0);
+           break;
+       case 3:
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldi_us(r0, i0);
+           ldi_c(r2, i0 + 2);
+           lshi(r2, r2, 16);
+#  else
+           ldi_c(r0, i0);
+           lshi(r0, r0, 16);
+           ldi_us(r2, i0 + 1);
+#  endif
+           break;
+#  if __WORDSIZE == 32
+       default:
+           ldi_i(r0, i0);
+           break;
+#  else
+       case 4:
+           ldi_i(r0, i0);
+           break;
+       case 5:
+#   if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldi_ui(r0, i0);
+           ldi_c(r2, i0 + 4);
+           lshi(r2, r2, 32);
+#   else
+           ldi_i(r0, i0);
+           lshi(r0, r0, 8);
+           ldi_uc(r2, i0 + 4);
+#   endif
+           break;
+       case 6:
+#   if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldi_ui(r0, i0);
+           ldi_s(r2, i0 + 4);
+           lshi(r2, r2, 32);
+#   else
+           ldi_i(r0, i0);
+           lshi(r0, r0, 16);
+           ldi_us(r2, i0 + 4);
+#   endif
+           break;
+       case 7:
+#   if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldi_ui(r0, i0);
+           ldi_i(r2, i0 + 4);
+           lshi(r2, r2, 40);
+           rshi(r2, r2, 8);
+#   else
+           ldi_i(r0, i0);
+           lshi(r0, r0, 24);
+           ldi_ui(r2, i0 + 4);
+           rshi(r2, r2, 8);
+#   endif
+           break;
+       default:
+           ldi_l(r0, i0);
+           break;
+#  endif
+    }
+    if (i1 & (i1 - 1)) {
+       orr(r0, r0, r2);
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_generic_unldr_u(jit_state_t *_jit,
+                jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                t0, r2;
+    assert(i0 >= 1 && i0 <= sizeof(jit_word_t));
+    if (i0 & (i0 - 1)) {
+       t0 = jit_get_reg(jit_class_gpr);
+       r2 = rn(t0);
+       movr(r2, r1);
+    }
+    switch (i0) {
+       case 1:
+           ldr_uc(r0, r1);
+           break;
+       case 2:
+           ldr_us(r0, r1);
+           break;
+       case 3:
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldr_us(r0, r2);
+           ldxi_uc(r2, r2, 2);
+           lshi(r2, r2, 16);
+#  else
+           ldr_uc(r0, r2);
+           lshi(r0, r0, 16);
+           ldxi_us(r2, r2, 1);
+#  endif
+           break;
+#  if __WORDSIZE == 32
+       default:
+           ldr_i(r0, r1);
+           break;
+#  else
+       case 4:
+           ldr_ui(r0, r1);
+           break;
+       case 5:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldr_ui(r0, r2);
+           ldxi_uc(r2, r2, 4);
+           lshi(r2, r2, 32);
+#    else
+           ldr_ui(r0, r2);
+           lshi(r0, r0, 8);
+           ldxi_uc(r2, r2, 4);
+#    endif
+           break;
+       case 6:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldr_ui(r0, r2);
+           ldxi_us(r2, r2, 4);
+           lshi(r2, r2, 32);
+#    else
+           ldr_ui(r0, r2);
+           lshi(r0, r0, 16);
+           ldxi_us(r2, r2, 4);
+#    endif
+           break;
+       case 7:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldr_ui(r0, r2);
+           ldxi_ui(r2, r2, 4);
+           lshi(r2, r2, 40);
+           rshi_u(r2, r2, 8);
+#    else
+           ldr_ui(r0, r2);
+           lshi(r0, r0, 24);
+           ldxi_ui(r2, r2, 4);
+           rshi(r2, r2, 8);
+#    endif
+           break;
+       default:
+           ldr_l(r0, r1);
+           break;
+#  endif
+    }
+    if (i0 & (i0 - 1)) {
+       orr(r0, r0, r2);
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_generic_unldi_u(jit_state_t *_jit,
+                jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    jit_int32_t                t0, r2;
+    assert(i1 >= 1 && i1 <= sizeof(jit_word_t));
+    if (i1 & (i1 - 1)) {
+       t0 = jit_get_reg(jit_class_gpr);
+       r2 = rn(t0);
+    }
+    switch (i1) {
+       case 1:
+           ldi_uc(r0, i0);
+           break;
+       case 2:
+           ldi_us(r0, i0);
+           break;
+       case 3:
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldi_us(r0, i0);
+           ldi_uc(r2, i0 + 2);
+           lshi(r2, r2, 16);
+#  else
+           ldi_uc(r0, i0);
+           lshi(r0, r0, 16);
+           ldi_us(r2, i0 + 1);
+#  endif
+           break;
+#  if __WORDSIZE == 32
+       default:
+           ldi_i(r0, i0);
+           break;
+#  else
+       case 4:
+           ldi_ui(r0, i0);
+           break;
+       case 5:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldi_ui(r0, i0);
+           ldi_uc(r2, i0 + 4);
+           lshi(r2, r2, 32);
+#    else
+           ldi_ui(r0, i0);
+           lshi(r0, r0, 8);
+           ldi_uc(r2, i0 + 4);
+#    endif
+           break;
+       case 6:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldi_ui(r0, i0);
+           ldi_us(r2, i0 + 4);
+           lshi(r2, r2, 32);
+#    else
+           ldi_ui(r0, i0);
+           lshi(r0, r0, 16);
+           ldi_us(r2, i0 + 4);
+#    endif
+           break;
+       case 7:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           ldi_ui(r0, i0);
+           ldi_ui(r2, i0 + 4);
+           lshi(r2, r2, 40);
+           rshi_u(r2, r2, 8);
+#    else
+           ldi_ui(r0, i0);
+           lshi(r0, r0, 24);
+           ldi_ui(r2, i0 + 4);
+           rshi(r2, r2, 8);
+#    endif
+           break;
+       default:
+           ldi_l(r0, i0);
+           break;
+#  endif
+    }
+    if (i1 & (i1 - 1)) {
+       orr(r0, r0, r2);
+       jit_unget_reg(t0);
+    }
+}
+
+static void
+_generic_unstr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_word_t         t0, r2;
+    assert(i0 > 0 && i0 <= sizeof(jit_word_t));
+    if (i0 & (i0 - 1)) {
+       t0 = jit_get_reg(jit_class_gpr);
+       r2 = rn(t0);
+    }
+    switch (i0) {
+       case 1:
+           str_c(r0, r1);
+           break;
+       case 2:
+           str_s(r0, r1);
+           break;
+       case 3:
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+           str_s(r0, r1);
+           rshi(r2, r1, 16);
+           stxi_c(2, r0, r2);
+#  else
+           stxi_c(2, r0, r1);
+           rshi(r2, r1, 8);
+           str_s(r0, r2);
+#  endif
+           break;
+#  if __WORDSIZE == 32
+       default:
+           str_i(r0, r1);
+           break;
+#  else
+       case 4:
+           str_i(r0, r1);
+           break;
+       case 5:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           str_i(r0, r1);
+           rshi(r2, r1, 32);
+           stxi_c(4, r0, r2);
+#    else
+           stxi_c(4, r0, r1);
+           rshi(r2, r1, 8);
+           str_i(r0, r2);
+#    endif
+           break;
+       case 6:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           str_i(r0, r1);
+           rshi(r2, r1, 32);
+           stxi_s(4, r0, r2);
+#    else
+           stxi_s(4, r0, r1);
+           rshi(r2, r1, 16);
+           str_i(r0, r2);
+#    endif
+           break;
+       case 7:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           str_i(r0, r1);
+           rshi(r2, r1, 32);
+           stxi_s(4, r0, r2);
+           rshi(r2, r2, 16);
+           stxi_c(6, r0, r2);
+#    else
+           stxi_c(6, r0, r1);
+           rshi(r2, r1, 8);
+           stxi_s(4, r0, r2);
+           rshi(r2, r2, 16);
+           str_i(r0, r2);
+#    endif
+           break;
+       default:
+           str_l(r0, r1);
+           break;
+#  endif
+    }
+    if (i0 & (i0 - 1))
+       jit_unget_reg(t0);
+}
+
+static void
+_generic_unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         t0, r2;
+    assert(i1 > 0 && i1 <= sizeof(jit_word_t));
+    if (i1 & (i1 - 1)) {
+       t0 = jit_get_reg(jit_class_gpr);
+       r2 = rn(t0);
+    }
+    switch (i1) {
+       case 1:
+           sti_c(i0, r0);
+           break;
+       case 2:
+           sti_s(i0, r0);
+           break;
+       case 3:
+#  if __BYTE_ORDER == __LITTLE_ENDIAN
+           sti_s(i0, r0);
+           rshi(r2, r0, 16);
+           sti_c(2 + i0, r2);
+#  else
+           sti_c(2 + i0, r0);
+           rshi(r2, r0, 8);
+           sti_s(i0, r2);
+#  endif
+           break;
+#  if __WORDSIZE == 32
+       default:
+           sti_i(i0, r0);
+           break;
+#  else
+       case 4:
+           sti_i(i0, r0);
+           break;
+       case 5:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           sti_i(i0, r0);
+           rshi(r2, r0, 32);
+           sti_c(4 + i0, r2);
+#    else
+           stxi_c(4, i0, r0);
+           rshi(r2, r0, 8);
+           sti_i(i0, r2);
+#    endif
+           break;
+       case 6:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           sti_i(i0, r0);
+           rshi(r2, r0, 32);
+           sti_s(4 + i0, r2);
+#    else
+           sti_s(4 + i0, r0);
+           rshi(r2, r0, 16);
+           sti_i(i0, r2);
+#    endif
+           break;
+       case 7:
+#    if __BYTE_ORDER == __LITTLE_ENDIAN
+           sti_i(i0, r0);
+           rshi(r2, r0, 32);
+           sti_s(4 + i0, r2);
+           rshi(r2, r2, 16);
+           sti_c(6 + i0, r2);
+#    else
+           sti_c(6 + i0, r0);
+           rshi(r2, r0, 8);
+           sti_s(4 + i0, r2);
+           rshi(r2, r2, 16);
+           sti_i(i0, r2);
+#    endif
+           break;
+       default:
+           sti_l(i0, r0);
+           break;
+#  endif
+    }
+    if (i1 & (i1 - 1))
+       jit_unget_reg(t0);
+}
+
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__)
+static void
+_generic_unldr_x(jit_state_t *_jit,
+                jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 == 4 || i0 == 8);
+    if (i0 == 4)
+       ldr_f(r0, r1);
+    else
+       ldr_d(r0, r1);
+}
+
+static void
+_generic_unldi_x(jit_state_t *_jit,
+                jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    assert(i1 == 4 || i1 == 8);
+    if (i1 == 4)
+       ldi_f(r0, i0);
+    else
+       ldi_d(r0, i0);
+}
+
+static void
+_generic_unstr_x(jit_state_t *_jit,
+                jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    assert(i0 == 4 || i0 == 8);
+    if (i0 == 4)
+       str_f(r0, r1);
+    else
+       str_d(r0, r1);
+}
+
+static void
+_generic_unsti_x(jit_state_t *_jit,
+                jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    assert(i1 == 4 || i1 == 8);
+    if (i1 == 4)
+       sti_f(i0, r0);
+    else
+       sti_d(i0, r0);
+}
+#endif
+
+#if defined(stack_framesize)
+static maybe_unused void
+_patch_alist(jit_state_t *_jit, jit_bool_t revert)
+{
+    jit_int32_t                 diff;
+    jit_node_t         *node;
+    diff = jit_diffsize();
+    if (diff) {
+       if (revert)
+           diff = -diff;
+       for (node = _jitc->function->alist; node; node = node->link) {
+           switch (node->code) {
+               case jit_code_ldxi_c:   case jit_code_ldxi_uc:
+               case jit_code_ldxi_s:   case jit_code_ldxi_us:
+               case jit_code_ldxi_i:
+#if __WORDSIZE == 64
+               case jit_code_ldxi_ui:  case jit_code_ldxi_l:
+#endif
+               case jit_code_ldxi_f:   case jit_code_ldxi_d:
+                   node->w.w -= diff;
+                   break;
+               case jit_code_stxi_c:   case jit_code_stxi_s:
+               case jit_code_stxi_i:
+#if __WORDSIZE == 64
+               case jit_code_stxi_l:
+#endif
+               case jit_code_stxi_f:   case jit_code_stxi_d:
+                   node->u.w -= diff;
+                   break;
+               default:
+                   abort();
+           }
+       }
+    }
+}
 #endif