+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_word_t w;
+ w = beqi(_jit->pc.w, r2, 0);
+ ORI(r1, 0, r0);
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ jit_word_t w;
+ w = bnei(_jit->pc.w, r2, 0);
+ ORI(r1, 0, r0);
+ patch_at(w, _jit->pc.w);
+}
+
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ jit_int32_t iscasi, r1_reg;
+ if ((iscasi = (r1 == _NOREG))) {
+ r1_reg = jit_get_reg(jit_class_gpr);
+ r1 = rn(r1_reg);
+ movi(r1, i0);
+ }
+ /* Do not clobber r2 */
+ movr(r0, r3);
+ /* The CASXA instruction compares the value in register r[rs2] with
+ * the doubleword in memory pointed to by the doubleword address in
+ * r[rs1]. If the values are equal, the value in r[rd] is swapped
+ * with the doubleword pointed to by the doubleword address in r[rs1].
+ * If the values are not equal, the contents of the doubleword pointed
+ * to by r[rs1] replaces the value in r[rd], but the memory location
+ * remains unchanged.
+ */
+# if __WORDSIZE == 32
+ CASA(r1, r2, r0);
+# else
+ CASXA(r1, r2, r0);
+# endif
+ eqr(r0, r0, r2);
+ if (iscasi)
+ jit_unget_reg(r1_reg);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ if (jit_cpu.lzcnt) {
+ comr(r0, r1);
+ clzr(r0, r0);
+ }
+ else
+ fallback_clo(r0, r1);
+}
+
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+# if CHECK_LZCNT
+ if (jit_cpu.lzcnt) {
+# if __WORDSIZE == 32
+ jit_word_t w;
+ SLLXI(r1, 32, r0);
+ LZCNT(r0, r0);
+ w = blei(_jit->pc.w, r0, 31);
+ rshi(r0, r0, 1); /* r0 is 64 */
+ patch_at(w, _jit->pc.w);
+# else
+ LZCNT(r1, r0);
+# endif
+ }
+ else
+# endif
+ fallback_clz(r0, r1);
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ if (jit_cpu.lzcnt) {
+ comr(r0, r1);
+ ctzr(r0, r0);
+ }
+ else
+ fallback_cto(r0, r1);
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_int32_t t0, t1;
+ if (jit_cpu.lzcnt) {
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ negr(rn(t0), r1);
+ andr(rn(t0), rn(t0), r1);
+ clzr(r0, rn(t0));
+ xori(rn(t1), r0, __WORDSIZE - 1);
+ movnr(r0, rn(t1), rn(t0));
+ jit_unget_reg(t0);
+ jit_unget_reg(t1);
+ }
+ else
+ fallback_ctz(r0, r1);
+}
+