+ movr(r0, r1);
+ SRL(r0, i0, 0);
+}
+#endif
+
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) /* emit code: r0 = r1 with its bit order reversed */
+{
+ jit_int32_t t0, t1, t2; /* t0 holds the current mask, t1/t2 the two shifted halves */
+ movr(r0, r1); /* v = r1; every later step works in place on r0 */
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ t2 = jit_get_reg(jit_class_gpr);
+ movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); /* stage 1: swap adjacent single bits */
+ rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); /* stage 2: swap adjacent 2-bit groups */
+ rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); /* stage 3: swap adjacent 4-bit groups */
+ rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL); /* stage 4: swap adjacent bytes */
+ rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# if __WORDSIZE == 32 /* last stage: swap the two 16-bit halves, no mask is loaded */
+ rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+ lshi(rn(t2), r0, 16); /* t2 = v << 16 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# else /* __WORDSIZE != 32: the masks used are 64 bits wide */
+ movi(rn(t0), 0x0000ffff0000ffffL); /* stage 5: swap adjacent 16-bit groups */
+ rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ rshi_u(rn(t1), r0, 32); /* t1 = v >> 32; last stage: swap the two 32-bit halves, no mask is loaded */
+ lshi(rn(t2), r0, 32); /* t2 = v << 32 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# endif
+ jit_unget_reg(t2); /* release the temporaries in reverse order of acquisition */
+ jit_unget_reg(t1);
+ jit_unget_reg(t0);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) /* emit code: r0 = count of leading one bits in r1 */
+{
+#if CHECK_FLOGR /* when set, jit_cpu.flogr is tested at code-generation time */
+ if (jit_cpu.flogr) {
+#endif
+ comr(r0, r1); /* r0 = complement of r1, so leading ones become leading zeros */
+ clzr(r0, r0); /* count them with the leading-zero emitter */
+#if CHECK_FLOGR
+ }
+ else
+ fallback_clo(r0, r1); /* jit_cpu.flogr not set: use the fallback sequence */
+#endif
+}
+
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) /* emit code: r0 = count of leading zero bits in r1, via FLOGR */
+{
+#if CHECK_FLOGR /* when set, jit_cpu.flogr is tested at code-generation time */
+ if (jit_cpu.flogr) {
+#endif
+#if __WORDSIZE == 32
+ jit_word_t w; /* address of the forward branch patched below */
+#endif
+ jit_int32_t regno;
+ regno = jit_get_reg_pair(); /* NOTE(review): FLOGR presumably writes an even/odd register pair, hence a pair is reserved - confirm */
+#if __WORDSIZE == 32
+ SLLG(rn(regno), r1, 32, 0); /* scratch = r1 shifted left by 32 (assumes SLLG is a 64-bit logical left shift - confirm) */
+#else
+ movr(rn(regno), r1); /* work on a copy in the scratch register */
+#endif
+ FLOGR(rn(regno), rn(regno));
+ movr(r0, rn(regno)); /* copy the FLOGR result out of the scratch register */
+#if __WORDSIZE == 32
+ w = blei_p(_jit->pc.w, r0, 31); /* results 0..31 skip the adjustment below */
+ rshi(r0, r0, 1); /* r0 is 64 here (presumably FLOGR's result for a zero operand - confirm); 64 >> 1 = 32 */
+ patch_at(w, _jit->pc.w); /* branch target is the instruction after the shift */
+#endif
+ jit_unget_reg_pair(regno);
+#if CHECK_FLOGR
+ }
+ else
+ fallback_clz(r0, r1); /* jit_cpu.flogr not set: use the fallback sequence */
+#endif
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) /* emit code: r0 = count of trailing one bits in r1 */
+{
+#if CHECK_FLOGR /* when set, jit_cpu.flogr is tested at code-generation time */
+ if (jit_cpu.flogr) {
+#endif
+ bitswap(r0, r1); /* reverse the bit order so trailing ones become leading ones */
+ clor(r0, r0); /* count them with the leading-ones emitter */
+#if CHECK_FLOGR
+ }
+ else
+ fallback_cto(r0, r1); /* jit_cpu.flogr not set: use the fallback sequence */
+#endif
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+ if (jit_cpu.flogr) {
+#endif
+ bitswap(r0, r1);
+ clzr(r0, r0);
+#if CHECK_FLOGR
+ }
+ else
+ fallback_ctz(r0, r1);
+#endif