+ reg = jit_get_reg(jit_class_gpr);
+ POPCNTB(r0, r1);
+#if __WORDSIZE == 32
+ movi(rn(reg), 0x01010101);
+#else
+ movi(rn(reg), 0x0101010101010101);
+#endif
+ mullr(r0, r0, rn(reg));
+ rshi_u(r0, r0, __WORDSIZE - 8);
+ jit_unget_reg(reg);
+}
+
+static void
+_extr(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_word_t i0 ,jit_word_t i1)
+{
+ assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+ if ( i1 == __WORDSIZE)
+ movr(r0, r1);
+ else {
+# if __BYTE_ORDER == __BIG_ENDIAN
+ i0 = __WORDSIZE - (i0 + i1);
+# endif
+ if (__WORDSIZE - (i0 + i1)) {
+ lshi(r0, r1, __WORDSIZE - (i0 + i1));
+ rshi(r0, r0, __WORDSIZE - i1);
+ }
+ else
+ rshi(r0, r1, __WORDSIZE - i1);
+ }
+}
+
+static void
+_extr_u(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_word_t i0 ,jit_word_t i1)
+{
+ assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+ if (i1 == __WORDSIZE)
+ movr(r0, r1);
+ else {
+# if __BYTE_ORDER == __BIG_ENDIAN
+ i0 = __WORDSIZE - (i0 + i1);
+# endif
+# if __WORDSIZE == 32
+ RLWINM(r0, r1, (32 - i0) & 0x1f, 32 - i1, 31);
+# else
+ RLDICL(r0, r1, (64 - i0) & 0x3f, 64 - i1);
+# endif
+ }
+}
+
+static void
+_depr(jit_state_t *_jit,
+ jit_int32_t r0, jit_int32_t r1, jit_word_t i0 ,jit_word_t i1)
+{
+ assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+ if (i1 == __WORDSIZE)
+ movr(r0, r1);
+ else {
+# if __BYTE_ORDER == __BIG_ENDIAN
+ i0 = __WORDSIZE - (i0 + i1);
+# endif
+#if __WORDSIZE == 32
+ RLWIMI(r0, r1, i0, 32 - (i0 + i1), 31 - i0);
+#else
+ RLDIMI(r0, r1, i0, 64 - (i0 + i1));
+#endif
+ }
+}
+
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
+{
+ jit_int32_t reg, addr_reg;
+
+ /* Convert load followed by bswap to a single instruction */
+ /* FIXME r0 and r1 do not need to be the same, only must check if
+ * r1 was loaded in previous instruction */
+ if (no_flag && r0 == r1) {
+ if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00022e | r0 << 21)) {
+ /* Convert LHZX to LHBRX */
+ _jit->pc.ui--;
+ LHBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f);
+ return;
+ }
+
+ if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0xa0000000 | r0 << 21)) {
+ /* Convert LHZ to LHBRX */
+ _jit->pc.ui--;
+ addr_reg = (*_jit->pc.ui >> 16) & 0x1f;
+
+ reg = jit_get_reg(jit_class_gpr);
+ LI(rn(reg), (short)*_jit->pc.ui);
+ LHBRX(r0, rn(reg), addr_reg);
+ jit_unget_reg(reg);
+ return;
+ }
+ }
+
+ if (r0 == r1) {
+ RLWIMI(r0, r0, 16, 8, 15);
+ RLWINM(r0, r0, 24, 16, 31);
+ } else {
+ RLWINM(r0, r1, 8, 16, 23);
+ RLWIMI(r0, r1, 24, 24, 31);
+ }
+}
+
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
+{
+ jit_int32_t reg, addr_reg;
+
+ /* Convert load followed by bswap to a single instruction */
+ /* FIXME r0 and r1 do not need to be the same, only must check if
+ * r1 was loaded in previous instruction */
+ if (no_flag && r0 == r1) {
+ if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00002e | r0 << 21)) {
+ /* Convert LWZX to LWBRX */
+ _jit->pc.ui--;
+ LWBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f);
+ return;
+ }
+
+ if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0x80000000 | r0 << 21)) {
+ /* Convert LWZ to LWBRX */
+ _jit->pc.ui--;
+ addr_reg = (*_jit->pc.ui >> 16) & 0x1f;
+
+ reg = jit_get_reg(jit_class_gpr);
+ LI(rn(reg), (short)*_jit->pc.ui);
+ LWBRX(r0, rn(reg), addr_reg);
+ jit_unget_reg(reg);
+ return;
+ }
+ }
+