/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# define XCMPLI(cr,l,a,u) FCI(10,cr,l,a,u)
# define CMPLDI(a,s) XCMPLI(0,1,a,s)
# define CMPLWI(a,s) XCMPLI(0,0,a,s)
+# if __WORDSIZE == 32
+# define CMPX(a,b) CMPW(a,b)
+# define CMPXI(a,s) CMPWI(a,s)
+# define CMPLX(a,b) CMPLW(a,b)
+# define CMPLXI(a,s) CMPLWI(a,s)
+# else
+# define CMPX(a,b) CMPD(a,b)
+# define CMPXI(a,s) CMPDI(a,s)
+# define CMPLX(a,b) CMPLD(a,b)
+# define CMPLXI(a,s) CMPLDI(a,s)
+# endif
# define CNTLZW(a,s) FX(31,s,a,0,26)
# define CNTLZW_(a,s) FX_(31,s,a,0,26)
+# define CNTLZD(a,s) FX(31,s,a,0,58)
+# define CNTLZD_(a,s) FX_(31,s,a,0,58)
# define CRAND(d,a,b) FX(19,d,a,b,257)
# define CRANDC(d,a,b) FX(19,d,a,b,129)
# define CREQV(d,a,b) FX(19,d,a,b,289)
#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define negr(r0,r1) NEG(r0,r1)
# define comr(r0,r1) NOT(r0,r1)
+# define bitswap(r0, r1) _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# if __WORDSIZE == 32
+# define clzr(r0, r1) CNTLZW(r0, r1)
+# else
+# define clzr(r0, r1) CNTLZD(r0, r1)
+# endif
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define extr_c(r0,r1) EXTSB(r0,r1)
# define extr_uc(r0,r1) ANDI_(r0,r1,0xff)
# define extr_s(r0,r1) EXTSH(r0,r1)
# define callr(r0,i0) _callr(_jit,r0,i0)
static void _callr(jit_state_t*,jit_int32_t,jit_int32_t);
# define calli(i0,i1) _calli(_jit,i0,i1)
-static void _calli(jit_state_t*,jit_word_t,jit_int32_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t,jit_int32_t);
# define calli_p(i0,i1) _calli_p(_jit,i0,i1)
static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t);
# else
# define callr(r0) _callr(_jit,r0)
static void _callr(jit_state_t*,jit_int32_t);
# define calli(i0) _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
# define calli_p(i0) _calli_p(_jit,i0)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
#endif
static void
_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPWI(r2, 0);
+ CMPXI(r2, 0);
BEQ(8);
MR(r0, r1);
}
static void
_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPWI(r2, 0);
+ CMPXI(r2, 0);
BNE(8);
MR(r0, r1);
}
jit_unget_reg(r1_reg);
}
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int v; // 32-bit word to reverse bit order
+
+// swap odd and even bits
+v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+// swap consecutive pairs
+v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+// swap nibbles ...
+v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
+// swap bytes
+v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
+// swap 2-byte long pairs
+v = ( v >> 16 ) | ( v << 16);
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_int32_t t0, t1, t2, t3, t4;
+ movr(r0, r1);
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ t2 = jit_get_reg(jit_class_gpr);
+ movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+ rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+ rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+ rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
+ rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# if __WORDSIZE == 32
+ rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+ lshi(rn(t2), r0, 16); /* t2 = v << 16 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# else
+ movi(rn(t0), 0x0000ffff0000ffffL);
+ rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+ lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
+ lshi(rn(t2), r0, 32); /* t2 = v << 32 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# endif
+ jit_unget_reg(t2);
+ jit_unget_reg(t1);
+ jit_unget_reg(t0);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1);
+ clzr(r0, r0);
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ bitswap(r0, r1);
+ clor(r0, r0);
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ bitswap(r0, r1);
+ clzr(r0, r0);
+}
+
static void
_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
{
static void
_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_LT);
}
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
MFCR(r0);
static void
_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
CRNOT(CR_GT, CR_GT);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_GT);
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
CRNOT(CR_GT, CR_GT);
static void
_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_EQ);
}
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else if (can_zero_extend_short_p(i0))
CMPLWI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
MFCR(r0);
static void
_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
CRNOT(CR_LT, CR_LT);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_LT);
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
CRNOT(CR_LT, CR_LT);
static void
_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_GT);
}
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
MFCR(r0);
static void
_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
CRNOT(CR_EQ, CR_EQ);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_EQ);
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else if (can_zero_extend_short_p(i0))
CMPLWI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
CRNOT(CR_EQ, CR_EQ);
_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BLT(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BLE(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BEQ(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else if (can_zero_extend_short_p(i1))
CMPLWI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BGE(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BGT(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BNE(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else if (can_zero_extend_short_p(i1))
CMPLWI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
jit_int32_t reg;
if (r1 == _R0_REGNO) {
if (r2 != _R0_REGNO)
- LWZX(r0, r2, r1);
+ LWAX(r0, r2, r1);
else {
reg = jit_get_reg(jit_class_gpr);
movr(rn(reg), r1);
}
}
else
- LWZX(r0, r1, r2);
+ LWAX(r0, r1, r2);
}
static void
}
/* assume fixed address or reachable address */
-static void
+static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0
# if _CALL_SYSV
, jit_int32_t varargs
# endif
)
{
+ jit_word_t w;
# if _CALL_SYSV
jit_word_t d;
d = (i0 - _jit->pc.w - !!varargs * 4) & ~3;
if (can_sign_extend_jump_p(d)) {
- /* Tell double arguments were passed in registers. */
- if (varargs)
- CREQV(6, 6, 6);
- BL(d);
- } else
+ /* Tell double arguments were passed in registers. */
+ if (varargs)
+ CREQV(6, 6, 6);
+ w = _jit->pc.w;
+ BL(d);
+ }
+ else
# endif
{
+ w = _jit->pc.w;
movi(_R12_REGNO, i0);
callr(_R12_REGNO
# if _CALL_SYSV
# endif
);
}
+ return (w);
}
/* absolute jump */
if (!can_sign_extend_short_p(d)) {
/* use absolute address */
assert(can_sign_extend_short_p(label));
- d |= 2;
+ d = label | 2;
}
u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe);
break;
if (!can_sign_extend_jump_p(d)) {
/* use absolute address */
assert(can_sign_extend_jump_p(label));
- d |= 2;
+ d = label | 2;
}
- u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe);
+ u.i[0] = (u.i[0] & ~0x3fffffc) | (d & 0x3fffffd);
break;
case 15: /* LI */
#if __WORDSIZE == 32