/*
- * Copyright (C) 2013-2019 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#if PROTO
# if __WORDSIZE == 32
# define ldr(r0,r1) ldr_i(r0,r1)
+# define ldi(r0,i0) ldi_i(r0,i0)
# define ldxr(r0,r1,r2) ldxr_i(r0,r1,r2)
# define ldxi(r0,r1,i0) ldxi_i(r0,r1,i0)
+# define str(r0,r1) str_i(r0,r1)
+# define sti(i0,r0) sti_i(i0,r0)
+# define stxr(r0,r1,r2) stxr_i(r0,r1,r2)
# define stxi(i0,r0,r1) stxi_i(i0,r0,r1)
# else
# define ldr(r0,r1) ldr_l(r0,r1)
+# define ldi(r0,i0) ldi_l(r0,i0)
# define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2)
# define ldxi(r0,r1,i0) ldxi_l(r0,r1,i0)
+# define str(r0,r1) str_l(r0,r1)
+# define sti(i0,r0) sti_l(i0,r0)
+# define stxr(r0,r1,r2) stxr_l(r0,r1,r2)
# define stxi(i0,r0,r1) stxi_l(i0,r0,r1)
# endif
# define is(i) *_jit->pc.us++ = i
# define EAR(R1,R2) RRE_(0xB24F,R1,R2)
/* EXTRACT PSW */
# define EPSW(R1,R2) RRE_(0xB98D,R1,R2)
+/* FIND LEFTMOST ONE */
+# define FLOGR(R1,R2) RRE_(0xB983,R1,R2)
/* INSERT CHARACTER */
# define IC(R1,D2,X2,B2) RX_(0x43,R1,X2,B2,D2)
# define ICY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x73)
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# if __WORDSIZE == 64
+#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#endif
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
+static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
+static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2)
static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0)
static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+# define hmulr(r0, r1, r2) qmulr(JIT_NOREG, r0, r1, r2)
+# define hmuli(r0, r1, i0) qmuli(JIT_NOREG, r0, r1, i0)
+# define hmulr_u(r0, r1, r2) qmulr_u(JIT_NOREG, r0, r1, r2)
+# define hmuli_u(r0, r1, i0) qmuli_u(JIT_NOREG, r0, r1, i0)
# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3)
static void _qmulr(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t);
# if __WORDSIZE == 32
# define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2)
static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-# else
-# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2)
-# endif
-# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0)
+# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0)
static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-# if __WORDSIZE == 32
# define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2)
static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-# else
-# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2)
-# endif
-# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0)
+/* Signed right shift by an immediate (32-bit word size variant).
+ * The expansion is a bare call expression: the caller supplies the ';',
+ * which keeps the macro usable as the sole statement of an if/else arm
+ * and consistent with the neighbouring lshi/rshi_u definitions. */
+# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0)
static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-# if __WORDSIZE == 32
# define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2)
static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# else
+# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2)
+# define lshi(r0,r1,i0) SLLG(r0,r1,i0,0)
+# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2)
+# define rshi(r0,r1,i0) SRAG(r0,r1,i0,0)
# define rshr_u(r0,r1,r2) SRLG(r0,r1,0,r2)
+# define rshi_u(r0,r1,i0) SRLG(r0,r1,i0,0)
# endif
-# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
-static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# if __WORDSIZE == 32
# define negr(r0,r1) LCR(r0,r1)
+# define lrotr(r0,r1,r2) RLL(r0,r1,0,r2)
+# define lroti(r0,r1,i0) RLL(r0,r1,i0,0)
# else
# define negr(r0,r1) LCGR(r0,r1)
+# define lrotr(r0,r1,r2) RLLG(r0,r1,0,r2)
+# define lroti(r0,r1,i0) RLLG(r0,r1,i0,0)
# endif
+# define rrotr(r0,r1,r2) _rrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+/* Rotate right by the immediate i0, expressed as a rotate left by
+ * __WORDSIZE - i0.  i0 is parenthesized so that an expression argument
+ * (e.g. a+b) is subtracted as a whole. */
+# define rroti(r0,r1,i0) lroti(r0,r1,__WORDSIZE-(i0))
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define comr(r0,r1) _comr(_jit,r0,r1)
static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
# define andr(r0,r1,r2) _andr(_jit,r0,r1,r2)
static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0)
static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# if __WORDSIZE == 32
-# define htonr_ui(r0,r1) movr(r0,r1)
-# else
-# define htonr_ui(r0,r1) extr_ui(r0,r1)
-# define htonr_ul(r0,r1) movr(r0,r1)
-# endif
+#define extr(r0,r1,i0,i1) _extr(_jit,r0,r1,i0,i1)
+static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
+#define extr_u(r0,r1,i0,i1) _extr_u(_jit,r0,r1,i0,i1)
+static void _extr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
+#define depr(r0,r1,i0,i1) _depr(_jit,r0,r1,i0,i1)
+static void _depr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
# define extr_c(r0,r1) LGBR(r0,r1)
# define extr_uc(r0,r1) LLGCR(r0,r1)
# define extr_s(r0,r1) LGHR(r0,r1)
# define bmci(i0,r0,i1) bmxi(CC_E,i0,r0,i1)
# define bmci_p(i0,r0,i1) bmxi_p(CC_E,i0,r0,i1)
# define jmpr(r0) BR(r0)
-# define jmpi(i0) _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+# define jmpi(i0,i1) _jmpi(_jit,i0,i1)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t, jit_bool_t);
# define jmpi_p(i0) _jmpi_p(_jit,i0)
static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
# define callr(r0) BALR(_R14_REGNO,r0)
-# define calli(i0) _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+# define calli(i0,i1) _calli(_jit,i0,i1)
+static jit_word_t _calli(jit_state_t*,jit_word_t, jit_bool_t);
# define calli_p(i0) _calli_p(_jit,i0)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
# define prolog(i0) _prolog(_jit,i0)
return (w);
}
+/* Conditional move: emit code that copies r1 into r0 unless r2 compares
+ * equal to zero.  A forward branch (beqi_p against 0) is emitted first
+ * and later patched to land just past the register load. */
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* The real target is not known yet; the current pc is a placeholder. */
+    jit_word_t skip = beqi_p(_jit->pc.w, r2, 0);
+#if __WORDSIZE != 32
+    LGR(r0, r1);
+#else
+    LR(r0, r1);
+#endif
+    /* Resolve the branch to the first instruction after the load. */
+    patch_at(skip, _jit->pc.w);
+}
+
+/* Conditional move: emit code that copies r1 into r0 unless r2 compares
+ * different from zero.  A forward branch (bnei_p against 0) is emitted
+ * first and later patched to land just past the register load. */
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* The real target is not known yet; the current pc is a placeholder. */
+    jit_word_t skip = bnei_p(_jit->pc.w, r2, 0);
+#if __WORDSIZE != 32
+    LGR(r0, r1);
+#else
+    LR(r0, r1);
+#endif
+    /* Resolve the branch to the first instruction after the load. */
+    patch_at(skip, _jit->pc.w);
+}
+
+static void /* Emits the bswapr_us sequence from r1 into r0 (byte swap of an unsigned 16-bit value, judging by the name). */
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ LRVR(r0, r1); /* presumably LOAD REVERSED (32-bit byte reversal of r1 into r0) -- confirm against the LRVR macro */
+ SRL(r0, 16, 0); /* shift r0 right by the immediate 16, no count register */
+ LLGHR(r0, r0); /* presumably zero-extends the low halfword of r0 -- confirm against the LLGHR macro */
+}
+
+static void /* Emits the bswapr_ui sequence from r1 into r0 (byte swap of an unsigned 32-bit value, judging by the name). */
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ LRVR(r0, r1); /* presumably LOAD REVERSED (32-bit byte reversal of r1 into r0) -- confirm against the LRVR macro */
+# if __WORDSIZE == 64
+ LLGFR(r0, r0); /* 64-bit words only; presumably zero-extends the low 32 bits of r0 -- confirm */
+# endif
+}
+
+#if __WORDSIZE == 64 /* this helper is only compiled for 64-bit words, matching its prototype */
+static void /* Emits the bswapr_ul sequence from r1 into r0 (byte swap of a full 64-bit word, judging by the name). */
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ LRVGR(r0, r1); /* presumably the 64-bit LOAD REVERSED form -- confirm against the LRVGR macro */
+}
+#endif /* __WORDSIZE == 64 */
+
+/* Emit a compare-and-swap on the word addressed by r1, or by the
+ * immediate i0 when r1 is _NOREG (the casi form).  r2 is the expected
+ * old value and r3 the new value; r0 receives the eqr() comparison of
+ * the value left behind by CS/CSG against r2.
+ *
+ * NOTE(review): if r0 names the same register as r2, the final eqr()
+ * compares a register with itself; likewise movr(r0, r2) overwrites r0
+ * before r1/r3 are consumed.  Confirm callers never alias r0 with the
+ * other operands. */
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+    jit_int32_t addr_reg;
+    jit_int32_t addr_is_imm = (r1 == _NOREG);
+    if (addr_is_imm) {
+        /* No address register supplied: load i0 into a scratch one. */
+        addr_reg = jit_get_reg_but_zero(0);
+        r1 = rn(addr_reg);
+        movi(r1, i0);
+    }
+    /* Work on a copy of the expected value so r2 is not clobbered. */
+    movr(r0, r2);
+    /* CS and CSG effectively perform, atomically:
+     *     if (*r1 == r0)
+     *         *r1 = r3;
+     *     else
+     *         r0 = *r1;
+     * Hence the cpu flags need not be inspected: comparing r0 with r2
+     * afterwards tells whether the store happened.  When the old value
+     * (r2) and the new value (r3) are the same the outcome is somewhat
+     * ill-defined, but it still behaves as the expected no-op. */
+#if __WORDSIZE != 32
+    CSG(r0, r3, 0, r1);
+#else
+    CS(r0, r3, 0, r1);
+#endif
+    eqr(r0, r0, r2);
+    if (addr_is_imm)
+        jit_unget_reg(addr_reg);
+}
+
static void
_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_int32_t reg;
/* The only invalid condition is r0 == r1 */
jit_int32_t t2, t3, s2, s3;
- if (r2 == r0 || r2 == r1) {
+ if ((r0 != JIT_NOREG && r2 == r0) || r2 == r1) {
s2 = jit_get_reg(jit_class_gpr);
t2 = rn(s2);
movr(t2, r2);
}
else
t2 = r2;
- if (r3 == r0 || r3 == r1) {
+ if ((r0 != JIT_NOREG && r3 == r0) || r3 == r1) {
s3 = jit_get_reg(jit_class_gpr);
t3 = rn(s3);
movr(t3, r3);
regno = jit_get_reg_pair();
movr(rn(regno) + 1, r2);
MULU_(rn(regno), r3);
- movr(r0, rn(regno) + 1);
+ if (r0 != JIT_NOREG)
+ movr(r0, rn(regno) + 1);
movr(r1, rn(regno));
jit_unget_reg_pair(regno);
}
movr(rn(regno) + 1, r2);
movi(rn(regno), i0);
MULU_(rn(regno), rn(regno));
- movr(r0, rn(regno) + 1);
+ if (r0 != JIT_NOREG)
+ movr(r0, rn(regno) + 1);
movr(r1, rn(regno));
jit_unget_reg_pair(regno);
}
SLL(r0, 0, r2);
}
}
-#endif
static void
_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
- reg = jit_get_reg_but_zero(0);
- movi(rn(reg), i0);
- lshr(r0, r1, rn(reg));
- jit_unget_reg_but_zero(reg);
+ movr(r0, r1); /* left shift by immediate: SLL names a single register, so copy the source into r0 first */
+ SLL(r0, i0, 0); /* shift r0 by i0 directly; no scratch register is needed for the count any more (the register-count form elsewhere is SLL(r0, 0, r2)) */
}
-# if __WORDSIZE == 32
static void
_rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
SRA(r0, 0, r2);
}
}
-#endif
static void
_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
- reg = jit_get_reg_but_zero(0);
- movi(rn(reg), i0);
- rshr(r0, r1, rn(reg));
- jit_unget_reg_but_zero(reg);
+ movr(r0, r1); /* right shift by immediate (signed variant; the unsigned one uses SRL): copy the source into r0 first */
+ SRA(r0, i0, 0); /* shift r0 by i0 directly; no scratch register is needed for the count any more (the register-count form elsewhere is SRA(r0, 0, r2)) */
}
-# if __WORDSIZE == 32
static void
_rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
SRL(r0, 0, r2);
}
}
-#endif
static void
_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+ movr(r0, r1); /* unsigned right shift by immediate: copy the source into r0 first */
+ SRL(r0, i0, 0); /* shift r0 by i0 directly (the register-count form elsewhere is SRL(r0, 0, r2)) */
+}
+#endif /* presumably closes the __WORDSIZE == 32 region holding the out-of-line shift helpers -- the opening #if is not visible here */
+
+static void
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_int32_t reg;
- reg = jit_get_reg_but_zero(0);
- movi(rn(reg), i0);
- rshr_u(r0, r1, rn(reg));
- jit_unget_reg_but_zero(reg);
+ if (r0 != r1 && r0 != r2) { /* rotate right by r2, done as a rotate left: r0 aliases neither input, so it can hold the count */
+ rsbi(r0, r2, __WORDSIZE); /* presumably r0 = __WORDSIZE - r2 (reverse subtract with immediate) -- confirm against rsbi */
+ lrotr(r0, r1, r0); /* rotate r1 left by the count held in r0, result in r0 */
+ }
+ else { /* r0 overlaps an input: compute the count in a scratch register instead */
+ reg = jit_get_reg_but_zero(0);
+ rsbi(rn(reg), r2, __WORDSIZE); /* same count computation as above, into the scratch register */
+ lrotr(r0, r1, rn(reg));
+ jit_unget_reg(reg); /* release the scratch register */
+ }
+}
+
+/* Count leading ones of r1 into r0, implemented as a bitwise complement
+ * followed by a count of leading zeros.  When CHECK_FLOGR is enabled and
+ * jit_cpu.flogr is not set, the generic fallback_clo() is used instead. */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (!jit_cpu.flogr) {
+        fallback_clo(r0, r1);
+        return;
+    }
+#endif
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+/* Count leading zeros of r1 into r0 using FLOGR on a scratch register
+ * pair.  When CHECK_FLOGR is enabled and jit_cpu.flogr is not set, the
+ * generic fallback_clz() is used instead.
+ *
+ * With 32-bit words the operand is first shifted into the upper half of
+ * the 64-bit register so the count applies to the 32-bit value; a count
+ * above 31 (the "r0 is 64" case) is then halved. */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t pair;
+#if __WORDSIZE == 32
+    jit_word_t done;
+#endif
+#if CHECK_FLOGR
+    if (!jit_cpu.flogr) {
+        fallback_clz(r0, r1);
+        return;
+    }
+#endif
+    pair = jit_get_reg_pair();
+#if __WORDSIZE == 32
+    SLLG(rn(pair), r1, 32, 0);
+#else
+    movr(rn(pair), r1);
+#endif
+    FLOGR(rn(pair), rn(pair));
+    movr(r0, rn(pair));
+#if __WORDSIZE == 32
+    /* Skip the fix-up when the count is already within 0..31. */
+    done = blei_p(_jit->pc.w, r0, 31);
+    rshi(r0, r0, 1);    /* r0 is 64 */
+    patch_at(done, _jit->pc.w);
+#endif
+    jit_unget_reg_pair(pair);
+}
+
+/* Count trailing ones of r1 into r0, implemented as a bitwise complement
+ * followed by a count of trailing zeros.  When CHECK_FLOGR is enabled and
+ * jit_cpu.flogr is not set, the generic fallback_cto() is used instead. */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (!jit_cpu.flogr) {
+        fallback_cto(r0, r1);
+        return;
+    }
+#endif
+    comr(r0, r1);
+    ctzr(r0, r0);
+}
+
+/* Count trailing zeros of r1 into r0.  The lowest set bit is isolated
+ * with (-r1 & r1), its leading-zero count is taken, and that count is
+ * xor-ed with __WORDSIZE - 1 to turn it into a trailing-zero count.
+ * When the isolated value is zero (no bit set) the leading-zero count
+ * is kept as the result, since movnr() only replaces r0 for a non-zero
+ * selector.  When CHECK_FLOGR is enabled and jit_cpu.flogr is not set,
+ * the generic fallback_ctz() is used instead. */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t lowbit, flipped;
+#if CHECK_FLOGR
+    if (!jit_cpu.flogr) {
+        fallback_ctz(r0, r1);
+        return;
+    }
+#endif
+    lowbit = jit_get_reg_but_zero(0);
+    flipped = jit_get_reg_but_zero(0);
+    /* lowbit = -r1 & r1 */
+    negr(rn(lowbit), r1);
+    andr(rn(lowbit), rn(lowbit), r1);
+    clzr(r0, rn(lowbit));
+    xori(rn(flipped), r0, __WORDSIZE - 1);
+    /* Use the converted count only when some bit was set. */
+    movnr(r0, rn(flipped), rn(lowbit));
+    jit_unget_reg(lowbit);
+    jit_unget_reg(flipped);
+}
+
+/* Signed bit-field extract: place the i1-bit field of r1 at offset i0
+ * into r0, sign-extended.  The field is pushed to the top of the word
+ * with a left shift (skipped when it already sits there) and brought
+ * back down with rshi().  A full-word field is a plain register move. */
+static void
+_extr(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
+{
+    jit_word_t above;    /* number of bits above the field */
+    jit_int32_t src;     /* register feeding the final right shift */
+    assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+    if (i1 == __WORDSIZE) {
+        movr(r0, r1);
+        return;
+    }
+    /* Big Endian always */
+    i0 = __WORDSIZE - (i0 + i1);
+    above = __WORDSIZE - (i0 + i1);
+    src = r1;
+    if (above) {
+        lshi(r0, r1, above);
+        src = r0;
+    }
+    rshi(r0, src, __WORDSIZE - i1);
+}
+
+/* Unsigned bit-field extract: place the i1-bit field of r1 at offset i0
+ * into r0, zero-extended.  The field is shifted down to bit 0 with
+ * rshi_u() and then masked to i1 bits.  A full-word field is a plain
+ * register move.
+ *
+ * When the converted offset is zero no shift is emitted, so the mask
+ * must read r1 directly; masking r0 in that case would operate on
+ * whatever r0 held before whenever r0 != r1. */
+static void
+_extr_u(jit_state_t *_jit,
+        jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
+{
+    assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+    /* Big Endian always */
+    i0 = __WORDSIZE - (i0 + i1);
+    if (i1 == __WORDSIZE)
+        movr(r0, r1);
+    else {
+        if (i0) {
+            rshi_u(r0, r1, i0);
+            andi(r0, r0, (1L << i1) - 1);
+        }
+        else {
+            /* Field already starts at bit 0: mask straight from r1. */
+            andi(r0, r1, (1L << i1) - 1);
+        }
+    }
+}
+
+/* Bit-field deposit: insert the low i1 bits of r1 into r0 at offset i0,
+ * leaving the other bits of r0 untouched.  The field is masked into a
+ * scratch register, shifted into position, and merged after clearing the
+ * destination bits.  A full-word field is a plain register move. */
+static void
+_depr(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
+{
+    jit_int32_t tmp;
+    jit_word_t field;
+    if (i1 == __WORDSIZE) {
+        movr(r0, r1);
+        return;
+    }
+    /* Big Endian always */
+    i0 = __WORDSIZE - (i0 + i1);
+    field = (1L << i1) - 1;
+    tmp = jit_get_reg(jit_class_gpr);
+    andi(rn(tmp), r1, field);
+    if (i0) {
+        /* Move both the value and its mask up to the field position. */
+        lshi(rn(tmp), rn(tmp), i0);
+        field <<= i0;
+    }
+    andi(r0, r0, ~field);
+    orr(r0, r0, rn(tmp));
+    jit_unget_reg(tmp);
}
static void
}
#endif
-static void
-_jmpi(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
{
- jit_word_t d;
jit_int32_t reg;
- d = (i0 - _jit->pc.w) >> 1;
- if (s16_p(d))
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = (i0 - w) >> 1;
+ if (i1 && s16_p(d))
J(x16(d));
else if (s32_p(d))
BRL(d);
jmpr(rn(reg));
jit_unget_reg_but_zero(reg);
}
+ return (w);
}
static jit_word_t
return (w);
}
-static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
{
- jit_word_t d;
jit_int32_t reg;
- d = (i0 - _jit->pc.w) >> 1;
- if (s32_p(d))
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = (i0 - w) >> 1;
+ if (i1 && s16_p(d))
+ BRAS(_R14_REGNO, x16(d));
+ else if (s32_p(d))
BRASL(_R14_REGNO, d);
else {
reg = jit_get_reg_but_zero(0);
callr(rn(reg));
jit_unget_reg_but_zero(reg);
}
+ return (w);
}
static jit_word_t
u.s[7] = i1.s;
#endif
}
- /* BRC */
+ /* BRC or BRL */
else if (i0.b.op == 0xA7) {
- assert(i0.b.r3 == 0x4);
+ assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5);
d = (label - instr) >> 1;
assert(s16_p(d));
i1.b.i2 = d;
u.s[1] = i1.s;
}
- /* BRCL */
+ /* BRCL or BRASL */
else if (i0.b.op == 0xC0) {
- assert(i0.b.r3 == 0x4);
+ assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5);
d = (label - instr) >> 1;
assert(s32_p(d));
i12.i = d;