/*
- * Copyright (C) 2022 Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
*/
#if PROTO
+# define jit_unaligned_p() (jit_cpu.unaligned)
# define ii(i) *_jit->pc.ui++ = (i)
# define can_sign_extend_si12_p(s12) ((s12) <= 2047 && (s12) >= -2048)
# define can_zero_extend_u12_p(u12) ((u12) <= 4095 && (u12) >= 0)
# define _RA_REGNO 1
# define _SP_REGNO 3
# define _FP_REGNO 22
-# define stack_framesize 160
# define ldr(u, v) ldr_l(u, v)
# define ldi(u, v) ldi_l(u, v)
+# define ldxr(u, v, w) ldxr_l(u, v, w)
# define ldxi(u, v, w) ldxi_l(u, v, w)
+# define str(u, v) str_l(u, v)
# define sti(u, v) sti_l(u, v)
+# define stxr(u, v, w) stxr_l(u, v, w)
# define stxi(u, v, w) stxi_l(u, v, w)
# define orrr(op, rk, rj, rd) _orrr(_jit, op, rk, rj, rd)
# define ou5rr(op, i5, rj, rd) _orrr(_jit, op, i5, rj, rd)
# define nop(i0) _nop(_jit, i0)
# define comr(r0, r1) NOR(r0, r1, r1)
# define negr(r0, r1) subr(r0, _ZERO_REGNO, r1)
+# define clor(r0, r1) CLO_D(r0, r1)
+# define clzr(r0, r1) CLZ_D(r0, r1)
+# define ctor(r0, r1) CTO_D(r0, r1)
+# define ctzr(r0, r1) CTZ_D(r0, r1)
+# define rbitr(r0, r1) BITREV_D(r0, r1)
static void _nop(jit_state_t*,jit_int32_t);
# define movr(r0, r1) _movr(_jit, r0, r1)
static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
# define mulr(r0, r1, r2) MUL_D(r0, r1, r2)
# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0)
static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr(r0, r1, r2) MULH_D(r0, r1, r2)
+# define hmuli(r0, r1, i0) _hmuli(_jit, r0, r1, i0)
+static void _hmuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define hmulr_u(r0, r1, r2) MULH_DU(r0, r1, r2)
+# define hmuli_u(r0, r1, i0) _hmuli_u(_jit, r0, r1, i0)
+static void _hmuli_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define qmulr(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 1)
# define qmulr_u(r0, r1, r2, r3) iqmulr(r0, r1, r2, r3, 0)
# define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign)
# define rshi(r0, r1, i0) SRAI_D(r0, r1, i0)
# define rshr_u(r0, r1, r2) SRL_D(r0, r1, r2)
# define rshi_u(r0, r1, i0) SRLI_D(r0, r1, i0)
+# define lrotr(r0, r1, r2) _lrotr(_jit, r0, r1, r2)
+static void _lrotr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+/* Left rotate by immediate, emitted as a right rotate by the complementary
+ * count.  The argument is parenthesized so expression arguments expand
+ * correctly.
+ * NOTE(review): i0 == 0 yields a count of __WORDSIZE -- confirm callers
+ * never pass 0, or that ROTRI_D accepts that count. */
+# define lroti(r0, r1, i0) rroti(r0, r1, __WORDSIZE - (i0))
+# define rrotr(r0, r1, r2) ROTR_D(r0, r1, r2)
+# define rroti(r0, r1, i0) ROTRI_D(r0, r1, i0)
# define andr(r0, r1, r2) AND(r0, r1, r2)
# define andi(r0, r1, i0) _andi(_jit, r0, r1, i0)
static void _andi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define ldxr_l(r0, r1, r2) LDX_D(r0, r1, r2)
# define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0)
static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define unldr(r0, r1, i0) _unldr(_jit, r0, r1, i0)
+static void _unldr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define unldi(r0, i0, i1) _unldi(_jit, r0, i0, i1)
+static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+# define unldr_u(r0, r1, i0) _unldr_u(_jit, r0, r1, i0)
+static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define unldi_u(r0, i0, i1) _unldi_u(_jit, r0, i0, i1)
+static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
# define str_c(r0, r1) ST_B(r1, r0, 0)
# define sti_c(i0, r0) _sti_c(_jit, i0, r0)
static void _sti_c(jit_state_t*, jit_word_t, jit_int32_t);
# define stxr_l(r0, r1, r2) STX_D(r2, r1, r0)
# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1)
static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+/* unstr/unsti: the implementations select fallback_* or generic_* code
+ * depending on jit_unaligned_p().  Spelled "# define" like every other
+ * macro inside this #if PROTO section. */
+# define unstr(r0, r1, i0) _unstr(_jit, r0, r1, i0)
+static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+# define unsti(i0, r0, i1) _unsti(_jit, i0, r0, i1)
+static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
static void _bswapr_us(jit_state_t*, jit_int32_t, jit_int32_t);
# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
static void _bswapr_ui(jit_state_t*, jit_int32_t, jit_int32_t);
# define bswapr_ul(r0, r1) REVB_D(r0, r1)
+# define extr(r0,r1,i0,i1) _extr(_jit,r0,r1,i0,i1)
+static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_word_t);
+/* Unsigned bit-field extract: hands BSTRPICK_D the bounds
+ * msb = i0 + i1 - 1 and lsb = i0.  Arguments are parenthesized so that
+ * expression arguments keep their intended precedence. */
+# define extr_u(r0,r1,i0,i1) BSTRPICK_D(r0, r1, (i0) + (i1) - 1, i0)
+/* Bit-field deposit: hands BSTRINS_D the bounds msb = i0 + i1 - 1 and
+ * lsb = i0.  Arguments are parenthesized so that expression arguments
+ * keep their intended precedence. */
+# define depr(r0,r1,i0,i1) BSTRINS_D(r0, r1, (i0) + (i1) - 1, i0)
# define extr_c(r0, r1) EXT_W_B(r0, r1)
# define extr_uc(r0, r1) BSTRPICK_D(r0, r1, 7, 0)
# define extr_s(r0, r1) EXT_W_H(r0, r1)
static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
# define jmpr(r0) JIRL(_ZERO_REGNO, r0, 0)
# define jmpi(i0) _jmpi(_jit, i0)
-static void _jmpi(jit_state_t*, jit_word_t);
+static jit_word_t _jmpi(jit_state_t*, jit_word_t);
# define jmpi_p(i0) _jmpi_p(_jit, i0)
static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
# define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1)
static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
# define callr(r0) JIRL(_RA_REGNO, r0, 0)
# define calli(i0) _calli(_jit, i0)
-static void _calli(jit_state_t*, jit_word_t);
+static jit_word_t _calli(jit_state_t*, jit_word_t);
# define calli_p(i0) _calli_p(_jit, i0)
static jit_word_t _calli_p(jit_state_t*, jit_word_t);
# define prolog(i0) _prolog(_jit, i0)
jit_unget_reg(reg);
}
+/*
+ * hmuli: same operation as hmulr (MULH_D) with an immediate second
+ * operand.  The immediate is loaded into a temporary general purpose
+ * register, which is released once the multiply has been emitted.
+ */
+static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    hmulr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * hmuli_u: same operation as hmulr_u (MULH_DU) with an immediate second
+ * operand.  The immediate is loaded into a temporary general purpose
+ * register, which is released once the multiply has been emitted.
+ */
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    hmulr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
static void
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
jit_unget_reg(reg);
}
+/*
+ * lrotr: rotate r1 left by r2 into r0.  Emitted as a right rotate
+ * (rrotr) by the count produced by rsbi(count, r2, __WORDSIZE).
+ */
+static void
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp;
+
+    /* r0 aliases neither source: it can hold the count itself. */
+    if (r0 != r1 && r0 != r2) {
+        rsbi(r0, r2, __WORDSIZE);
+        rrotr(r0, r1, r0);
+        return;
+    }
+
+    /* Otherwise compute the count in a scratch register. */
+    tmp = jit_get_reg(jit_class_gpr);
+    rsbi(rn(tmp), r2, __WORDSIZE);
+    rrotr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
static void
_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
}
}
+/*
+ * unldr: emit generic_unldr() unless jit_unaligned_p() is set, in which
+ * case fallback_unldr() is emitted instead.  All arguments are forwarded
+ * unchanged.
+ */
+static void
+_unldr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (!jit_unaligned_p()) {
+        generic_unldr(r0, r1, i0);
+        return;
+    }
+    fallback_unldr(r0, r1, i0);
+}
+
+/*
+ * unldi: emit fallback_unldi() when jit_unaligned_p() is set, otherwise
+ * generic_unldi().  All arguments are forwarded unchanged.
+ */
+static void
+_unldi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    if (jit_unaligned_p())
+        fallback_unldi(r0, i0, i1);
+    else
+        generic_unldi(r0, i0, i1);
+}
+
+/*
+ * unldr_u: emit generic_unldr_u() unless jit_unaligned_p() is set, in
+ * which case fallback_unldr_u() is emitted instead.  All arguments are
+ * forwarded unchanged.
+ */
+static void
+_unldr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (!jit_unaligned_p()) {
+        generic_unldr_u(r0, r1, i0);
+        return;
+    }
+    fallback_unldr_u(r0, r1, i0);
+}
+
+/*
+ * unldi_u: emit fallback_unldi_u() when jit_unaligned_p() is set,
+ * otherwise generic_unldi_u().  All arguments are forwarded unchanged.
+ */
+static void
+_unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
+{
+    if (jit_unaligned_p())
+        fallback_unldi_u(r0, i0, i1);
+    else
+        generic_unldi_u(r0, i0, i1);
+}
+
static void
_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
}
}
+/*
+ * unstr: emit generic_unstr() unless jit_unaligned_p() is set, in which
+ * case fallback_unstr() is emitted instead.  All arguments are forwarded
+ * unchanged.
+ */
+static void
+_unstr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (!jit_unaligned_p()) {
+        generic_unstr(r0, r1, i0);
+        return;
+    }
+    fallback_unstr(r0, r1, i0);
+}
+
+/*
+ * unsti: emit generic_unsti() unless jit_unaligned_p() is set, in which
+ * case fallback_unsti() is emitted instead.  All arguments are forwarded
+ * unchanged.
+ */
+static void
+_unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    if (!jit_unaligned_p()) {
+        generic_unsti(i0, r0, i1);
+        return;
+    }
+    fallback_unsti(i0, r0, i1);
+}
+
static void
_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
extr_ui(r0, r0);
}
+/*
+ * extr: extract the i1-bit field of r1 that starts at bit i0 into r0,
+ * using rshi (SRAI_D) for the final shift.  The field is first moved to
+ * the top of the word with lshi, unless it already ends at the most
+ * significant bit, and then shifted right by __WORDSIZE - i1.
+ */
+static void
+_extr(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_word_t i0, jit_word_t i1)
+{
+    jit_word_t up, down;
+
+    assert(i0 >= 0 && i1 >= 1 && i0 + i1 <= __WORDSIZE);
+    up = __WORDSIZE - (i0 + i1);	/* bits above the field */
+    down = __WORDSIZE - i1;		/* final right shift count */
+    if (up == 0) {
+        /* Field already occupies the top bits: one shift is enough. */
+        rshi(r0, r1, down);
+        return;
+    }
+    lshi(r0, r1, up);
+    rshi(r0, r0, down);
+}
+
static void
_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
return (w);
}
-static void
+/*
+ * jmpi: emit an unconditional jump to address i0.
+ *
+ * Returns the address of the emitted jump: the pc sampled before
+ * emission when the short B form is used, otherwise whatever jmpi_p()
+ * returns for the long form.
+ *
+ * The short form encodes the word displacement d, so the range test is
+ * applied to d rather than to the absolute target address.
+ */
+static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
- w = (i0 - _jit->pc.w) >> 2;
- if (can_sign_extend_si26_p(i0))
- B(w);
+    jit_word_t d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    if (can_sign_extend_si26_p(d))
+        B(d);
else
- (void)jmpi_p(i0);
+        w = jmpi_p(i0);
+    return (w);
}
static jit_word_t
return (w);
}
-static void
+/*
+ * calli: emit a call to address i0.
+ *
+ * Returns the address of the emitted call: the pc sampled before
+ * emission when the short BL form is used, otherwise whatever calli_p()
+ * returns for the long form.
+ *
+ * The short form encodes the word displacement d, so the range test is
+ * applied to d rather than to the absolute target address.
+ */
+static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
- w = (i0 - _jit->pc.w) >> 2;
- if (can_sign_extend_si26_p(i0))
- BL(w);
+    jit_word_t d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    if (can_sign_extend_si26_p(d))
+        BL(d);
else
- (void)calli_p(i0);
+        w = calli_p(i0);
+    return (w);
}
static jit_word_t
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t reg;
+ jit_int32_t reg, offs;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
if (_jitc->function->assume_frame)
return;
_jitc->function->stack = ((_jitc->function->self.alen -
/* align stack at 16 bytes */
_jitc->function->self.aoff) + 15) & -16;
- subi(_SP_REGNO, _SP_REGNO, stack_framesize);
- stxi(0, _SP_REGNO, _RA_REGNO);
- stxi(8, _SP_REGNO, _FP_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _S0))
- stxi(16, _SP_REGNO, rn(_S0));
- if (jit_regset_tstbit(&_jitc->function->regset, _S1))
- stxi(24, _SP_REGNO, rn(_S1));
- if (jit_regset_tstbit(&_jitc->function->regset, _S2))
- stxi(32, _SP_REGNO, rn(_S2));
- if (jit_regset_tstbit(&_jitc->function->regset, _S3))
- stxi(40, _SP_REGNO, rn(_S3));
- if (jit_regset_tstbit(&_jitc->function->regset, _S4))
- stxi(48, _SP_REGNO, rn(_S4));
- if (jit_regset_tstbit(&_jitc->function->regset, _S5))
- stxi(56, _SP_REGNO, rn(_S5));
- if (jit_regset_tstbit(&_jitc->function->regset, _S6))
- stxi(64, _SP_REGNO, rn(_S6));
- if (jit_regset_tstbit(&_jitc->function->regset, _S7))
- stxi(72, _SP_REGNO, rn(_S7));
- if (jit_regset_tstbit(&_jitc->function->regset, _S8))
- stxi(80, _SP_REGNO, rn(_S8));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
- stxi_d(88, _SP_REGNO, rn(_FS0));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
- stxi_d(96, _SP_REGNO, rn(_FS1));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
- stxi_d(104, _SP_REGNO, rn(_FS2));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
- stxi_d(112, _SP_REGNO, rn(_FS3));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
- stxi_d(120, _SP_REGNO, rn(_FS4));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
- stxi_d(128, _SP_REGNO, rn(_FS5));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
- stxi_d(136, _SP_REGNO, rn(_FS6));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
- stxi_d(144, _SP_REGNO, rn(_FS7));
- movr(_FP_REGNO, _SP_REGNO);
+
+ if (_jitc->function->stack)
+ _jitc->function->need_stack = 1;
+ if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+ /* check if any callee save register needs to be saved */
+ for (reg = 0; reg < _jitc->reglen; ++reg)
+ if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+ (_rvs[reg].spec & jit_class_sav)) {
+ _jitc->function->need_stack = 1;
+ break;
+ }
+ }
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+ if (_jitc->function->need_frame) {
+ stxi(0, _SP_REGNO, _RA_REGNO);
+ stxi(8, _SP_REGNO, _FP_REGNO);
+ }
+ /* callee save registers */
+ for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ stxi(offs, _SP_REGNO, rn(iregs[reg]));
+ offs += sizeof(jit_word_t);
+ }
+ }
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->need_frame)
+ movr(_FP_REGNO, _SP_REGNO);
if (_jitc->function->stack)
subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
}
if (_jitc->function->self.call & jit_call_varargs) {
for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
- stxi(stack_framesize - ((8 - reg) * 8),
+ stxi(jit_framesize() - ((8 - reg) * 8),
_FP_REGNO, rn(JIT_RA0 - reg));
}
}
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
+ jit_int32_t reg, offs; /* Epilog: reload what _prolog stored, release
+                         * the frame and return through RA.  offs is the
+                         * SP-relative offset of the next save slot and
+                         * advances exactly as in _prolog. */
if (_jitc->function->assume_frame)
return;
- movr(_SP_REGNO, _FP_REGNO);
- ldxi(_RA_REGNO, _SP_REGNO, 0);
- ldxi(_FP_REGNO, _SP_REGNO, 8);
- if (jit_regset_tstbit(&_jitc->function->regset, _S0))
- ldxi(rn(_S0), _SP_REGNO, 16);
- if (jit_regset_tstbit(&_jitc->function->regset, _S1))
- ldxi(rn(_S1), _SP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _S2))
- ldxi(rn(_S2), _SP_REGNO, 32);
- if (jit_regset_tstbit(&_jitc->function->regset, _S3))
- ldxi(rn(_S3), _SP_REGNO, 40);
- if (jit_regset_tstbit(&_jitc->function->regset, _S4))
- ldxi(rn(_S4), _SP_REGNO, 48);
- if (jit_regset_tstbit(&_jitc->function->regset, _S5))
- ldxi(rn(_S5), _SP_REGNO, 56);
- if (jit_regset_tstbit(&_jitc->function->regset, _S6))
- ldxi(rn(_S6), _SP_REGNO, 64);
- if (jit_regset_tstbit(&_jitc->function->regset, _S7))
- ldxi(rn(_S7), _SP_REGNO, 72);
- if (jit_regset_tstbit(&_jitc->function->regset, _S8))
- ldxi(rn(_S8), _SP_REGNO, 80);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
- ldxi_d(rn(_FS0), _SP_REGNO, 88);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
- ldxi_d(rn(_FS1), _SP_REGNO, 96);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
- ldxi_d(rn(_FS2), _SP_REGNO, 104);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
- ldxi_d(rn(_FS3), _SP_REGNO, 112);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
- ldxi_d(rn(_FS4), _SP_REGNO, 120);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
- ldxi_d(rn(_FS5), _SP_REGNO, 128);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
- ldxi_d(rn(_FS6), _SP_REGNO, 136);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
- ldxi_d(rn(_FS7), _SP_REGNO, 144);
- addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+ if (_jitc->function->need_frame) { /* RA/FP are reloaded only when a frame is in use */
+ movr(_SP_REGNO, _FP_REGNO); /* SP back to the frame base held in FP */
+ ldxi(_RA_REGNO, _SP_REGNO, 0);
+ ldxi(_FP_REGNO, _SP_REGNO, 8);
+ }
+
+ /* callee save registers: reload each register flagged in the function's
+  * regset, integer registers first and then floating point ones, from
+  * consecutive slots starting at SP + 16 */
+ for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+ offs += sizeof(jit_word_t);
+ }
+ }
+ for (reg = 0; reg < jit_size(fregs); reg++) { /* offs continues after the integer slots */
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack) /* same condition as the prolog's subi */
+ addi(_SP_REGNO, _SP_REGNO, jit_framesize());
JIRL(_ZERO_REGNO, _RA_REGNO, 0);
}
assert(_jitc->function->self.call & jit_call_varargs);
/* Initialize va_list to the first stack argument. */
if (jit_arg_reg_p(_jitc->function->vagp))
- addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8));
+ addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8));
else
- addi(r0, _FP_REGNO, _jitc->function->self.size);
+ addi(r0, _FP_REGNO, jit_selfsize());
}
static void