/*
- * Copyright (C) 2012-2019 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# define jit_armv5_p() (jit_cpu.version >= 5)
# define jit_armv5e_p() (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
# define jit_armv6_p() (jit_cpu.version >= 6)
-# define jit_armv7r_p() 0
-# define stack_framesize 48
+# define jit_armv7_p() (jit_cpu.version >= 7)
+# define jit_armv7r_p() (jit_cpu.version > 7 || (jit_cpu.version == 7 && jit_cpu.extend))
extern int __aeabi_idivmod(int, int);
extern unsigned __aeabi_uidivmod(unsigned, unsigned);
# define _R0_REGNO 0x00
# define THUMB2_UMULL 0xfba00000
# define ARM_SMULL 0x00c00090
# define THUMB2_SMULL 0xfb800000
+/* >> ARMv7r */
+# define ARM_SDIV 0x07100010
+# define ARM_UDIV 0x07300010
# define THUMB2_SDIV 0xfb90f0f0
# define THUMB2_UDIV 0xfbb0f0f0
+/* << ARMv7r */
# define ARM_AND 0x00000000
# define THUMB_AND 0x4000
# define THUMB2_AND 0xea000000
# define ARM_XTR8 0x00000400 /* ?xt? rotate 8 bits */
# define ARM_XTR16 0x00000800 /* ?xt? rotate 16 bits */
# define ARM_XTR24 0x00000c00 /* ?xt? rotate 24 bits */
+# define ARM_LDREX 0x01900090
+# define THUMB2_LDREX 0xe8500000
+# define ARM_STREX 0x01800090
+# define THUMB2_STREX 0xe8400000
/* << ARMv6* */
+/* >> ARMv6t2 */
+# define THUMB2_CLZ 0xfab0f080
+# define THUMB2_RBIT 0xfa90f0a0
+# define ARM_RBIT 0x06f00030
+/* << ARMv6t2 */
+# define ARM_CLZ 0x01600010
+/* >> ARMv7 */
+# define ARM_DMB 0xf57ff050
+# define THUMB2_DMB 0xf3bf8f50
+# define DMB_SY 0xf
+# define DMB_ST 0xe
+# define DMB_ISH 0xb
+# define DMB_ISHST 0xa
+# define DMB_NSH 0x7
+# define DMB_NSHT 0x6
+# define DMB_OSH 0x3
+# define DMB_OSHST 0x2
+/* << ARMv7 */
# define ARM_SHIFT 0x01a00000
# define ARM_R 0x00000010 /* register shift */
# define ARM_LSL 0x00000000
static void _tpp(jit_state_t*,int,int);
# define torl(o,rn,im) _torl(_jit,o,rn,im)
static void _torl(jit_state_t*,int,int,int) maybe_unused;
+# define DMB(im) dmb(im)
+# define T2_DMB(im) tdmb(im)
+# define dmb(im) _dmb(_jit, im)
+static void _dmb(jit_state_t *_jit, int im);
+# define tdmb(im) _tdmb(_jit, im)
+static void _tdmb(jit_state_t *_jit, int im);
# define CC_MOV(cc,rd,rm) corrr(cc,ARM_MOV,0,rd,rm)
# define MOV(rd,rm) CC_MOV(ARM_CC_AL,rd,rm)
# define T1_MOV(rd,rm) is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7))
# define NOT(rd,rm) CC_NOT(ARM_CC_AL,rd,rm)
# define T1_NOT(rd,rm) T1_MVN(rd,rm)
# define T2_NOT(rd,rm) T2_MVN(rd,rm)
+# define T2_CLZ(rd,rm) torrr(THUMB2_CLZ,rm,rd,rm)
+# define CC_CLZ(cc,rd,rm) corrrr(cc,ARM_CLZ,_R15_REGNO,rd,_R15_REGNO,rm)
+# define CLZ(rd,rm) CC_CLZ(ARM_CC_AL,rd,rm)
+# define T2_RBIT(rd,rm) torrr(THUMB2_RBIT,rm,rd,rm)
+# define CC_RBIT(cc,rd,rm) corrrr(cc,ARM_RBIT,_R15_REGNO,rd,_R15_REGNO,rm)
+# define RBIT(rd,rm) CC_RBIT(ARM_CC_AL,rd,rm)
# define NOP() MOV(_R0_REGNO, _R0_REGNO)
# define T1_NOP() is(0xbf00)
# define CC_ADD(cc,rd,rn,rm) corrr(cc,ARM_ADD,rn,rd,rm)
# define CC_UMULL(cc,rl,rh,rn,rm) corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
# define UMULL(rl,rh,rn,rm) CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
# define T2_UMULL(rl,rh,rn,rm) torrrr(THUMB2_UMULL,rn,rl,rh,rm)
+# define CC_SDIV(cc,rd,rn,rm) corrrr(cc,ARM_SDIV,rd,15,rn,rm)
+# define SDIV(rd,rn,rm) CC_SDIV(ARM_CC_AL,rd,rm,rn)
+# define CC_UDIV(cc,rd,rn,rm) corrrr(cc,ARM_UDIV,rd,15,rn,rm)
+# define UDIV(rd,rn,rm) CC_UDIV(ARM_CC_AL,rd,rm,rn)
# define T2_SDIV(rd,rn,rm) torrr(THUMB2_SDIV,rn,rd,rm)
# define T2_UDIV(rd,rn,rm) torrr(THUMB2_UDIV,rn,rd,rm)
# define CC_AND(cc,rd,rn,rm) corrr(cc,ARM_AND,rn,rd,rm)
# define CMNI(rn,im) CC_CMNI(ARM_CC_AL,rn,im)
# define T2_CMNI(rn,im) torri(THUMB2_CMNI,rn,_R15_REGNO,im)
# define CC_TST(cc,rn,rm) corrr(cc,ARM_TST,rn,r0,rm)
-# define TST(rn,rm) CC_TST(ARM_CC_AL,rn,rm)
+# define TST(rn,rm) corrr(ARM_CC_AL,ARM_TST,rn,0,rm)
# define T1_TST(rn,rm) is(THUMB_TST|(_u3(rm)<<3)|_u3(rn))
# define T2_TST(rn,rm) torrr(THUMB2_TST,rn,_R15_REGNO,rm)
# define CC_TSTI(cc,rn,im) corri(cc,ARM_TST|ARM_I,rn,0,im)
# define CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im)
# define LDRDIN(rt,rn,im) CC_LDRDIN(ARM_CC_AL,rt,rn,im)
# define T2_LDRDIN(rt,rt2,rn,im) torrri8(THUMB2_LDRDI,rn,rt,rt2,im)
+# define CC_LDREX(cc,rt,rn) corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf)
+# define LDREX(rt,rn) CC_LDREX(ARM_CC_AL,rt,rn)
+# define T2_LDREX(rt,rn,im) torrri8(THUMB2_LDREX,rn,rt,0xf,im)
# define CC_STRB(cc,rt,rn,rm) corrr(cc,ARM_STRB|ARM_P,rn,rt,rm)
# define STRB(rt,rn,rm) CC_STRB(ARM_CC_AL,rt,rn,rm)
# define T1_STRB(rt,rn,rm) is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
# define CC_STRDIN(cc,rt,rn,im) corri8(cc,ARM_STRDI,rn,rt,im)
# define STRDIN(rt,rn,im) CC_STRDIN(ARM_CC_AL,rt,rn,im)
# define T2_STRDIN(rt,rt2,rn,im) torrri8(THUMB2_STRDI,rn,rt,rt2,im)
+# define CC_STREX(cc,rd,rt,rn) corrrr(cc,ARM_STREX,rn,rd,0xf,rt)
+# define STREX(rd,rt,rn) CC_STREX(ARM_CC_AL,rd,rt,rn)
+# define T2_STREX(rd,rt,rn,im) torrri8(THUMB2_STREX,rn,rt,rd,im)
# define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im)
# define LDMIA(rn,im) CC_LDMIA(ARM_CC_AL,rn,im)
# define CC_LDM(cc,rn,im) CC_LDMIA(cc,rn,im)
# define T2_POP(im) tpp(THUMB2_POP,im)
# define jit_get_reg_args() \
do { \
+ CHECK_REG_ARGS(); \
+ jit_check_frame(); \
(void)jit_get_reg(_R0|jit_class_named|jit_class_gpr); \
(void)jit_get_reg(_R1|jit_class_named|jit_class_gpr); \
(void)jit_get_reg(_R2|jit_class_named|jit_class_gpr); \
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
+static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
+static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define comr(r0,r1) _comr(_jit,r0,r1)
static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
# define negr(r0,r1) _negr(_jit,r0,r1)
static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2)
static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
static void _stxr_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define stxi_i(r0,r1,i0) _stxi_i(_jit,r0,r1,i0)
static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1)
-static void _htonr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1)
-static void _htonr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
-# else
-# define htonr_us(r0,r1) extr_us(r0,r1)
-# define htonr(r0,r1) movr(r0,r1)
-# endif
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_c(r0,r1) _extr_c(_jit,r0,r1)
static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t);
# define extr_uc(r0,r1) _extr_uc(_jit,r0,r1)
static void _epilog(jit_state_t*,jit_node_t*);
# define callr(r0) _callr(_jit,r0)
static void _callr(jit_state_t*,jit_int32_t);
-# define calli(i0) _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
-# define calli_p(i0) _calli_p(_jit,i0)
-static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+# define calli(i0,i1) _calli(_jit,i0,i1)
+static void _calli(jit_state_t*,jit_word_t,jit_bool_t);
+# define calli_p(i0,i1) _calli_p(_jit,i0,i1)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_bool_t);
# define vastart(r0) _vastart(_jit, r0)
static void _vastart(jit_state_t*, jit_int32_t);
# define vaarg(r0, r1) _vaarg(_jit, r0, r1)
assert(!(o & 0x0000ffff));
if (o == THUMB2_PUSH)
assert(!(im & 0x8000));
- assert(__builtin_popcount(im & 0x1fff) > 1);
+ assert(__builtin_popcount(im & 0x7fff) > 1);
thumb.i = o|im;
iss(thumb.s[0], thumb.s[1]);
}
iss(thumb.s[0], thumb.s[1]);
}
+static void
+_dmb(jit_state_t *_jit, int im)
+{
+ assert(!(im & 0xfffffff0));
+ ii(ARM_DMB|im);
+}
+
+static void
+_tdmb(jit_state_t *_jit, int im)
+{
+ jit_thumb_t thumb;
+ assert(!(im & 0xfffffff0));
+ thumb.i = THUMB2_DMB | im;
+ iss(thumb.s[0], thumb.s[1]);
+}
+
static void
_nop(jit_state_t *_jit, jit_int32_t i0)
{
return (w);
}
+static void
+_movznr(jit_state_t *_jit, int ct, jit_int32_t r0,
+ jit_int32_t r1, jit_int32_t r2)
+{
+ if (jit_thumb_p()) {
+ if (r2 < 7)
+ T1_CMPI(r2, 0);
+ else
+ T2_CMPI(r2, 0);
+ IT(ct);
+ T1_MOV(r0, r1);
+ } else {
+ CMPI(r2, 0);
+ CC_MOV(ct, r0, r1);
+ }
+}
+
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ _movznr(_jit, ARM_CC_NE, r0, r1, r2);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+ _movznr(_jit, ARM_CC_EQ, r0, r1, r2);
+}
+
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ jit_int32_t r1_reg, iscasi;
+ jit_word_t retry, done, jump0, jump1;
+ if (!jit_armv7_p())
+ fallback_casx(r0, r1, r2, r3, i0);
+ else {
+ if ((iscasi = (r1 == _NOREG))) {
+ r1_reg = jit_get_reg(jit_class_gpr);
+ r1 = rn(r1_reg);
+ movi(r1, i0);
+ }
+ if (jit_thumb_p()) {
+ T2_DMB(DMB_ISH);
+ /* retry: */
+ retry = _jit->pc.w;
+ T2_LDREX(r0, r1, 0);
+ eqr(r0, r0, r2);
+ jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */
+ T2_STREX(r0, r3, r1, 0);
+ jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */
+ /* r0 = 0 if memory updated, 1 otherwise */
+ xori(r0, r0, 1);
+ /* done: */
+ done = _jit->pc.w;
+ T2_DMB(DMB_ISH);
+ }
+ else {
+ DMB(DMB_ISH);
+ /* retry: */
+ retry = _jit->pc.w;
+ LDREX(r0, r1);
+ eqr(r0, r0, r2);
+ jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */
+ STREX(r0, r3, r1);
+ jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */
+ /* r0 = 0 if memory updated, 1 otherwise */
+ xori(r0, r0, 1);
+ /* done: */
+ done = _jit->pc.w;
+ DMB(DMB_ISH);
+ }
+ patch_at(arm_patch_jump, jump0, done);
+ patch_at(arm_patch_jump, jump1, retry);
+ if (iscasi)
+ jit_unget_reg(r1_reg);
+ }
+}
+
static void
_comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
RSBI(r0, r1, 0);
}
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ if (!jit_thumb_p() && jit_armv5e_p())
+ CLZ(r0, r1);
+ else if (jit_thumb_p() && jit_armv7_p()) { /* armv6t2 actually */
+ T2_CLZ(r0, r1);
+ }
+ else
+ fallback_clz(r0, r0);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1);
+ clzr(r0, r0);
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ if (jit_armv7_p()) { /* armv6t2 actually */
+ if (jit_thumb_p())
+ T2_RBIT(r0, r1);
+ else
+ RBIT(r0, r1);
+ clor(r0, r0);
+ }
+ else
+ fallback_cto(r0, r1);
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ if (jit_armv7_p()) { /* armv6t2 actually */
+ if (jit_thumb_p())
+ T2_RBIT(r0, r1);
+ else
+ RBIT(r0, r1);
+ clzr(r0, r0);
+ }
+ else
+ fallback_ctz(r0, r1);
+}
+
static void
_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
static void
_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- if (jit_armv7r_p() && jit_thumb_p())
- T2_SDIV(r0, r1, r2);
+ if (jit_armv7r_p()) {
+ if (jit_thumb_p())
+ T2_SDIV(r0, r1, r2);
+ else
+ SDIV(r0, r1, r2);
+ }
else
divrem(1, 1, r0, r1, r2);
}
static void
_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- if (jit_armv7r_p() && jit_thumb_p())
- T2_UDIV(r0, r1, r2);
+ if (jit_armv7r_p()) {
+ if (jit_thumb_p())
+ T2_UDIV(r0, r1, r2);
+ else
+ UDIV(r0, r1, r2);
+ }
else
divrem(1, 0, r0, r1, r2);
}
static void
_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- divrem(0, 1, r0, r1, r2);
+ if (jit_armv7r_p()) {
+ jit_int32_t reg;
+ if (r0 == r1 || r0 == r2) {
+ reg = jit_get_reg(jit_class_gpr);
+ divr(rn(reg), r1, r2);
+ mulr(rn(reg), r2, rn(reg));
+ subr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
+ else {
+ divr(r0, r1, r2);
+ mulr(r0, r2, r0);
+ subr(r0, r1, r0);
+ }
+ }
+ else
+ divrem(0, 1, r0, r1, r2);
}
static void
static void
_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- divrem(0, 0, r0, r1, r2);
+ if (jit_armv7r_p()) {
+ jit_int32_t reg;
+ if (r0 == r1 || r0 == r2) {
+ reg = jit_get_reg(jit_class_gpr);
+ divr_u(rn(reg), r1, r2);
+ mulr(rn(reg), r2, rn(reg));
+ subr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
+ else {
+ divr_u(r0, r1, r2);
+ mulr(r0, r2, r0);
+ subr(r0, r1, r0);
+ }
+ }
+ else
+ divrem(0, 0, r0, r1, r2);
}
static void
jit_word_t w;
jit_word_t d;
jit_int32_t reg;
+ /* i1 means jump is reachable in signed 24 bits */
if (i1) {
- /* Assume jump is not longer than 23 bits if inside jit */
w = _jit->pc.w;
/* if thumb and in thumb mode */
if (jit_thumb_p() && _jitc->thumb) {
}
}
-# if __BYTE_ORDER == __LITTLE_ENDIAN
static void
-_htonr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
- jit_int32_t t0;
if (jit_thumb_p()) {
if ((r0|r1) < 8)
T1_REV(r0, r1);
rshi_u(r0, r0, 16);
}
else {
- t0 = jit_get_reg(jit_class_gpr);
- rshi(rn(t0), r1, 8);
- andi(r0, r1, 0xff);
- andi(rn(t0), rn(t0), 0xff);
- lshi(r0, r0, 8);
- orr(r0, r0, rn(t0));
- jit_unget_reg(t0);
+ generic_bswapr_us(_jit, r0, r1);
}
}
}
/* inline glibc htonl (without register clobber) */
static void
-_htonr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
if (jit_thumb_p()) {
}
}
}
-#endif
static void
_extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
}
static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t exchange_p)
{
jit_word_t d;
jit_int32_t reg;
- d = ((i0 - _jit->pc.w) >> 2) - 2;
- if (!jit_exchange_p() && !jit_thumb_p() && _s24P(d))
- BLI(d & 0x00ffffff);
+ if (!exchange_p) {
+ if (jit_thumb_p()) {
+ if (jit_exchange_p())
+ /* skip switch from arm to thumb
+ * exchange_p set to zero means a jit function
+ * call in the same jit code buffer */
+ d = ((i0 + 8 - _jit->pc.w) >> 1) - 2;
+ else
+ d = ((i0 - _jit->pc.w) >> 1) - 2;
+ }
+ else d = ((i0 - _jit->pc.w) >> 2) - 2;
+ if (_s24P(d)) {
+ if (jit_thumb_p()) T2_BLI(encode_thumb_jump(d));
+ else BLI(d & 0x00ffffff);
+ }
+ else goto fallback;
+ }
else {
+ fallback:
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
if (jit_thumb_p())
}
static jit_word_t
-_calli_p(jit_state_t *_jit, jit_word_t i0)
+_calli_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
{
jit_word_t w;
+ jit_word_t d;
jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- w = _jit->pc.w;
- movi_p(rn(reg), i0);
- if (jit_thumb_p())
- T1_BLX(rn(reg));
- else
- BLX(rn(reg));
- jit_unget_reg(reg);
+ /* i1 means call is reachable in signed 24 bits */
+ if (i1) {
+ w = _jit->pc.w;
+ if (jit_thumb_p()) d = ((i0 - _jit->pc.w) >> 1) - 2;
+ else d = ((i0 - _jit->pc.w) >> 2) - 2;
+ assert(_s24P(d));
+ if (jit_thumb_p()) T2_BLI(encode_thumb_jump(d));
+ else BLI(d & 0x00ffffff);
+ }
+ else {
+ reg = jit_get_reg(jit_class_gpr);
+ w = _jit->pc.w;
+ movi_p(rn(reg), i0);
+ if (jit_thumb_p())
+ T1_BLX(rn(reg));
+ else
+ BLX(rn(reg));
+ jit_unget_reg(reg);
+ }
return (w);
}
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t reg;
+ jit_int32_t reg, mask, count;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
+ if (jit_swf_p())
+ CHECK_SWF_OFFSET();
+ CHECK_REG_ARGS();
if (_jitc->function->assume_frame) {
if (jit_thumb_p() && !_jitc->thumb)
_jitc->thumb = _jit->pc.w;
_jitc->function->stack = ((_jitc->function->self.alen -
/* align stack at 8 bytes */
_jitc->function->self.aoff) + 7) & -8;
+ /* If this jit_check_frame() succeeds, it actually is just a need_stack,
+ * usually for arguments, so, allocai was not called, but pusharg*
+ * was called increasing stack size, for negative access offsets.
+ * This can be optimized for one less prolog instruction, that is,
+ * do not create the frame pointer, and only add _jitc->function->stack
+ * to sp, and on epilog, instead of moving fp to sp, just add negative
+ * value of _jitc->function->stack. Since this condition requires a
+ * large function body for excess arguments to called function, keep
+ * things a bit simpler for now, as this is the only place need_stack
+ * would be useful. */
+ if (_jitc->function->stack)
+ jit_check_frame();
+
+ for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ mask |= 1 << rn(iregs[reg]);
+ ++count;
+ }
+ }
+ /* One extra register to keep stack 8 bytes aligned */
+ if (count & 1) {
+ for (reg = 4; reg < 10; reg++) {
+ if (!(mask & (1 << reg))) {
+ mask |= 1 << reg;
+ break;
+ }
+ }
+ }
+ if (_jitc->function->need_frame || _jitc->function->need_return)
+ mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+ if (!jit_swf_p() && _jitc->function->save_reg_args &&
+ !(_jitc->function->self.call & jit_call_varargs))
+ mask |= 0xf;
if (jit_thumb_p()) {
/* switch to thumb mode (better approach would be to
* ORR 1 address being called, but no clear distinction
* of what is a pointer to a jit function, or if patching
* a pointer to a jit function) */
- ADDI(_R12_REGNO, _R15_REGNO, 1);
- BX(_R12_REGNO);
+ if (jit_exchange_p()) {
+ ADDI(_R12_REGNO, _R15_REGNO, 1);
+ BX(_R12_REGNO);
+ }
if (!_jitc->thumb)
_jitc->thumb = _jit->pc.w;
- if (jit_cpu.abi) {
- T2_PUSH(0xf);
- T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- VPUSH_F64(_D8_REGNO, 8);
- }
- else {
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
T2_PUSH(0xf);
- T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- }
+ if (mask)
+ T2_PUSH(mask);
}
else {
- if (jit_cpu.abi) {
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
PUSH(0xf);
- PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- VPUSH_F64(_D8_REGNO, 8);
- }
- else {
- PUSH(0xf);
- PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- }
+ if (mask)
+ PUSH(mask);
}
- movr(_FP_REGNO, _SP_REGNO);
+ if (_jitc->function->need_frame)
+ movr(_FP_REGNO, _SP_REGNO);
if (_jitc->function->stack)
subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
+ jit_int32_t reg, mask, count;
if (_jitc->function->assume_frame)
return;
- movr(_SP_REGNO, _FP_REGNO);
- if (jit_cpu.abi)
- VPOP_F64(_D8_REGNO, 8);
- if (jit_thumb_p())
- T2_POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- else
- POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- addi(_SP_REGNO, _SP_REGNO, 16);
+ for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ mask |= 1 << rn(iregs[reg]);
+ ++count;
+ }
+ }
+ /* One extra register to keep stack 8 bytes aligned */
+ if (count & 1) {
+ for (reg = 4; reg < 10; reg++) {
+ if (!(mask & (1 << reg))) {
+ mask |= 1 << reg;
+ break;
+ }
+ }
+ }
+ if (_jitc->function->need_frame || _jitc->function->need_return)
+ mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+ if (_jitc->function->need_frame)
+ movr(_SP_REGNO, _FP_REGNO);
+ if (!jit_swf_p() && _jitc->function->save_reg_args &&
+ !(_jitc->function->self.call & jit_call_varargs))
+ addi(_SP_REGNO, _SP_REGNO, 16);
+ if (mask) {
+ if (jit_thumb_p())
+ T2_POP(mask);
+ else
+ POP(mask);
+ }
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
+ addi(_SP_REGNO, _SP_REGNO, 16);
if (jit_thumb_p())
T1_BX(_LR_REGNO);
else
* The -16 is to account for the 4 argument registers
* always saved, and _jitc->function->vagp is to account
* for declared arguments. */
- addi(r0, _FP_REGNO, _jitc->function->self.size -
- 16 + _jitc->function->vagp);
+ addi(r0, _FP_REGNO, jit_selfsize() - 16 + _jitc->function->vagp);
}
static void
jit_word_t w;
} u;
u.w = instr;
- if (kind == arm_patch_jump) {
+ if (kind == arm_patch_call) {
+ if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
+ code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+ assert((thumb.i & THUMB2_BLI) == THUMB2_BLI);
+ /* skip code to switch from arm to thumb mode */
+ if (jit_exchange_p())
+ d = ((label + 8 - instr) >> 1) - 2;
+ else
+ d = ((label - instr) >> 1) - 2;
+ assert(_s24P(d));
+ thumb.i = THUMB2_BLI | encode_thumb_jump(d);
+ thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+ }
+ else {
+ thumb.i = u.i[0];
+ assert((thumb.i & 0x0f000000) == ARM_BLI);
+ d = ((label - instr) >> 2) - 2;
+ assert(_s24P(d));
+ u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
+ }
+ }
+ else if (kind == arm_patch_jump) {
if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
if ((thumb.i & THUMB2_B) == THUMB2_B) {