2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4 * This file is part of GNU lightning.
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
17 * Paulo Cesar Pereira de Andrade
21 /* as per vfp_regno macro, required due to "support" to soft float registers
22 * or using integer registers as arguments to float operations */
/* NOTE(review): the comment above does not obviously describe ARM_V_Q; the
 * definition it was written for may not be part of this excerpt -- confirm. */
/* ARM_V_Q is defined a second time, with the same value, next to the other
 * ARM_V_* modifier bits further down in this file. */
24 # define ARM_V_Q 0x00000040
/* Bit masks for the individual FPSCR fields, listed from the most
 * significant bit downwards: condition flags first, then mode controls,
 * trap enables and finally the exception flags. */
25 # define FPSCR_N 0x80000000 /* Negative flag */
26 # define FPSCR_Z 0x40000000 /* Zero flag */
27 # define FPSCR_C 0x20000000 /* Carry flag */
28 # define FPSCR_V 0x10000000 /* Overflow flag */
29 # define FPSCR_QC 0x08000000 /* Cumulative saturation */
30 # define FPSCR_AHP 0x04000000 /* Alt. half-precision */
31 # define FPSCR_DN 0x02000000 /* Default NaN mode */
32 # define FPSCR_FZ 0x01000000 /* Flush to zero */
/* FPSCR_RMASK covers the two-bit rounding-mode field; FPSCR_RN, FPSCR_RP,
 * FPSCR_RM and FPSCR_RZ are the four values that field can hold. */
33 # define FPSCR_RMASK 0x00c00000
34 # define FPSCR_RN 0x00000000 /* Round to Nearest */
35 # define FPSCR_RP 0x00400000 /* Round to Plus Infinity */
36 # define FPSCR_RM 0x00800000 /* Round to Minus Infinity */
37 # define FPSCR_RZ 0x00c00000 /* Round towards Zero */
/* FPSCR_STRIDE and FPSCR_LEN are multi-bit field masks, not single flags. */
38 # define FPSCR_STRIDE 0x00300000
39 # define FPSCR_RES1 0x00080000 /* Reserved, UNK/SBZP */
40 # define FPSCR_LEN 0x00070000
/* Trap enable bits (the *E names). */
41 # define FPSCR_IDE 0x00008000 /* Input Denormal trap */
42 # define FPSCR_IXE 0x00001000 /* Inexact trap */
43 # define FPSCR_UFE 0x00000800 /* Underflow trap */
44 # define FPSCR_OFE 0x00000400 /* Overflow trap */
45 # define FPSCR_DZE 0x00000200 /* Division by zero trap */
46 # define FPSCR_IOE 0x00000100 /* Invalid Operation trap */
/* Exception status bits (the *C names), in the same order as the traps. */
47 # define FPSCR_IDC 0x00000080 /* Input Denormal flag */
48 # define FPSCR_RES0 0x00000060 /* Reserved, UNK/SBZP */
49 # define FPSCR_IXC 0x00000010 /* Inexact flag */
50 # define FPSCR_UFC 0x00000008 /* Underflow flag */
51 # define FPSCR_OFC 0x00000004 /* Overflow flag */
52 # define FPSCR_DZC 0x00000002 /* Division by zero flag */
53 # define FPSCR_IOC 0x00000001 /* Invalid Operation flag */
/* Modifier bits that the wrapper macros OR into the scalar opcodes below. */
# define ARM_V_E 0x00000080 /* ARM_VCMP except if NaN */
# define ARM_V_Z 0x00010000 /* ARM_VCMP with zero */
# define ARM_V_F64 0x00000100
/* Base opcodes of the scalar floating-point instructions.  The *_F32 wrapper
 * macros use them as is; the *_F64 wrappers OR in ARM_V_F64. */
# define ARM_VADD_F 0x0e300a00
# define ARM_VSUB_F 0x0e300a40
# define ARM_VMUL_F 0x0e200a00
# define ARM_VDIV_F 0x0e800a00
# define ARM_VABS_F 0x0eb00ac0
# define ARM_VNEG_F 0x0eb10a40
# define ARM_VSQRT_F 0x0eb10ac0
# define ARM_VMOV_F 0x0eb00a40
# define ARM_VMOV_A_S 0x0e100a10 /* vmov rn, sn */
# define ARM_VMOV_S_A 0x0e000a10 /* vmov sn, rn */
# define ARM_VMOV_AA_D 0x0c500b10 /* vmov rn,rn, dn */
# define ARM_VMOV_D_AA 0x0c400b10 /* vmov dn, rn,rn */
# define ARM_VCMP 0x0eb40a40
# define ARM_VMRS 0x0ef10a10
# define ARM_VMSR 0x0ee10a10
/* Modifier bits specific to the VCVT family, OR-ed into ARM_VCVT. */
# define ARM_VCVT_2I 0x00040000 /* to integer */
# define ARM_VCVT_2S 0x00010000 /* to signed */
# define ARM_VCVT_RS 0x00000080 /* round to zero or signed */
# define ARM_VCVT 0x0eb80a40
/* Complete conversion opcodes, named DESTINATION_SOURCE.  Each expansion is
 * parenthesized so the value stays intact when the macro is used next to an
 * operator that binds tighter than '|' (for example '&' or '=='). */
# define ARM_VCVT_S32_F32 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
# define ARM_VCVT_U32_F32 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS)
# define ARM_VCVT_S32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_U32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_F32_S32 (ARM_VCVT|ARM_VCVT_RS)
# define ARM_VCVT_F32_U32 (ARM_VCVT)
# define ARM_VCVT_F64_S32 (ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_F64_U32 (ARM_VCVT|ARM_V_F64)
/* Conversion between the two floating-point precisions. */
# define ARM_VCVT_F 0x0eb70ac0
# define ARM_VCVT_F32_F64 (ARM_VCVT_F)
# define ARM_VCVT_F64_F32 (ARM_VCVT_F|ARM_V_F64)
/* Same float-to-integer conversions as above but without ARM_VCVT_RS. */
# define ARM_VCVTR_S32_F32 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S)
# define ARM_VCVTR_U32_F32 (ARM_VCVT|ARM_VCVT_2I)
# define ARM_VCVTR_S32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64)
# define ARM_VCVTR_U32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_V_F64)
/* Fused multiply opcodes; the members of each pair differ only in bit 6.
 * NOTE(review): the ARM architecture manual appears to assign bit 6 set to
 * VFNMA and bit 6 clear to VFNMS, the reverse of the two names below --
 * confirm against the code that uses them before relying on the names. */
# define ARM_VFMA 0x0ea00a00
# define ARM_VFMS 0x0ea00a40
# define ARM_VFNMA 0x0e900a00
# define ARM_VFNMS 0x0e900a40
/* Modifier bits OR-ed into the vector opcode templates that follow. */
/* NOTE(review): ARM_V_D, ARM_V_N and ARM_V_M are not referenced in this
 * excerpt; presumably they carry the extra register-number bit of the
 * destination and of the two source operands -- confirm in the encoders. */
95 # define ARM_V_D 0x00400000
96 # define ARM_V_N 0x00000080
/* ARM_V_Q is OR-ed in by the *Q macro variants (those routed through
 * voqqq / vo_qq / voqi).  Same value as the earlier definition. */
97 # define ARM_V_Q 0x00000040
98 # define ARM_V_M 0x00000020
/* ARM_V_U is OR-ed in by the unsigned (_U8/_U16/_U32/_U64) variants. */
99 # define ARM_V_U 0x01000000
/* Element size selectors used by the three-operand integer macros; the
 * 8-bit forms OR in nothing. */
100 # define ARM_V_I16 0x00100000
101 # define ARM_V_I32 0x00200000
102 # define ARM_V_I64 0x00300000
/* Element size selectors used by the two-operand VABS/VQABS/VNEG/VQNEG
 * macros; again the 8-bit forms OR in nothing. */
103 # define ARM_V_S16 0x00040000
104 # define ARM_V_S32 0x00080000
/* Integer vector arithmetic opcode templates.  Size, signedness and Q bits
 * are OR-ed in by the wrapper macros further down. */
105 # define ARM_VADD_I 0x02000800
106 # define ARM_VQADD_I 0x02000010 /* set flag on over/carry */
107 # define ARM_VADDL_I 0x02800000 /* q=d+d */
108 # define ARM_VADDW_I 0x02800100 /* q=q+d */
109 # define ARM_VSUB_I 0x03000800
110 # define ARM_VQSUB_I 0x02000210 /* set flag on over/carry */
111 # define ARM_VSUBL_I 0x02800200
112 # define ARM_VSUBW_I 0x02800300
113 # define ARM_VMUL_I 0x02000910
114 # define ARM_VMULL_I 0x02800c00
115 # define ARM_VABS_I 0x03b10300
116 # define ARM_VQABS_I 0x03b00700 /* sets flag on overflow */
117 # define ARM_VNEG_I 0x03b10380
118 # define ARM_VQNEG_I 0x03b00780 /* sets flag on overflow */
/* Bitwise vector operations. */
119 # define ARM_VAND 0x02000110
120 # define ARM_VBIC 0x02100110
121 # define ARM_VORR 0x02200110
122 # define ARM_VORN 0x02300110
123 # define ARM_VEOR 0x03000110
/* One of the ARM_VMOVL_S* selectors is OR-ed into ARM_VMOVL_I by the
 * VMOVL_* macros (the unsigned forms add ARM_V_U as well). */
124 # define ARM_VMOVL_S8 0x00080000
125 # define ARM_VMOVL_S16 0x00100000
126 # define ARM_VMOVL_S32 0x00200000
127 # define ARM_VMOVL_I 0x02800a10
/* NOTE(review): ARM_VMOVI and ARM_VMVNI are not referenced in this excerpt. */
128 # define ARM_VMOVI 0x02800010
129 # define ARM_VMVNI 0x02800030
/* Base opcodes for the VLDR/VSTR wrappers and, via ARM_VM, for the
 * load/store-multiple wrappers. */
130 # define ARM_VLDR 0x0d100a00
131 # define ARM_VSTR 0x0d000a00
132 # define ARM_VM 0x0c000a00
/* ARM_VMOV_ADV_* bits are OR-ed into ARM_VMOV_A_D / ARM_VMOV_D_A by the
 * VMOV_A_* and VMOV_V_* macros to select element width and extension. */
133 # define ARM_VMOV_ADV_U 0x00800000 /* zero extend */
134 # define ARM_VMOV_ADV_8 0x00400000
135 # define ARM_VMOV_ADV_16 0x00000020
136 # define ARM_VMOV_A_D 0x0e100b10
137 # define ARM_VMOV_D_A 0x0e000b10
/* Opcode used by the VCNT macro. */
138 # define ARM_VCNT 0x03b00500
/*
 * Low-level instruction encoders and the macros that dispatch to them.
 * Every macro supplies the implicit _jit state as first argument.  Where two
 * macros exist for one encoder, the short form passes ARM_CC_NV as the
 * condition argument and the cc_ form forwards an explicit condition.
 * NOTE(review): the suffix letters appear to name the operand kinds
 * (s/d/q floating-point registers, r core register, i immediate) and
 * maybe_unused presumably silences unused-function warnings -- confirm
 * against the definitions, which are not part of this excerpt.
 */
/* Immediate encoders: opcode-plus-immediate word "oi" and one register. */
139 # define vodi(oi,r0) _vodi(_jit,oi,r0)
140 static void _vodi(jit_state_t*,int,int) maybe_unused;
141 # define voqi(oi,r0) _voqi(_jit,oi,r0)
142 static void _voqi(jit_state_t*,int,int) maybe_unused;
/* Two-register encoders: (cc, opcode, r0, r1). */
143 # define vo_ss(o,r0,r1) _cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
144 # define cc_vo_ss(cc,o,r0,r1) _cc_vo_ss(_jit,cc,o,r0,r1)
145 static void _cc_vo_ss(jit_state_t*,int,int,int,int);
146 # define vo_dd(o,r0,r1) _cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
147 # define cc_vo_dd(cc,o,r0,r1) _cc_vo_dd(_jit,cc,o,r0,r1)
148 static void _cc_vo_dd(jit_state_t*,int,int,int,int);
149 # define vo_qd(o,r0,r1) _cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
150 # define cc_vo_qd(cc,o,r0,r1) _cc_vo_qd(_jit,cc,o,r0,r1)
151 static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
152 # define vo_qq(o,r0,r1) _cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
153 # define cc_vo_qq(cc,o,r0,r1) _cc_vo_qq(_jit,cc,o,r0,r1)
154 static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
155 # define vorr_(o,r0,r1) _cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
156 # define cc_vorr_(cc,o,r0,r1) _cc_vorr_(_jit,cc,o,r0,r1)
157 static void _cc_vorr_(jit_state_t*,int,int,int,int);
158 # define vors_(o,r0,r1) _cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
159 # define cc_vors_(cc,o,r0,r1) _cc_vors_(_jit,cc,o,r0,r1)
160 static void _cc_vors_(jit_state_t*,int,int,int,int);
161 # define vorv_(o,r0,r1) _cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
162 # define cc_vorv_(cc,o,r0,r1) _cc_vorv_(_jit,cc,o,r0,r1)
163 static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
/* vo_vv has no cc_ counterpart; it is only reached with ARM_CC_NV. */
164 # define vo_vv(o,r0,r1) _cc_vo_vv(_jit,ARM_CC_NV,o,r0,r1)
165 static void _cc_vo_vv(jit_state_t*,int,int,int,int) maybe_unused;
166 # define vori_(o,r0,r1) _cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
167 # define cc_vori_(cc,o,r0,r1) _cc_vori_(_jit,cc,o,r0,r1)
168 static void _cc_vori_(jit_state_t*,int,int,int,int);
/* Three-register encoders: (cc, opcode, r0, r1, r2). */
169 # define vorrd(o,r0,r1,r2) _cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
170 # define cc_vorrd(cc,o,r0,r1,r2) _cc_vorrd(_jit,cc,o,r0,r1,r2)
171 static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
172 # define vosss(o,r0,r1,r2) _cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
173 # define cc_vosss(cc,o,r0,r1,r2) _cc_vosss(_jit,cc,o,r0,r1,r2)
174 static void _cc_vosss(jit_state_t*,int,int,int,int,int);
175 # define voddd(o,r0,r1,r2) _cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
176 # define cc_voddd(cc,o,r0,r1,r2) _cc_voddd(_jit,cc,o,r0,r1,r2)
177 static void _cc_voddd(jit_state_t*,int,int,int,int,int);
178 # define voqdd(o,r0,r1,r2) _cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
179 # define cc_voqdd(cc,o,r0,r1,r2) _cc_voqdd(_jit,cc,o,r0,r1,r2)
180 static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
181 # define voqqd(o,r0,r1,r2) _cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
182 # define cc_voqqd(cc,o,r0,r1,r2) _cc_voqqd(_jit,cc,o,r0,r1,r2)
183 static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
184 # define voqqq(o,r0,r1,r2) _cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
185 # define cc_voqqq(cc,o,r0,r1,r2) _cc_voqqq(_jit,cc,o,r0,r1,r2)
186 static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
/* Encoders that take two registers plus an integer i0; these only exist in
 * the explicit-condition form. */
187 # define cc_vldst(cc,o,r0,r1,i0) _cc_vldst(_jit,cc,o,r0,r1,i0)
188 static void _cc_vldst(jit_state_t*,int,int,int,int,int);
189 # define cc_vorsl(cc,o,r0,r1,i0) _cc_vorsl(_jit,cc,o,r0,r1,i0)
190 static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
/*
 * Scalar floating-point arithmetic wrappers.  Each operation comes as a
 * CC_* macro taking an explicit condition code and a plain macro that
 * passes ARM_CC_AL.  The _F32 forms go through cc_vosss / cc_vo_ss; the
 * _F64 forms OR ARM_V_F64 into the opcode and go through cc_voddd / cc_vo_dd.
 */
/* Three-register operations: r0, r1, r2 are forwarded in that order. */
191 # define CC_VADD_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VADD_F,r0,r1,r2)
192 # define VADD_F32(r0,r1,r2) CC_VADD_F32(ARM_CC_AL,r0,r1,r2)
193 # define CC_VADD_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VADD_F|ARM_V_F64,r0,r1,r2)
194 # define VADD_F64(r0,r1,r2) CC_VADD_F64(ARM_CC_AL,r0,r1,r2)
195 # define CC_VSUB_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VSUB_F,r0,r1,r2)
196 # define VSUB_F32(r0,r1,r2) CC_VSUB_F32(ARM_CC_AL,r0,r1,r2)
197 # define CC_VSUB_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VSUB_F|ARM_V_F64,r0,r1,r2)
198 # define VSUB_F64(r0,r1,r2) CC_VSUB_F64(ARM_CC_AL,r0,r1,r2)
199 # define CC_VMUL_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VMUL_F,r0,r1,r2)
200 # define VMUL_F32(r0,r1,r2) CC_VMUL_F32(ARM_CC_AL,r0,r1,r2)
201 # define CC_VMUL_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VMUL_F|ARM_V_F64,r0,r1,r2)
202 # define VMUL_F64(r0,r1,r2) CC_VMUL_F64(ARM_CC_AL,r0,r1,r2)
203 # define CC_VDIV_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VDIV_F,r0,r1,r2)
204 # define VDIV_F32(r0,r1,r2) CC_VDIV_F32(ARM_CC_AL,r0,r1,r2)
205 # define CC_VDIV_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VDIV_F|ARM_V_F64,r0,r1,r2)
206 # define VDIV_F64(r0,r1,r2) CC_VDIV_F64(ARM_CC_AL,r0,r1,r2)
/* Two-register operations: r0 and r1 are forwarded in that order. */
207 # define CC_VABS_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VABS_F,r0,r1)
208 # define VABS_F32(r0,r1) CC_VABS_F32(ARM_CC_AL,r0,r1)
209 # define CC_VABS_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VABS_F|ARM_V_F64,r0,r1)
210 # define VABS_F64(r0,r1) CC_VABS_F64(ARM_CC_AL,r0,r1)
211 # define CC_VNEG_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VNEG_F,r0,r1)
212 # define VNEG_F32(r0,r1) CC_VNEG_F32(ARM_CC_AL,r0,r1)
213 # define CC_VNEG_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VNEG_F|ARM_V_F64,r0,r1)
214 # define VNEG_F64(r0,r1) CC_VNEG_F64(ARM_CC_AL,r0,r1)
215 # define CC_VSQRT_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VSQRT_F,r0,r1)
216 # define VSQRT_F32(r0,r1) CC_VSQRT_F32(ARM_CC_AL,r0,r1)
217 # define CC_VSQRT_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VSQRT_F|ARM_V_F64,r0,r1)
218 # define VSQRT_F64(r0,r1) CC_VSQRT_F64(ARM_CC_AL,r0,r1)
/*
 * Fused multiply wrappers (VFMA, VFMS, VFNMA, VFNMS).  Each comes as a CC_*
 * macro taking an explicit condition code and a plain macro that passes
 * ARM_CC_AL; r0, r1, r2 are forwarded in that order.
 *
 * The single-precision forms are emitted through cc_vosss, the same encoder
 * every other three-register *_F32 wrapper in this file uses, so their
 * operands are treated as single-precision registers.  The double-precision
 * forms OR ARM_V_F64 into the opcode and are emitted through cc_voddd.
 */
# define CC_VFMA_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VFMA,r0,r1,r2)
# define VFMA_F32(r0,r1,r2) CC_VFMA_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VFMA_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFMA|ARM_V_F64,r0,r1,r2)
# define VFMA_F64(r0,r1,r2) CC_VFMA_F64(ARM_CC_AL,r0,r1,r2)
# define CC_VFMS_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VFMS,r0,r1,r2)
# define VFMS_F32(r0,r1,r2) CC_VFMS_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VFMS_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFMS|ARM_V_F64,r0,r1,r2)
# define VFMS_F64(r0,r1,r2) CC_VFMS_F64(ARM_CC_AL,r0,r1,r2)
# define CC_VFNMA_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VFNMA,r0,r1,r2)
# define VFNMA_F32(r0,r1,r2) CC_VFNMA_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VFNMA_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMA|ARM_V_F64,r0,r1,r2)
# define VFNMA_F64(r0,r1,r2) CC_VFNMA_F64(ARM_CC_AL,r0,r1,r2)
# define CC_VFNMS_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VFNMS,r0,r1,r2)
# define VFNMS_F32(r0,r1,r2) CC_VFNMS_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VFNMS_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMS|ARM_V_F64,r0,r1,r2)
# define VFNMS_F64(r0,r1,r2) CC_VFNMS_F64(ARM_CC_AL,r0,r1,r2)
/*
 * Register moves, comparisons and FPSCR access.  As elsewhere, the CC_*
 * form takes an explicit condition code and the plain form passes ARM_CC_AL.
 */
/* Move between two floating-point registers of the same precision. */
235 # define CC_VMOV_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
236 # define VMOV_F32(r0,r1) CC_VMOV_F32(ARM_CC_AL,r0,r1)
237 # define CC_VMOV_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
238 # define VMOV_F64(r0,r1) CC_VMOV_F64(ARM_CC_AL,r0,r1)
/* Moves built on cc_vorrd.  Note the operand rotation: VMOV_AA_D forwards
 * (r0,r1,r2) unchanged while VMOV_D_AA forwards (r1,r2,r0), so in both
 * macros the caller writes the destination operand(s) first. */
239 # define CC_VMOV_AA_D(cc,r0,r1,r2) cc_vorrd(cc,ARM_VMOV_AA_D,r0,r1,r2)
240 # define VMOV_AA_D(r0,r1,r2) CC_VMOV_AA_D(ARM_CC_AL,r0,r1,r2)
241 # define CC_VMOV_D_AA(cc,r0,r1,r2) cc_vorrd(cc,ARM_VMOV_D_AA,r1,r2,r0)
242 # define VMOV_D_AA(r0,r1,r2) CC_VMOV_D_AA(ARM_CC_AL,r0,r1,r2)
/* Moves built on cc_vors_.  VMOV_A_S forwards (r0,r1); VMOV_S_A swaps them
 * to (r1,r0) before calling the encoder. */
243 # define CC_VMOV_A_S(cc,r0,r1) cc_vors_(cc,ARM_VMOV_A_S,r0,r1)
244 # define VMOV_A_S(r0,r1) CC_VMOV_A_S(ARM_CC_AL,r0,r1)
245 # define CC_VMOV_S_A(cc,r0,r1) cc_vors_(cc,ARM_VMOV_S_A,r1,r0)
246 # define VMOV_S_A(r0,r1) CC_VMOV_S_A(ARM_CC_AL,r0,r1)
/* Comparisons.  The E variants OR in ARM_V_E, the Z variants OR in ARM_V_Z
 * and pass 0 as second register argument. */
247 # define CC_VCMP_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCMP,r0,r1)
248 # define VCMP_F32(r0,r1) CC_VCMP_F32(ARM_CC_AL,r0,r1)
249 # define CC_VCMP_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VCMP|ARM_V_F64,r0,r1)
250 # define VCMP_F64(r0,r1) CC_VCMP_F64(ARM_CC_AL,r0,r1)
251 # define CC_VCMPE_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCMP|ARM_V_E,r0,r1)
252 # define VCMPE_F32(r0,r1) CC_VCMPE_F32(ARM_CC_AL,r0,r1)
253 # define CC_VCMPE_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VCMP|ARM_V_E|ARM_V_F64,r0,r1)
254 # define VCMPE_F64(r0,r1) CC_VCMPE_F64(ARM_CC_AL,r0,r1)
255 # define CC_VCMPZ_F32(cc,r0) cc_vo_ss(cc,ARM_VCMP|ARM_V_Z,r0,0)
256 # define VCMPZ_F32(r0) CC_VCMPZ_F32(ARM_CC_AL,r0)
257 # define CC_VCMPZ_F64(cc,r0) cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_F64,r0,0)
258 # define VCMPZ_F64(r0) CC_VCMPZ_F64(ARM_CC_AL,r0)
259 # define CC_VCMPEZ_F32(cc,r0) cc_vo_ss(cc,ARM_VCMP|ARM_V_Z|ARM_V_E,r0,0)
260 # define VCMPEZ_F32(r0) CC_VCMPEZ_F32(ARM_CC_AL,r0)
261 # define CC_VCMPEZ_F64(cc,r0) cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_E|ARM_V_F64,r0,0)
262 # define VCMPEZ_F64(r0) CC_VCMPEZ_F64(ARM_CC_AL,r0)
/* VMRS / VMSR take a single register; the second encoder argument is 0. */
263 # define CC_VMRS(cc,r0) cc_vorr_(cc,ARM_VMRS,r0,0)
264 # define VMRS(r0) CC_VMRS(ARM_CC_AL,r0)
265 # define CC_VMSR(cc,r0) cc_vorr_(cc,ARM_VMSR,r0,0)
266 # define VMSR(r0) CC_VMSR(ARM_CC_AL,r0)
/*
 * Conversion wrappers, named DESTINATION_SOURCE after the matching
 * ARM_VCVT_* / ARM_VCVTR_* opcode.  The CC_* form takes an explicit
 * condition code; the plain form passes ARM_CC_AL.
 * Every macro here is emitted through cc_vo_ss, including the forms in
 * which one of the two operands is double precision.
 */
267 # define CC_VCVT_S32_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_S32_F32,r0,r1)
268 # define VCVT_S32_F32(r0,r1) CC_VCVT_S32_F32(ARM_CC_AL,r0,r1)
269 # define CC_VCVT_U32_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_U32_F32,r0,r1)
270 # define VCVT_U32_F32(r0,r1) CC_VCVT_U32_F32(ARM_CC_AL,r0,r1)
271 # define CC_VCVT_S32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_S32_F64,r0,r1)
272 # define VCVT_S32_F64(r0,r1) CC_VCVT_S32_F64(ARM_CC_AL,r0,r1)
273 # define CC_VCVT_U32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_U32_F64,r0,r1)
274 # define VCVT_U32_F64(r0,r1) CC_VCVT_U32_F64(ARM_CC_AL,r0,r1)
/* Integer to floating point. */
275 # define CC_VCVT_F32_S32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F32_S32,r0,r1)
276 # define VCVT_F32_S32(r0,r1) CC_VCVT_F32_S32(ARM_CC_AL,r0,r1)
277 # define CC_VCVT_F32_U32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F32_U32,r0,r1)
278 # define VCVT_F32_U32(r0,r1) CC_VCVT_F32_U32(ARM_CC_AL,r0,r1)
279 # define CC_VCVT_F64_S32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F64_S32,r0,r1)
280 # define VCVT_F64_S32(r0,r1) CC_VCVT_F64_S32(ARM_CC_AL,r0,r1)
281 # define CC_VCVT_F64_U32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F64_U32,r0,r1)
282 # define VCVT_F64_U32(r0,r1) CC_VCVT_F64_U32(ARM_CC_AL,r0,r1)
/* Between the two floating-point precisions. */
283 # define CC_VCVT_F32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F32_F64,r0,r1)
284 # define VCVT_F32_F64(r0,r1) CC_VCVT_F32_F64(ARM_CC_AL,r0,r1)
285 # define CC_VCVT_F64_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F64_F32,r0,r1)
286 # define VCVT_F64_F32(r0,r1) CC_VCVT_F64_F32(ARM_CC_AL,r0,r1)
/* VCVTR variants: same conversions using the opcodes without ARM_VCVT_RS. */
287 # define CC_VCVTR_S32_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_S32_F32,r0,r1)
288 # define VCVTR_S32_F32(r0,r1) CC_VCVTR_S32_F32(ARM_CC_AL,r0,r1)
289 # define CC_VCVTR_U32_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_U32_F32,r0,r1)
290 # define VCVTR_U32_F32(r0,r1) CC_VCVTR_U32_F32(ARM_CC_AL,r0,r1)
291 # define CC_VCVTR_S32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_S32_F64,r0,r1)
292 # define VCVTR_S32_F64(r0,r1) CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
293 # define CC_VCVTR_U32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
294 # define VCVTR_U32_F64(r0,r1) CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
/*
 * Load/store-multiple wrappers, all emitted through cc_vorsl with ARM_VM as
 * base opcode.  The ARM_M_* bits are defined outside this excerpt; the
 * macro names pair them up as follows: loads add ARM_M_L, the IA forms add
 * ARM_M_I, the DB forms add ARM_M_B and the _U forms add ARM_M_U.  The _F64
 * forms additionally OR in ARM_V_F64.
 * NOTE(review): i0 is presumably the register count and _U presumably
 * requests base-register write-back -- confirm in _cc_vorsl.
 */
295 # define CC_VLDMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
296 # define VLDMIA_F32(r0,r1,i0) CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
297 # define CC_VLDMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
298 # define VLDMIA_F64(r0,r1,i0) CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
299 # define CC_VSTMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
300 # define VSTMIA_F32(r0,r1,i0) CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
301 # define CC_VSTMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
302 # define VSTMIA_F64(r0,r1,i0) CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)
303 # define CC_VLDMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
304 # define VLDMIA_U_F32(r0,r1,i0) CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
305 # define CC_VLDMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
306 # define VLDMIA_U_F64(r0,r1,i0) CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
307 # define CC_VSTMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
308 # define VSTMIA_U_F32(r0,r1,i0) CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
309 # define CC_VSTMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
310 # define VSTMIA_U_F64(r0,r1,i0) CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)
311 # define CC_VLDMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
312 # define VLDMDB_U_F32(r0,r1,i0) CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
313 # define CC_VLDMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
314 # define VLDMDB_U_F64(r0,r1,i0) CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
315 # define CC_VSTMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
316 # define VSTMDB_U_F32(r0,r1,i0) CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
317 # define CC_VSTMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
318 # define VSTMDB_U_F64(r0,r1,i0) CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)
/* Push and pop are the VSTMDB_U / VLDMIA_U forms with _SP_REGNO as the
 * first register argument. */
319 # define CC_VPUSH_F32(cc,r0,i0) CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
320 # define VPUSH_F32(r0,i0) CC_VPUSH_F32(ARM_CC_AL,r0,i0)
321 # define CC_VPUSH_F64(cc,r0,i0) CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
322 # define VPUSH_F64(r0,i0) CC_VPUSH_F64(ARM_CC_AL,r0,i0)
323 # define CC_VPOP_F32(cc,r0,i0) CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
324 # define VPOP_F32(r0,i0) CC_VPOP_F32(ARM_CC_AL,r0,i0)
325 # define CC_VPOP_F64(cc,r0,i0) CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
326 # define VPOP_F64(r0,i0) CC_VPOP_F64(ARM_CC_AL,r0,i0)
/*
 * Element moves built on ARM_VMOV_A_D and ARM_VMOV_D_A.  The 8- and 16-bit
 * forms OR in ARM_VMOV_ADV_8 / ARM_VMOV_ADV_16 and go through cc_vorv_; the
 * 32-bit forms go through cc_vori_.  The _U forms add ARM_VMOV_ADV_U
 * (zero extend).
 */
/* VMOV_A_*: operands are forwarded as (r0,r1). */
327 # define CC_VMOV_A_S8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
328 # define VMOV_A_S8(r0,r1) CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
329 # define CC_VMOV_A_U8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
330 # define VMOV_A_U8(r0,r1) CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
331 # define CC_VMOV_A_S16(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16,r0,r1)
332 # define VMOV_A_S16(r0,r1) CC_VMOV_A_S16(ARM_CC_AL,r0,r1)
333 # define CC_VMOV_A_U16(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16|ARM_VMOV_ADV_U,r0,r1)
334 # define VMOV_A_U16(r0,r1) CC_VMOV_A_U16(ARM_CC_AL,r0,r1)
335 # define CC_VMOV_A_S32(cc,r0,r1) cc_vori_(cc,ARM_VMOV_A_D,r0,r1)
336 # define VMOV_A_S32(r0,r1) CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
337 # define CC_VMOV_A_U32(cc,r0,r1) cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
338 # define VMOV_A_U32(r0,r1) CC_VMOV_A_U32(ARM_CC_AL,r0,r1)
/* VMOV_V_*: the opposite direction; operands are swapped to (r1,r0) before
 * reaching the encoder. */
339 # define CC_VMOV_V_I8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
340 # define VMOV_V_I8(r0,r1) CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
341 # define CC_VMOV_V_I16(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
342 # define VMOV_V_I16(r0,r1) CC_VMOV_V_I16(ARM_CC_AL,r0,r1)
343 # define CC_VMOV_V_I32(cc,r0,r1) cc_vori_(cc,ARM_VMOV_D_A,r1,r0)
344 # define VMOV_V_I32(r0,r1) CC_VMOV_V_I32(ARM_CC_AL,r0,r1)
/* VCNT is emitted through vo_vv, which always passes ARM_CC_NV. */
345 # define VCNT(r0,r1) vo_vv(ARM_VCNT,r0,r1)
/*
 * Integer vector addition.  These wrappers have no CC_* form: they use the
 * short encoder macros, which pass ARM_CC_NV.  The plain names go through
 * voddd; the *Q names OR in ARM_V_Q and go through voqqq.  ARM_V_I16 /
 * ARM_V_I32 / ARM_V_I64 select the element size (nothing is added for 8 bit)
 * and ARM_V_U selects the unsigned form where signedness matters.
 */
346 # define VADD_I8(r0,r1,r2) voddd(ARM_VADD_I,r0,r1,r2)
347 # define VADDQ_I8(r0,r1,r2) voqqq(ARM_VADD_I|ARM_V_Q,r0,r1,r2)
348 # define VADD_I16(r0,r1,r2) voddd(ARM_VADD_I|ARM_V_I16,r0,r1,r2)
349 # define VADDQ_I16(r0,r1,r2) voqqq(ARM_VADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
350 # define VADD_I32(r0,r1,r2) voddd(ARM_VADD_I|ARM_V_I32,r0,r1,r2)
351 # define VADDQ_I32(r0,r1,r2) voqqq(ARM_VADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
352 # define VADD_I64(r0,r1,r2) voddd(ARM_VADD_I|ARM_V_I64,r0,r1,r2)
353 # define VADDQ_I64(r0,r1,r2) voqqq(ARM_VADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
/* VQADD: built on ARM_VQADD_I ("set flag on over/carry"). */
354 # define VQADD_S8(r0,r1,r2) voddd(ARM_VQADD_I,r0,r1,r2)
355 # define VQADDQ_S8(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_Q,r0,r1,r2)
356 # define VQADD_U8(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_U,r0,r1,r2)
357 # define VQADDQ_U8(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
358 # define VQADD_S16(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I16,r0,r1,r2)
359 # define VQADDQ_S16(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
360 # define VQADD_U16(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
361 # define VQADDQ_U16(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
362 # define VQADD_S32(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I32,r0,r1,r2)
363 # define VQADDQ_S32(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
364 # define VQADD_U32(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
365 # define VQADDQ_U32(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
366 # define VQADD_S64(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I64,r0,r1,r2)
367 # define VQADDQ_S64(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
368 # define VQADD_U64(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
369 # define VQADDQ_U64(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
/* VADDL (q=d+d) goes through voqdd. */
370 # define VADDL_S8(r0,r1,r2) voqdd(ARM_VADDL_I,r0,r1,r2)
371 # define VADDL_U8(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_U,r0,r1,r2)
372 # define VADDL_S16(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_I16,r0,r1,r2)
373 # define VADDL_U16(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
374 # define VADDL_S32(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_I32,r0,r1,r2)
375 # define VADDL_U32(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/* VADDW (q=q+d) goes through voqqd. */
376 # define VADDW_S8(r0,r1,r2) voqqd(ARM_VADDW_I,r0,r1,r2)
377 # define VADDW_U8(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_U,r0,r1,r2)
378 # define VADDW_S16(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_I16,r0,r1,r2)
379 # define VADDW_U16(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
380 # define VADDW_S32(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_I32,r0,r1,r2)
381 # define VADDW_U32(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/*
 * Integer vector subtraction and multiplication.  Same layout as the
 * addition wrappers: plain names go through voddd, *Q names OR in ARM_V_Q
 * and go through voqqq, ARM_V_I16 / ARM_V_I32 / ARM_V_I64 select the element
 * size and ARM_V_U the unsigned form.
 */
382 # define VSUB_I8(r0,r1,r2) voddd(ARM_VSUB_I,r0,r1,r2)
383 # define VSUBQ_I8(r0,r1,r2) voqqq(ARM_VSUB_I|ARM_V_Q,r0,r1,r2)
384 # define VSUB_I16(r0,r1,r2) voddd(ARM_VSUB_I|ARM_V_I16,r0,r1,r2)
385 # define VSUBQ_I16(r0,r1,r2) voqqq(ARM_VSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
386 # define VSUB_I32(r0,r1,r2) voddd(ARM_VSUB_I|ARM_V_I32,r0,r1,r2)
387 # define VSUBQ_I32(r0,r1,r2) voqqq(ARM_VSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
388 # define VSUB_I64(r0,r1,r2) voddd(ARM_VSUB_I|ARM_V_I64,r0,r1,r2)
389 # define VSUBQ_I64(r0,r1,r2) voqqq(ARM_VSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
/* VQSUB: built on ARM_VQSUB_I ("set flag on over/carry"). */
390 # define VQSUB_S8(r0,r1,r2) voddd(ARM_VQSUB_I,r0,r1,r2)
391 # define VQSUBQ_S8(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_Q,r0,r1,r2)
392 # define VQSUB_U8(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_U,r0,r1,r2)
393 # define VQSUBQ_U8(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
394 # define VQSUB_S16(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I16,r0,r1,r2)
395 # define VQSUBQ_S16(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
396 # define VQSUB_U16(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
397 # define VQSUBQ_U16(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
398 # define VQSUB_S32(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I32,r0,r1,r2)
399 # define VQSUBQ_S32(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
400 # define VQSUB_U32(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
401 # define VQSUBQ_U32(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
402 # define VQSUB_S64(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I64,r0,r1,r2)
403 # define VQSUBQ_S64(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
404 # define VQSUB_U64(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
405 # define VQSUBQ_U64(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
/* VSUBL goes through voqdd, like VADDL. */
406 # define VSUBL_S8(r0,r1,r2) voqdd(ARM_VSUBL_I,r0,r1,r2)
407 # define VSUBL_U8(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_U,r0,r1,r2)
408 # define VSUBL_S16(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_I16,r0,r1,r2)
409 # define VSUBL_U16(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
410 # define VSUBL_S32(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_I32,r0,r1,r2)
411 # define VSUBL_U32(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/* VSUBW goes through voqqd, like VADDW. */
412 # define VSUBW_S8(r0,r1,r2) voqqd(ARM_VSUBW_I,r0,r1,r2)
413 # define VSUBW_U8(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_U,r0,r1,r2)
414 # define VSUBW_S16(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_I16,r0,r1,r2)
415 # define VSUBW_U16(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
416 # define VSUBW_S32(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_I32,r0,r1,r2)
417 # define VSUBW_U32(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/* VMUL: no 64-bit and no signed/unsigned variants are provided. */
418 # define VMUL_I8(r0,r1,r2) voddd(ARM_VMUL_I,r0,r1,r2)
419 # define VMULQ_I8(r0,r1,r2) voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
420 # define VMUL_I16(r0,r1,r2) voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
421 # define VMULQ_I16(r0,r1,r2) voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
422 # define VMUL_I32(r0,r1,r2) voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
423 # define VMULQ_I32(r0,r1,r2) voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)
/*
 * Widening integer multiply (q = d * d): r0 is a quad-register destination,
 * r1 and r2 are double-register sources.  All six forms therefore go through
 * voqdd, exactly like the other widening operations VADDL and VSUBL; the
 * signed and unsigned forms differ only in ARM_V_U, and ARM_V_I16 /
 * ARM_V_I32 select the source element size (nothing is added for 8 bit).
 */
# define VMULL_S8(r0,r1,r2) voqdd(ARM_VMULL_I,r0,r1,r2)
# define VMULL_U8(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2)
# define VMULL_S16(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
# define VMULL_U16(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
# define VMULL_S32(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
# define VMULL_U32(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
/*
 * Two-operand integer vector operations (absolute value and negation, plus
 * their VQ* counterparts built on the "sets flag on overflow" opcodes).
 * Plain names go through vo_dd; *Q names OR in ARM_V_Q and go through vo_qq.
 * ARM_V_S16 / ARM_V_S32 select the element size (nothing is added for 8 bit).
 * Both encoder macros pass ARM_CC_NV.
 */
430 # define VABS_S8(r0,r1) vo_dd(ARM_VABS_I,r0,r1)
431 # define VABSQ_S8(r0,r1) vo_qq(ARM_VABS_I|ARM_V_Q,r0,r1)
432 # define VABS_S16(r0,r1) vo_dd(ARM_VABS_I|ARM_V_S16,r0,r1)
433 # define VABSQ_S16(r0,r1) vo_qq(ARM_VABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
434 # define VABS_S32(r0,r1) vo_dd(ARM_VABS_I|ARM_V_S32,r0,r1)
435 # define VABSQ_S32(r0,r1) vo_qq(ARM_VABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
436 # define VQABS_S8(r0,r1) vo_dd(ARM_VQABS_I,r0,r1)
437 # define VQABSQ_S8(r0,r1) vo_qq(ARM_VQABS_I|ARM_V_Q,r0,r1)
438 # define VQABS_S16(r0,r1) vo_dd(ARM_VQABS_I|ARM_V_S16,r0,r1)
439 # define VQABSQ_S16(r0,r1) vo_qq(ARM_VQABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
440 # define VQABS_S32(r0,r1) vo_dd(ARM_VQABS_I|ARM_V_S32,r0,r1)
441 # define VQABSQ_S32(r0,r1) vo_qq(ARM_VQABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
442 # define VNEG_S8(r0,r1) vo_dd(ARM_VNEG_I,r0,r1)
443 # define VNEGQ_S8(r0,r1) vo_qq(ARM_VNEG_I|ARM_V_Q,r0,r1)
444 # define VNEG_S16(r0,r1) vo_dd(ARM_VNEG_I|ARM_V_S16,r0,r1)
445 # define VNEGQ_S16(r0,r1) vo_qq(ARM_VNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
446 # define VNEG_S32(r0,r1) vo_dd(ARM_VNEG_I|ARM_V_S32,r0,r1)
447 # define VNEGQ_S32(r0,r1) vo_qq(ARM_VNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
448 # define VQNEG_S8(r0,r1) vo_dd(ARM_VQNEG_I,r0,r1)
449 # define VQNEGQ_S8(r0,r1) vo_qq(ARM_VQNEG_I|ARM_V_Q,r0,r1)
450 # define VQNEG_S16(r0,r1) vo_dd(ARM_VQNEG_I|ARM_V_S16,r0,r1)
451 # define VQNEGQ_S16(r0,r1) vo_qq(ARM_VQNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
452 # define VQNEG_S32(r0,r1) vo_dd(ARM_VQNEG_I|ARM_V_S32,r0,r1)
453 # define VQNEGQ_S32(r0,r1) vo_qq(ARM_VQNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
/* Bitwise vector operations: plain names go through voddd, *Q names OR in
 * ARM_V_Q and go through voqqq. */
454 # define VAND(r0,r1,r2) voddd(ARM_VAND,r0,r1,r2)
455 # define VANDQ(r0,r1,r2) voqqq(ARM_VAND|ARM_V_Q,r0,r1,r2)
456 # define VBIC(r0,r1,r2) voddd(ARM_VBIC,r0,r1,r2)
457 # define VBICQ(r0,r1,r2) voqqq(ARM_VBIC|ARM_V_Q,r0,r1,r2)
458 # define VORR(r0,r1,r2) voddd(ARM_VORR,r0,r1,r2)
459 # define VORRQ(r0,r1,r2) voqqq(ARM_VORR|ARM_V_Q,r0,r1,r2)
460 # define VORN(r0,r1,r2) voddd(ARM_VORN,r0,r1,r2)
461 # define VORNQ(r0,r1,r2) voqqq(ARM_VORN|ARM_V_Q,r0,r1,r2)
462 # define VEOR(r0,r1,r2) voddd(ARM_VEOR,r0,r1,r2)
463 # define VEORQ(r0,r1,r2) voqqq(ARM_VEOR|ARM_V_Q,r0,r1,r2)
/* A vector move is expressed as an OR of the source with itself; note that
 * the r1 argument is therefore expanded twice. */
464 # define VMOV(r0,r1) VORR(r0,r1,r1)
465 # define VMOVQ(r0,r1) VORRQ(r0,r1,r1)
/* VMOVL goes through vo_qd.  An ARM_VMOVL_S* size selector is OR-ed into
 * ARM_VMOVL_I; the unsigned forms add ARM_V_U. */
466 # define VMOVL_S8(r0,r1) vo_qd(ARM_VMOVL_I|ARM_VMOVL_S8,r0,r1)
467 # define VMOVL_U8(r0,r1) vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S8,r0,r1)
468 # define VMOVL_S16(r0,r1) vo_qd(ARM_VMOVL_I|ARM_VMOVL_S16,r0,r1)
469 # define VMOVL_U16(r0,r1) vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S16,r0,r1)
470 # define VMOVL_S32(r0,r1) vo_qd(ARM_VMOVL_I|ARM_VMOVL_S32,r0,r1)
471 # define VMOVL_U32(r0,r1) vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S32,r0,r1)
/* Immediate loads.  Unlike the other wrappers the opcode word comes first
 * and the register second; VIMMQ ORs ARM_V_Q into the caller's word. */
472 /* "oi" should be the result of encode_vfp_double */
473 # define VIMM(oi,r0) vodi(oi,r0)
474 # define VIMMQ(oi,r0) voqi(oi|ARM_V_Q,r0)
/*
 * Single-register load/store wrappers, all emitted through cc_vldst with
 * arguments (cc, opcode, r0, r1, i0).  The CC_* form takes an explicit
 * condition code; the plain form passes ARM_CC_AL.  The _F64 forms OR in
 * ARM_V_F64.  The forms without the N suffix additionally OR in ARM_P, which
 * is defined outside this excerpt.
 * NOTE(review): presumably ARM_P selects a positive offset and the N forms
 * a negative one -- confirm against the ARM_P definition and _cc_vldst.
 */
475 /* index is multiplied by four */
476 # define CC_VLDRN_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR,r0,r1,i0)
477 # define VLDRN_F32(r0,r1,i0) CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
478 # define CC_VLDR_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
479 # define VLDR_F32(r0,r1,i0) CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
480 # define CC_VLDRN_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
481 # define VLDRN_F64(r0,r1,i0) CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
482 # define CC_VLDR_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
483 # define VLDR_F64(r0,r1,i0) CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)
484 # define CC_VSTRN_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR,r0,r1,i0)
485 # define VSTRN_F32(r0,r1,i0) CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
486 # define CC_VSTR_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
487 # define VSTR_F32(r0,r1,i0) CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
488 # define CC_VSTRN_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
489 # define VSTRN_F64(r0,r1,i0) CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
490 # define CC_VSTR_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
491 # define VSTR_F64(r0,r1,i0) CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
492 # define vfp_popcntr(r0,r1) _vfp_popcntr(_jit,r0,r1)
493 static void _vfp_popcntr(jit_state_t*,jit_int32_t,jit_int32_t);
494 # define vfp_movr_f(r0,r1) _vfp_movr_f(_jit,r0,r1)
495 static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
496 # define vfp_movi_f(r0,i0) _vfp_movi_f(_jit,r0,i0)
497 static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
498 # define vfp_movr_w_f(r0, r1) VMOV_S_A(r0, r1)
499 # define vfp_movr_f_w(r0, r1) VMOV_A_S(r0, r1)
500 # define vfp_movi_w_f(r0, i0) _vfp_movi_w_f(_jit, r0, i0)
501 static void _vfp_movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
502 # define vfp_movr_d(r0,r1) _vfp_movr_d(_jit,r0,r1)
503 static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
504 # define vfp_movi_d(r0,i0) _vfp_movi_d(_jit,r0,i0)
505 static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
506 # define vfp_movr_ww_d(r0, r1, r2) VMOV_D_AA(r0, r1, r2)
507 # define vfp_movr_d_ww(r0, r1, r2) VMOV_AA_D(r0, r1, r2)
508 static void _vfp_movr_d_ww(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
509 # define vfp_movi_ww_d(r0, i0, i1) _vfp_movi_ww_d(_jit, r0, i0, i1)
510 static void _vfp_movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
/* Conversion helpers (extr and truncr families).  All six are out-of-line
 * static functions taking the JIT state and two register numbers; the macros
 * only supply the implicit _jit argument. */
511 # define vfp_extr_f(r0,r1) _vfp_extr_f(_jit,r0,r1)
512 static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
513 # define vfp_extr_d(r0,r1) _vfp_extr_d(_jit,r0,r1)
514 static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
515 # define vfp_extr_d_f(r0,r1) _vfp_extr_d_f(_jit,r0,r1)
516 static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
517 # define vfp_extr_f_d(r0,r1) _vfp_extr_f_d(_jit,r0,r1)
518 static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
519 # define vfp_truncr_f_i(r0,r1) _vfp_truncr_f_i(_jit,r0,r1)
520 static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
521 # define vfp_truncr_d_i(r0,r1) _vfp_truncr_d_i(_jit,r0,r1)
522 static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
/* Unary operations and the multiply-add family.  abs, neg and sqrt expand
 * directly to the VABS, VNEG and VSQRT instruction macros.  The fmar, fmsr,
 * fnmar and fnmsr helpers are out-of-line functions that take four register
 * numbers (r0..r3) after the JIT state. */
523 # define vfp_absr_f(r0,r1) VABS_F32(r0,r1)
524 # define vfp_absr_d(r0,r1) VABS_F64(r0,r1)
525 # define vfp_negr_f(r0,r1) VNEG_F32(r0,r1)
526 # define vfp_negr_d(r0,r1) VNEG_F64(r0,r1)
527 # define vfp_sqrtr_f(r0,r1) VSQRT_F32(r0,r1)
/* Single-precision multiply-add variants. */
528 # define vfp_fmar_f(r0,r1,r2,r3) _vfp_fmar_f(_jit,r0,r1,r2,r3)
529 static void _vfp_fmar_f(jit_state_t*,
530 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
531 # define vfp_fmsr_f(r0,r1,r2,r3) _vfp_fmsr_f(_jit,r0,r1,r2,r3)
532 static void _vfp_fmsr_f(jit_state_t*,
533 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
534 # define vfp_fnmar_f(r0,r1,r2,r3) _vfp_fnmar_f(_jit,r0,r1,r2,r3)
535 static void _vfp_fnmar_f(jit_state_t*,
536 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
537 # define vfp_fnmsr_f(r0,r1,r2,r3) _vfp_fnmsr_f(_jit,r0,r1,r2,r3)
538 static void _vfp_fnmsr_f(jit_state_t*,
539 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
540 # define vfp_sqrtr_d(r0,r1) VSQRT_F64(r0,r1)
/* Double-precision multiply-add variants. */
541 # define vfp_fmar_d(r0,r1,r2,r3) _vfp_fmar_d(_jit,r0,r1,r2,r3)
542 static void _vfp_fmar_d(jit_state_t*,
543 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
544 # define vfp_fmsr_d(r0,r1,r2,r3) _vfp_fmsr_d(_jit,r0,r1,r2,r3)
545 static void _vfp_fmsr_d(jit_state_t*,
546 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
547 # define vfp_fnmar_d(r0,r1,r2,r3) _vfp_fnmar_d(_jit,r0,r1,r2,r3)
548 static void _vfp_fnmar_d(jit_state_t*,
549 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
550 # define vfp_fnmsr_d(r0,r1,r2,r3) _vfp_fnmsr_d(_jit,r0,r1,r2,r3)
551 static void _vfp_fnmsr_d(jit_state_t*,
552 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/* Binary arithmetic.  The register forms (suffix r) expand directly to the
 * VADD, VSUB, VMUL and VDIV instruction macros; the immediate forms (suffix i)
 * are out-of-line functions whose last argument is a jit_float32_t or
 * jit_float64_t constant. */
553 # define vfp_addr_f(r0,r1,r2) VADD_F32(r0,r1,r2)
554 # define vfp_addi_f(r0,r1,i0) _vfp_addi_f(_jit,r0,r1,i0)
555 static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
556 # define vfp_addr_d(r0,r1,r2) VADD_F64(r0,r1,r2)
557 # define vfp_addi_d(r0,r1,i0) _vfp_addi_d(_jit,r0,r1,i0)
558 static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
559 # define vfp_subr_f(r0,r1,r2) VSUB_F32(r0,r1,r2)
560 # define vfp_subi_f(r0,r1,i0) _vfp_subi_f(_jit,r0,r1,i0)
561 static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
562 # define vfp_subr_d(r0,r1,r2) VSUB_F64(r0,r1,r2)
563 # define vfp_subi_d(r0,r1,i0) _vfp_subi_d(_jit,r0,r1,i0)
564 static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Reverse subtract: the register form is subr with r1 and r2 exchanged. */
565 # define vfp_rsbr_f(r0,r1,r2) vfp_subr_f(r0,r2,r1)
566 # define vfp_rsbi_f(r0,r1,i0) _vfp_rsbi_f(_jit,r0,r1,i0)
567 static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
568 # define vfp_rsbr_d(r0,r1,r2) vfp_subr_d(r0,r2,r1)
569 # define vfp_rsbi_d(r0,r1,i0) _vfp_rsbi_d(_jit,r0,r1,i0)
570 static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
571 # define vfp_mulr_f(r0,r1,r2) VMUL_F32(r0,r1,r2)
572 # define vfp_muli_f(r0,r1,i0) _vfp_muli_f(_jit,r0,r1,i0)
573 static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
574 # define vfp_mulr_d(r0,r1,r2) VMUL_F64(r0,r1,r2)
575 # define vfp_muli_d(r0,r1,i0) _vfp_muli_d(_jit,r0,r1,i0)
576 static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
577 # define vfp_divr_f(r0,r1,r2) VDIV_F32(r0,r1,r2)
578 # define vfp_divi_f(r0,r1,i0) _vfp_divi_f(_jit,r0,r1,i0)
579 static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
580 # define vfp_divr_d(r0,r1,r2) VDIV_F64(r0,r1,r2)
581 # define vfp_divi_d(r0,r1,i0) _vfp_divi_d(_jit,r0,r1,i0)
582 static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Comparison helpers for the lt, le, eq, ge, gt and ne predicates.
 * vfp_cmp_f and vfp_cmp_d take two register numbers.  The vcmp01 helpers take
 * two ARM condition codes (c0, c1) followed by one register (the _x form) or
 * three registers (the _f and _d forms).  Each register-form predicate below
 * is a vcmp01 call with a fixed condition pair, and each immediate form is an
 * out-of-line function taking a jit_float32_t or jit_float64_t constant.
 * NOTE(review): presumably c0 is the condition producing 0 and c1 the one
 * producing 1, as the "01" in the name suggests; confirm against _vcmp01_x. */
583 # define vfp_cmp_f(r0,r1) _vfp_cmp_f(_jit,r0,r1)
584 static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
585 # define vfp_cmp_d(r0,r1) _vfp_cmp_d(_jit,r0,r1)
586 static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
587 # define vcmp01_x(c0,c1,r0) _vcmp01_x(_jit,c0,c1,r0)
588 static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
589 # define vcmp01_f(c0,c1,r0,r1,r2) _vcmp01_f(_jit,c0,c1,r0,r1,r2)
590 static void _vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
591 # define vcmp01_d(c0,c1,r0,r1,r2) _vcmp01_d(_jit,c0,c1,r0,r1,r2)
592 static void _vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
/* lt: condition pair (PL, MI). */
593 # define vfp_ltr_f(r0,r1,r2) vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
594 # define vfp_lti_f(r0,r1,i0) _vfp_lti_f(_jit,r0,r1,i0)
595 static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
596 # define vfp_ltr_d(r0,r1,r2) vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
597 # define vfp_lti_d(r0,r1,i0) _vfp_lti_d(_jit,r0,r1,i0)
598 static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* le: condition pair (HS, LS). */
599 # define vfp_ler_f(r0,r1,r2) vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
600 # define vfp_lei_f(r0,r1,i0) _vfp_lei_f(_jit,r0,r1,i0)
601 static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
602 # define vfp_ler_d(r0,r1,r2) vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
603 # define vfp_lei_d(r0,r1,i0) _vfp_lei_d(_jit,r0,r1,i0)
604 static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* eq: condition pair (NE, EQ). */
605 # define vfp_eqr_f(r0,r1,r2) vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
606 # define vfp_eqi_f(r0,r1,i0) _vfp_eqi_f(_jit,r0,r1,i0)
607 static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
608 # define vfp_eqr_d(r0,r1,r2) vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
609 # define vfp_eqi_d(r0,r1,i0) _vfp_eqi_d(_jit,r0,r1,i0)
610 static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* ge: condition pair (LT, GE). */
611 # define vfp_ger_f(r0,r1,r2) vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
612 # define vfp_gei_f(r0,r1,i0) _vfp_gei_f(_jit,r0,r1,i0)
613 static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
614 # define vfp_ger_d(r0,r1,r2) vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
615 # define vfp_gei_d(r0,r1,i0) _vfp_gei_d(_jit,r0,r1,i0)
616 static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* gt: condition pair (LE, GT). */
617 # define vfp_gtr_f(r0,r1,r2) vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
618 # define vfp_gti_f(r0,r1,i0) _vfp_gti_f(_jit,r0,r1,i0)
619 static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
620 # define vfp_gtr_d(r0,r1,r2) vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
621 # define vfp_gti_d(r0,r1,i0) _vfp_gti_d(_jit,r0,r1,i0)
622 static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* ne: condition pair (EQ, NE). */
623 # define vfp_ner_f(r0,r1,r2) vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
624 # define vfp_nei_f(r0,r1,i0) _vfp_nei_f(_jit,r0,r1,i0)
625 static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
626 # define vfp_ner_d(r0,r1,r2) vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
627 # define vfp_nei_d(r0,r1,i0) _vfp_nei_d(_jit,r0,r1,i0)
628 static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Comparison helpers for the unlt, unle, uneq, unge, ungt, ltgt, ord and
 * unord predicates.  Two helper families take a single condition code:
 * the "10" family (used by unlt and unle) and the "01" family (used by unge
 * and ungt).  uneq, ltgt, ord and unord have dedicated out-of-line functions
 * for both register and immediate forms.
 * NOTE(review): the _x helper of the first family is spelled vcmp10_x while
 * its _f and _d siblings are spelled vcmp_10_f and vcmp_10_d; the second
 * family uses vcmp_01_ throughout.  Confirm the spelling is intentional. */
629 # define vcmp10_x(c0,r0) _vcmp10_x(_jit,c0,r0)
630 static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
631 # define vcmp_10_f(c0,r0,r1,r2) _vcmp_10_f(_jit,c0,r0,r1,r2)
632 static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
633 # define vcmp_10_d(c0,r0,r1,r2) _vcmp_10_d(_jit,c0,r0,r1,r2)
634 static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
/* unlt: "10" helper with ARM_CC_GE. */
635 # define vfp_unltr_f(r0,r1,r2) vcmp_10_f(ARM_CC_GE,r0,r1,r2)
636 # define vfp_unlti_f(r0,r1,i0) _vfp_unlti_f(_jit,r0,r1,i0)
637 static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
638 # define vfp_unltr_d(r0,r1,r2) vcmp_10_d(ARM_CC_GE,r0,r1,r2)
639 # define vfp_unlti_d(r0,r1,i0) _vfp_unlti_d(_jit,r0,r1,i0)
640 static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* unle: "10" helper with ARM_CC_GT. */
641 # define vfp_unler_f(r0,r1,r2) vcmp_10_f(ARM_CC_GT,r0,r1,r2)
642 # define vfp_unlei_f(r0,r1,i0) _vfp_unlei_f(_jit,r0,r1,i0)
643 static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
644 # define vfp_unler_d(r0,r1,r2) vcmp_10_d(ARM_CC_GT,r0,r1,r2)
645 # define vfp_unlei_d(r0,r1,i0) _vfp_unlei_d(_jit,r0,r1,i0)
646 static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* uneq: dedicated functions, including a one-register _x helper. */
647 # define vfp_uneqr_x(r0) _vfp_uneqr_x(_jit,r0)
648 static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
649 # define vfp_uneqr_f(r0,r1,r2) _vfp_uneqr_f(_jit,r0,r1,r2)
650 static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
651 # define vfp_uneqi_f(r0,r1,i0) _vfp_uneqi_f(_jit,r0,r1,i0)
652 static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
653 # define vfp_uneqr_d(r0,r1,r2) _vfp_uneqr_d(_jit,r0,r1,r2)
654 static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
655 # define vfp_uneqi_d(r0,r1,i0) _vfp_uneqi_d(_jit,r0,r1,i0)
656 static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* "01" single-condition helper family. */
657 # define vcmp_01_x(c0,r0) _vcmp_01_x(_jit,c0,r0)
658 static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
659 # define vcmp_01_f(c0,r0,r1,r2) _vcmp_01_f(_jit,c0,r0,r1,r2)
660 static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
661 # define vcmp_01_d(c0,r0,r1,r2) _vcmp_01_d(_jit,c0,r0,r1,r2)
662 static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
/* unge: "01" helper with ARM_CC_CS. */
663 # define vfp_unger_f(r0,r1,r2) vcmp_01_f(ARM_CC_CS,r0,r1,r2)
664 # define vfp_ungei_f(r0,r1,i0) _vfp_ungei_f(_jit,r0,r1,i0)
665 static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
666 # define vfp_unger_d(r0,r1,r2) vcmp_01_d(ARM_CC_CS,r0,r1,r2)
667 # define vfp_ungei_d(r0,r1,i0) _vfp_ungei_d(_jit,r0,r1,i0)
668 static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* ungt: "01" helper with ARM_CC_HI. */
669 # define vfp_ungtr_f(r0,r1,r2) vcmp_01_f(ARM_CC_HI,r0,r1,r2)
670 # define vfp_ungti_f(r0,r1,i0) _vfp_ungti_f(_jit,r0,r1,i0)
671 static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
672 # define vfp_ungtr_d(r0,r1,r2) vcmp_01_d(ARM_CC_HI,r0,r1,r2)
673 # define vfp_ungti_d(r0,r1,i0) _vfp_ungti_d(_jit,r0,r1,i0)
674 static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* ltgt: dedicated functions, including a one-register _x helper. */
675 # define vfp_ltgtr_x(r0) _vfp_ltgtr_x(_jit,r0)
676 static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
677 # define vfp_ltgtr_f(r0,r1,r2) _vfp_ltgtr_f(_jit,r0,r1,r2)
678 static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
679 # define vfp_ltgti_f(r0,r1,i0) _vfp_ltgti_f(_jit,r0,r1,i0)
680 static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
681 # define vfp_ltgtr_d(r0,r1,r2) _vfp_ltgtr_d(_jit,r0,r1,r2)
682 static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
683 # define vfp_ltgti_d(r0,r1,i0) _vfp_ltgti_d(_jit,r0,r1,i0)
684 static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* ord and unord: dedicated functions for every form. */
685 # define vfp_ordr_f(r0,r1,r2) _vfp_ordr_f(_jit,r0,r1,r2)
686 static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
687 # define vfp_ordi_f(r0,r1,i0) _vfp_ordi_f(_jit,r0,r1,i0)
688 static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
689 # define vfp_ordr_d(r0,r1,r2) _vfp_ordr_d(_jit,r0,r1,r2)
690 static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
691 # define vfp_ordi_d(r0,r1,i0) _vfp_ordi_d(_jit,r0,r1,i0)
692 static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
693 # define vfp_unordr_f(r0,r1,r2) _vfp_unordr_f(_jit,r0,r1,r2)
694 static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
695 # define vfp_unordi_f(r0,r1,i0) _vfp_unordi_f(_jit,r0,r1,i0)
696 static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
697 # define vfp_unordr_d(r0,r1,r2) _vfp_unordr_d(_jit,r0,r1,r2)
698 static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
699 # define vfp_unordi_d(r0,r1,i0) _vfp_unordi_d(_jit,r0,r1,i0)
700 static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Conditional-branch helpers for the lt, le, eq, ge, gt and ne predicates.
 * vbcmp_x takes a condition code and a jit_word_t and returns a jit_word_t;
 * vbcmp_f and vbcmp_d add two register numbers.  Each register-form branch
 * below is a vbcmp call with a fixed condition code, and each immediate form
 * is an out-of-line function returning jit_word_t.
 * NOTE(review): presumably i0 is the branch target and the returned word
 * identifies the emitted branch for later patching; confirm against the
 * definitions, which are not visible here. */
701 # define vbcmp_x(cc,i0) _vbcmp_x(_jit,cc,i0)
702 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
703 # define vbcmp_f(cc,i0,r0,r1) _vbcmp_f(_jit,cc,i0,r0,r1)
705 _vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
/* NOTE(review): the next two lines repeat the vbcmp_x macro and prototype
 * from the start of this group verbatim; they look redundant. */
706 # define vbcmp_x(cc,i0) _vbcmp_x(_jit,cc,i0)
707 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
708 # define vbcmp_d(cc,i0,r0,r1) _vbcmp_d(_jit,cc,i0,r0,r1)
710 _vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
/* blt: ARM_CC_MI. */
711 # define vfp_bltr_f(i0,r0,r1) vbcmp_f(ARM_CC_MI,i0,r0,r1)
712 # define vfp_blti_f(i0,r0,i1) _vfp_blti_f(_jit,i0,r0,i1)
713 static jit_word_t _vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
714 # define vfp_bltr_d(i0,r0,r1) vbcmp_d(ARM_CC_MI,i0,r0,r1)
715 static jit_word_t _vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
716 # define vfp_blti_d(i0,r0,i1) _vfp_blti_d(_jit,i0,r0,i1)
/* ble: ARM_CC_LS. */
717 # define vfp_bler_f(i0,r0,r1) vbcmp_f(ARM_CC_LS,i0,r0,r1)
718 # define vfp_blei_f(i0,r0,i1) _vfp_blei_f(_jit,i0,r0,i1)
719 static jit_word_t _vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
720 # define vfp_bler_d(i0,r0,r1) vbcmp_d(ARM_CC_LS,i0,r0,r1)
721 # define vfp_blei_d(i0,r0,i1) _vfp_blei_d(_jit,i0,r0,i1)
722 static jit_word_t _vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* beq: ARM_CC_EQ. */
723 # define vfp_beqr_f(i0,r0,r1) vbcmp_f(ARM_CC_EQ,i0,r0,r1)
724 # define vfp_beqi_f(i0,r0,i1) _vfp_beqi_f(_jit,i0,r0,i1)
725 static jit_word_t _vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
726 # define vfp_beqr_d(i0,r0,r1) vbcmp_d(ARM_CC_EQ,i0,r0,r1)
727 # define vfp_beqi_d(i0,r0,i1) _vfp_beqi_d(_jit,i0,r0,i1)
728 static jit_word_t _vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bge: ARM_CC_GE. */
729 # define vfp_bger_f(i0,r0,r1) vbcmp_f(ARM_CC_GE,i0,r0,r1)
730 # define vfp_bgei_f(i0,r0,i1) _vfp_bgei_f(_jit,i0,r0,i1)
731 static jit_word_t _vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
732 # define vfp_bger_d(i0,r0,r1) vbcmp_d(ARM_CC_GE,i0,r0,r1)
733 # define vfp_bgei_d(i0,r0,i1) _vfp_bgei_d(_jit,i0,r0,i1)
734 static jit_word_t _vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bgt: ARM_CC_GT. */
735 # define vfp_bgtr_f(i0,r0,r1) vbcmp_f(ARM_CC_GT,i0,r0,r1)
736 # define vfp_bgti_f(i0,r0,i1) _vfp_bgti_f(_jit,i0,r0,i1)
737 static jit_word_t _vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
738 # define vfp_bgtr_d(i0,r0,r1) vbcmp_d(ARM_CC_GT,i0,r0,r1)
739 # define vfp_bgti_d(i0,r0,i1) _vfp_bgti_d(_jit,i0,r0,i1)
740 static jit_word_t _vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bne: ARM_CC_NE. */
741 # define vfp_bner_f(i0,r0,r1) vbcmp_f(ARM_CC_NE,i0,r0,r1)
742 # define vfp_bnei_f(i0,r0,i1) _vfp_bnei_f(_jit,i0,r0,i1)
743 static jit_word_t _vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
744 # define vfp_bner_d(i0,r0,r1) vbcmp_d(ARM_CC_NE,i0,r0,r1)
745 # define vfp_bnei_d(i0,r0,i1) _vfp_bnei_d(_jit,i0,r0,i1)
746 static jit_word_t _vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* Conditional-branch helpers for the unlt, unle, uneq, unge, ungt, ltgt, ord
 * and unord predicates.  vbncmp_x, vbncmp_f and vbncmp_d mirror the
 * signatures of the vbcmp helpers (condition code, jit_word_t, then zero or
 * two registers).  Register forms are built as follows in the visible code:
 *   bunlt, bunle           -> vbncmp with ARM_CC_GE / ARM_CC_GT
 *   bungt, bord, bunord    -> vbcmp  with ARM_CC_HI / ARM_CC_VC / ARM_CC_VS
 *   buneq, bunge, bltgt    -> dedicated out-of-line functions
 * Every immediate form is an out-of-line function. */
747 # define vbncmp_x(cc,i0) _vbncmp_x(_jit,cc,i0)
748 static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
749 # define vbncmp_f(cc,i0,r0,r1) _vbncmp_f(_jit,cc,i0,r0,r1)
751 _vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
752 # define vbncmp_d(cc,i0,r0,r1) _vbncmp_d(_jit,cc,i0,r0,r1)
754 _vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
/* bunlt: vbncmp with ARM_CC_GE. */
755 # define vfp_bunltr_f(i0,r0,r1) vbncmp_f(ARM_CC_GE,i0,r0,r1)
756 # define vfp_bunlti_f(i0,r0,i1) _vfp_bunlti_f(_jit,i0,r0,i1)
758 _vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
759 # define vfp_bunltr_d(i0,r0,r1) vbncmp_d(ARM_CC_GE,i0,r0,r1)
760 # define vfp_bunlti_d(i0,r0,i1) _vfp_bunlti_d(_jit,i0,r0,i1)
762 _vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bunle: vbncmp with ARM_CC_GT. */
763 # define vfp_bunler_f(i0,r0,r1) vbncmp_f(ARM_CC_GT,i0,r0,r1)
764 # define vfp_bunlei_f(i0,r0,i1) _vfp_bunlei_f(_jit,i0,r0,i1)
766 _vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
767 # define vfp_bunler_d(i0,r0,r1) vbncmp_d(ARM_CC_GT,i0,r0,r1)
768 # define vfp_bunlei_d(i0,r0,i1) _vfp_bunlei_d(_jit,i0,r0,i1)
770 _vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* buneq: dedicated functions, including an _x helper taking only i0. */
771 # define vfp_buneqr_x(i0) _vfp_buneqr_x(_jit,i0)
772 static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
773 # define vfp_buneqr_f(i0,r0,r1) _vfp_buneqr_f(_jit,i0,r0,r1)
775 _vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
776 # define vfp_buneqi_f(i0,r0,i1) _vfp_buneqi_f(_jit,i0,r0,i1)
778 _vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
779 # define vfp_buneqr_d(i0,r0,r1) _vfp_buneqr_d(_jit,i0,r0,r1)
781 _vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
782 # define vfp_buneqi_d(i0,r0,i1) _vfp_buneqi_d(_jit,i0,r0,i1)
784 _vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bunge: dedicated functions, including an _x helper taking only i0. */
785 # define vfp_bunger_x(i0) _vfp_bunger_x(_jit,i0)
786 static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
787 # define vfp_bunger_f(i0,r0,r1) _vfp_bunger_f(_jit,i0,r0,r1)
789 _vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
790 # define vfp_bungei_f(i0,r0,i1) _vfp_bungei_f(_jit,i0,r0,i1)
792 _vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
793 # define vfp_bunger_d(i0,r0,r1) _vfp_bunger_d(_jit,i0,r0,r1)
795 _vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
796 # define vfp_bungei_d(i0,r0,i1) _vfp_bungei_d(_jit,i0,r0,i1)
798 _vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bungt: plain vbcmp (not vbncmp) with ARM_CC_HI. */
799 # define vfp_bungtr_f(i0,r0,r1) vbcmp_f(ARM_CC_HI,i0,r0,r1)
800 # define vfp_bungti_f(i0,r0,i1) _vfp_bungti_f(_jit,i0,r0,i1)
802 _vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
803 # define vfp_bungtr_d(i0,r0,r1) vbcmp_d(ARM_CC_HI,i0,r0,r1)
804 # define vfp_bungti_d(i0,r0,i1) _vfp_bungti_d(_jit,i0,r0,i1)
806 _vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bltgt: dedicated functions, including an _x helper taking only i0. */
807 # define vfp_bltgtr_x(i0) _vfp_bltgtr_x(_jit,i0)
808 static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
809 # define vfp_bltgtr_f(i0,r0,r1) _vfp_bltgtr_f(_jit,i0,r0,r1)
811 _vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
812 # define vfp_bltgti_f(i0,r0,i1) _vfp_bltgti_f(_jit,i0,r0,i1)
814 _vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
815 # define vfp_bltgtr_d(i0,r0,r1) _vfp_bltgtr_d(_jit,i0,r0,r1)
817 _vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
818 # define vfp_bltgti_d(i0,r0,i1) _vfp_bltgti_d(_jit,i0,r0,i1)
820 _vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bord: plain vbcmp with ARM_CC_VC. */
821 # define vfp_bordr_f(i0,r0,r1) vbcmp_f(ARM_CC_VC,i0,r0,r1)
822 # define vfp_bordi_f(i0,r0,i1) _vfp_bordi_f(_jit,i0,r0,i1)
824 _vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
825 # define vfp_bordr_d(i0,r0,r1) vbcmp_d(ARM_CC_VC,i0,r0,r1)
826 # define vfp_bordi_d(i0,r0,i1) _vfp_bordi_d(_jit,i0,r0,i1)
828 _vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* bunord: plain vbcmp with ARM_CC_VS. */
829 # define vfp_bunordr_f(i0,r0,r1) vbcmp_f(ARM_CC_VS,i0,r0,r1)
830 # define vfp_bunordi_f(i0,r0,i1) _vfp_bunordi_f(_jit,i0,r0,i1)
832 _vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
833 # define vfp_bunordr_d(i0,r0,r1) vbcmp_d(ARM_CC_VS,i0,r0,r1)
834 # define vfp_bunordi_d(i0,r0,i1) _vfp_bunordi_d(_jit,i0,r0,i1)
836 _vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* Load, store and varargs helpers.  The plain register forms (vfp_ldr and
 * vfp_str) expand directly to VLDR or VSTR with a zero offset; every other
 * form is an out-of-line function.  Naming pattern visible below: ldi/sti
 * take a jit_word_t where ldr/str take a register, ldxr/stxr take three
 * registers, and ldxi/stxi take two registers plus a jit_word_t. */
837 # define vfp_ldr_f(r0,r1) VLDR_F32(r0,r1,0)
838 # define vfp_ldr_d(r0,r1) VLDR_F64(r0,r1,0)
839 # define vfp_ldi_f(r0,i0) _vfp_ldi_f(_jit,r0,i0)
840 static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
841 # define vfp_ldi_d(r0,i0) _vfp_ldi_d(_jit,r0,i0)
842 static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
843 # define vfp_ldxr_f(r0,r1,r2) _vfp_ldxr_f(_jit,r0,r1,r2)
844 static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
845 # define vfp_ldxr_d(r0,r1,r2) _vfp_ldxr_d(_jit,r0,r1,r2)
846 static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
847 # define vfp_ldxi_f(r0,r1,i0) _vfp_ldxi_f(_jit,r0,r1,i0)
848 static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
849 # define vfp_ldxi_d(r0,r1,i0) _vfp_ldxi_d(_jit,r0,r1,i0)
850 static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/* NOTE(review): the "un" prefix presumably means unaligned access and the
 * trailing jit_word_t a size selector; confirm against the definitions. */
851 # define vfp_unldr_x(r0, r1, i0) _vfp_unldr_x(_jit, r0, r1, i0)
852 static void _vfp_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
853 # define vfp_unldi_x(r0, i0, i1) _vfp_unldi_x(_jit, r0, i0, i1)
854 static void _vfp_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
/* Store forms pass their two registers to VSTR in swapped order (r1, r0). */
855 # define vfp_str_f(r0,r1) VSTR_F32(r1,r0,0)
856 # define vfp_str_d(r0,r1) VSTR_F64(r1,r0,0)
857 # define vfp_sti_f(i0,r0) _vfp_sti_f(_jit,i0,r0)
858 static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
859 # define vfp_sti_d(i0,r0) _vfp_sti_d(_jit,i0,r0)
860 static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
861 # define vfp_stxr_f(r0,r1,r2) _vfp_stxr_f(_jit,r0,r1,r2)
862 static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
863 # define vfp_stxr_d(r0,r1,r2) _vfp_stxr_d(_jit,r0,r1,r2)
864 static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
865 # define vfp_stxi_f(i0,r0,r1) _vfp_stxi_f(_jit,i0,r0,r1)
866 static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
867 # define vfp_stxi_d(i0,r0,r1) _vfp_stxi_d(_jit,i0,r0,r1)
868 static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
869 #define vfp_unstr_x(r0, r1, i0) _vfp_unstr_x(_jit, r0, r1, i0)
870 static void _vfp_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
871 #define vfp_unsti_x(i0, r0, i1) _vfp_unsti_x(_jit, i0, r0, i1)
872 static void _vfp_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
/* Varargs helper taking two register numbers. */
873 # define vfp_vaarg_d(r0, r1) _vfp_vaarg_d(_jit, r0, r1)
874 static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
/* Map a register number to the value placed in an instruction's register
 * field: subtract 16, then halve.  The emitters below test bit 0 of the
 * register number (to choose an extra ARM_V_D, ARM_V_N or ARM_V_M opcode bit)
 * before applying this macro, since the shift discards that bit. */
878 # define vfp_regno(rn) (((rn) - 16) >> 1)
/* encode_vfp_double: look for an immediate encoding of the 64-bit pattern
 * supplied as two 32-bit halves (lo, hi).  `inv` selects ARM_VMVNI instead of
 * ARM_VMOVI for the opcode, and both `mov` and `inv` adjust the mode bits in
 * the final switch.  The candidate bit shapes are the ones drawn in the
 * diagrams below.  The callers _vfp_movi_f and _vfp_movi_d compare the result
 * against -1, so -1 is presumably the "no encoding available" result.
 * NOTE(review): several lines of this function (labels, jumps, returns,
 * braces) are not visible in this excerpt; the notes below describe only the
 * statements that are. */
881 encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
883 int code, mode, imm, mask;
888 * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
/* Walk the four byte lanes; each visible test fires when a lane value (imm)
 * is neither zero nor the full lane mask. */
890 for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
892 if (imm != mask && imm != 0)
895 if (imm != mask && imm != 0)
/* Collect the top bit of every byte into one 8-bit value: the four bytes of
 * hi land in bits 7..4 and the four bytes of lo in bits 3..0. */
899 imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
900 ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >> 3) |
901 ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
902 ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >> 7));
908 * 00000000 00000000 00000000 abcdefgh
909 * 00000000 00000000 abcdefgh 00000000
910 * 00000000 abcdefgh 00000000 00000000
911 * abcdefgh 00000000 00000000 00000000 */
912 for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
913 if ((lo & mask) == lo) {
914 imm = lo >> (mode << 3);
920 * 00000000 abcdefgh 00000000 abcdefgh
921 * abcdefgh 00000000 abcdefgh 00000000 */
922 for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
923 if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
924 imm = lo >> (mode << 3);
925 mode = 0x800 | (mode << 9);
931 * 00000000 00000000 abcdefgh 11111111
932 * 00000000 abcdefgh 11111111 11111111 */
/* NOTE(review): in the loop below, when mode is 1 the shift count
 * 8 + (mode << 8) evaluates to 264, which is larger than the width of
 * `unsigned` and therefore undefined in C.  The two loops above scale the
 * shift with (mode << 3) instead; confirm whether that was intended here. */
933 for (mode = 0, mask = 0xff; mode < 2;
934 mask = (mask << 8) | 0xff, mode++) {
935 if ((lo & mask) == mask &&
936 !((lo & ~mask) >> 8) &&
937 (imm = lo >> (8 + (mode << 8)))) {
938 mode = 0xc00 | (mode << 8);
944 * aBbbbbbc defgh000 00000000 00000000
945 * from the ARM Architecture Reference Manual:
946 * In this entry, B = NOT(b). The bit pattern represents the
947 * floating-point number (-1)^s* 2^exp * mantissa, where
949 * exp = UInt(NOT(b):c:d)-3 and
950 * mantissa = (16+UInt(e:f:g:h))/16. */
/* Accept only when the low 19 bits are clear and bits 25..30 read either
 * 011111 or 100000; imm then takes bit 31 as its bit 7 and bits 19..25 as
 * its low seven bits. */
951 if ((lo & 0x7ffff) == 0 &&
952 (((lo & 0x7e000000) == 0x3e000000) ||
953 ((lo & 0x7e000000) == 0x40000000))) {
955 imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
962 /* need another approach (load from memory, move from arm register, etc) */
966 code = inv ? ARM_VMVNI : ARM_VMOVI;
967 switch ((mode & 0xf00) >> 8) {
968 case 0x0: case 0x2: case 0x4: case 0x6:
970 if (inv) mode |= 0x20;
971 if (!mov) mode |= 0x100;
973 case 0x1: case 0x3: case 0x5: case 0x7:
974 /* should actually not reach here */
980 /* should actually not reach here */
987 assert(!(mode & 0x20));
/* Scatter the 8-bit immediate: bit 7 moves to bit 24, bits 6..4 move to
 * bits 18..16, and the low nibble stays in place. */
990 imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
993 if (code & 0x1000000)
/* _vodi: emit the word `oi` combined with one register field.  `oi` must
 * leave bits 12..15 clear and r0 must be even; r0 is reduced with vfp_regno()
 * and its _u4() value is ORed in at bit 12.  The visible emission path hands
 * thumb.s[0] and thumb.s[1] to iss(). */
1004 _vodi(jit_state_t *_jit, int oi, int r0)
1007 assert(!(oi & 0x0000f000));
1008 assert(!(r0 & 1)); r0 = vfp_regno(r0);
1009 thumb.i = oi|(_u4(r0)<<12);
1011 iss(thumb.s[0], thumb.s[1]);
/* _voqi: same shape as _vodi, except that r0 must have its two low bits
 * clear (a multiple of four) before being reduced with vfp_regno() and ORed
 * in at bit 12. */
1017 _voqi(jit_state_t *_jit, int oi, int r0)
1020 assert(!(oi & 0x0000f000));
1021 assert(!(r0 & 3)); r0 = vfp_regno(r0);
1022 thumb.i = oi|(_u4(r0)<<12);
1024 iss(thumb.s[0], thumb.s[1]);
/* _cc_vo_ss: two-register emitter.  cc may only use the top four bits, and
 * the opcode `o` must leave bits 28..31, 12..15 and 0..3 clear.  An odd r0
 * sets ARM_V_D and an odd r1 sets ARM_V_M before each is reduced with
 * vfp_regno(); r0 goes to bit 12 and r1 to bit 0. */
1030 _cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
1033 assert(!(cc & 0x0fffffff));
1034 assert(!(o & 0xf000f00f));
/* Only the `o |=` is conditional; the vfp_regno() assignment always runs. */
1035 if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0);
1036 if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1);
1037 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1039 iss(thumb.s[0], thumb.s[1]);
/* _cc_vo_dd: two-register emitter that requires both r0 and r1 to be even.
 * Same cc and opcode constraints as _cc_vo_ss; r0 goes to bit 12 and r1 to
 * bit 0 after vfp_regno(). */
1045 _cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
1048 assert(!(cc & 0x0fffffff));
1049 assert(!(o & 0xf000f00f));
1050 assert(!(r0 & 1) && !(r1 & 1));
1051 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1052 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1054 iss(thumb.s[0], thumb.s[1]);
/* _cc_vo_qd: two-register emitter where r0 must be a multiple of four and r1
 * must be even.  Field placement matches _cc_vo_dd (r0 at bit 12, r1 at
 * bit 0). */
1060 _cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
1063 assert(!(cc & 0x0fffffff));
1064 assert(!(o & 0xf000f00f));
1065 assert(!(r0 & 3) && !(r1 & 1));
1066 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1067 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1069 iss(thumb.s[0], thumb.s[1]);
/* _cc_vo_qq: two-register emitter where both r0 and r1 must be multiples of
 * four.  Field placement matches _cc_vo_dd (r0 at bit 12, r1 at bit 0). */
1075 _cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
1078 assert(!(cc & 0x0fffffff));
1079 assert(!(o & 0xf000f00f));
1080 assert(!(r0 & 3) && !(r1 & 3));
1081 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1082 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1084 iss(thumb.s[0], thumb.s[1]);
/* _cc_vorr_: two-register emitter that uses both register numbers as given
 * (no vfp_regno() in the visible code).  r1 goes to bit 16 and r0 to bit 12. */
1090 _cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1093 assert(!(cc & 0x0fffffff));
1094 assert(!(o & 0xf000f00f));
1095 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1097 iss(thumb.s[0], thumb.s[1]);
/* _cc_vors_: like _cc_vorr_, but r1 is treated as a VFP register: an odd r1
 * sets ARM_V_N and r1 is then reduced with vfp_regno().  r0 is used as given.
 * r1 goes to bit 16 and r0 to bit 12. */
1103 _cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1106 assert(!(cc & 0x0fffffff));
1107 assert(!(o & 0xf000f00f));
1108 if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1);
1109 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1111 iss(thumb.s[0], thumb.s[1]);
/* _cc_vorv_: same field layout as _cc_vors_ (r1 at bit 16 after vfp_regno(),
 * r0 at bit 12 as given), but an odd r1 sets ARM_V_M rather than ARM_V_N. */
1117 _cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1120 assert(!(cc & 0x0fffffff));
1121 assert(!(o & 0xf000f00f));
1122 if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1);
1123 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1125 iss(thumb.s[0], thumb.s[1]);
/* _cc_vo_vv: two-register emitter that places r1 at bit 12 and r0 at bit 0,
 * i.e. the reverse of _cc_vo_ss.
 * NOTE(review): the statements between the asserts and the word assembly are
 * not visible in this excerpt, so any register conversion done there cannot
 * be described here. */
1131 _cc_vo_vv(jit_state_t *_jit, int cc, int o, int r0, int r1)
1134 assert(!(cc & 0x0fffffff));
1135 assert(!(o & 0xf000f00f));
1138 thumb.i = cc|o|(_u4(r1)<<12)|_u4(r0);
1140 iss(thumb.s[0], thumb.s[1]);
/* _cc_vori_: same field layout as _cc_vors_ (r1 at bit 16 after vfp_regno(),
 * r0 at bit 12 as given); an odd r1 ORs ARM_V_I32 into the opcode, reusing
 * that constant's bit pattern as noted in the original comment below. */
1146 _cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1149 assert(!(cc & 0x0fffffff));
1150 assert(!(o & 0xf000f00f));
1151 /* use same bit pattern, to set opc1... */
1152 if (r1 & 1) o |= ARM_V_I32; r1 = vfp_regno(r1);
1153 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1155 iss(thumb.s[0], thumb.s[1]);
/* _cc_vorrd: three-register emitter.  The opcode must additionally leave
 * bits 16..19 clear (mask 0xf00ff00f).  r1 goes to bit 16, r0 to bit 12 and
 * r2 to bit 0.
 * NOTE(review): the statements between the asserts and the word assembly are
 * not visible in this excerpt. */
1161 _cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1164 assert(!(cc & 0x0fffffff));
1165 assert(!(o & 0xf00ff00f));
1168 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1170 iss(thumb.s[0], thumb.s[1]);
/* _cc_vosss: three-register emitter.  Odd r0, r1 and r2 set ARM_V_D, ARM_V_N
 * and ARM_V_M respectively before each is reduced with vfp_regno().  r1 goes
 * to bit 16, r0 to bit 12 and r2 to bit 0. */
1176 _cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1179 assert(!(cc & 0x0fffffff));
1180 assert(!(o & 0xf00ff00f));
/* Only the `o |=` is conditional on each line; vfp_regno() always runs. */
1181 if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0);
1182 if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1);
1183 if (r2 & 1) o |= ARM_V_M; r2 = vfp_regno(r2);
1184 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1186 iss(thumb.s[0], thumb.s[1]);
/* _cc_voddd: three-register emitter requiring r0, r1 and r2 all to be even.
 * After vfp_regno(), r1 goes to bit 16, r0 to bit 12 and r2 to bit 0. */
1192 _cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1195 assert(!(cc & 0x0fffffff));
1196 assert(!(o & 0xf00ff00f));
1197 assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
1198 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1199 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1201 iss(thumb.s[0], thumb.s[1]);
/* _cc_voqdd: three-register emitter where r0 must be a multiple of four and
 * r1, r2 must be even.  Field placement matches _cc_voddd. */
1207 _cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1210 assert(!(cc & 0x0fffffff));
1211 assert(!(o & 0xf00ff00f));
1212 assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
1213 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1214 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1216 iss(thumb.s[0], thumb.s[1]);
/* _cc_voqqd: three-register emitter where r0 and r1 must be multiples of
 * four and r2 must be even.  Field placement matches _cc_voddd. */
1222 _cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1225 assert(!(cc & 0x0fffffff));
1226 assert(!(o & 0xf00ff00f));
1227 assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
1228 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1229 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1231 iss(thumb.s[0], thumb.s[1]);
/* _cc_voqqq: three-register emitter where r0, r1 and r2 must all be
 * multiples of four.  Field placement matches _cc_voddd. */
1237 _cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1240 assert(!(cc & 0x0fffffff));
1241 assert(!(o & 0xf00ff00f));
1242 assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
1243 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1244 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1246 iss(thumb.s[0], thumb.s[1]);
/* _cc_vldst: load/store emitter with an 8-bit immediate.  The opcode must
 * leave bits 28..31, 16..19, 12..15 and 0..7 clear (mask 0xf00ff0ff).  r1 goes
 * to bit 16, r0 to bit 12 and _u8(i0) fills the low byte; per the original
 * comment, i0 is the byte offset divided by four.
 * NOTE(review): the condition guarding the ARM_V_F64 assert and the register
 * conversions are not visible in this excerpt. */
1252 _cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1255 /* i0 << 2 is byte offset */
1256 assert(!(cc & 0x0fffffff));
1257 assert(!(o & 0xf00ff0ff));
1259 assert(!(o & ARM_V_F64));
1263 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0);
1265 iss(thumb.s[0], thumb.s[1]);
/* _cc_vorsl: register-list emitter.  i0 is a register count; it is doubled
 * when the opcode has ARM_V_F64 set.  An odd r1 sets ARM_V_D before r1 is
 * reduced with vfp_regno().  After those adjustments i0 must be non-zero and
 * even, and r1 + i0 must not exceed 32.  r0 goes to bit 16, r1 to bit 12 and
 * _u8(i0) fills the low byte. */
1271 _cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1274 assert(!(cc & 0x0fffffff));
1275 assert(!(o & 0xf00ff0ff));
1276 /* save i0 double precision registers */
1277 if (o & ARM_V_F64) i0 <<= 1;
1278 /* if (r1 & 1) cc & ARM_V_F64 must be false */
1279 if (r1 & 1) o |= ARM_V_D; r1 = vfp_regno(r1);
1280 assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
1281 thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
1283 iss(thumb.s[0], thumb.s[1]);
/* _vfp_popcntr: population count of r1 into r0, done in a temporary FPR.
 * Visible sequence: allocate the temporary, VMOV_S_A r1 into it, apply VCNT
 * and then VADD_I8 in place, and VMOV_A_S the result to r0.
 * NOTE(review): the release of the temporary register is not visible in this
 * excerpt. */
1289 _vfp_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1292 reg = jit_get_reg(jit_class_fpr);
1293 VMOV_S_A(rn(reg), r1);
1294 VCNT(rn(reg), rn(reg));
1295 VADD_I8(rn(reg), rn(reg), rn(reg));
1296 VMOV_A_S(r0, rn(reg));
/* _vfp_movr_f: single-precision register move.  Both r0 and r1 must satisfy
 * jit_fpr_p().
 * NOTE(review): the statements performing the move are not visible in this
 * excerpt. */
1301 _vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1303 assert(jit_fpr_p(r0) && jit_fpr_p(r1));
/* _vfp_movi_f: load the float32 constant i0 into FPR r0.  An immediate
 * encoding is attempted only when r0 is even and at least 32: first the bit
 * pattern u.i duplicated in both halves, then its complement with inv set.
 * The visible fallback allocates a GPR and transfers it with VMOV_S_A.
 * NOTE(review): the action taken when an encoding is found, the load of the
 * constant into the GPR and the register release are not visible here. */
1309 _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
1318 assert(jit_fpr_p(r0));
1319 /* float arguments are packed, for others,
1320 * lightning only address even registers */
1321 if (!(r0 & 1) && (r0 - 32) >= 0 &&
1322 ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
1323 (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
1326 reg = jit_get_reg(jit_class_gpr);
1328 VMOV_S_A(r0, rn(reg));
/* _vfp_movr_d: double-precision register move.  Both r0 and r1 must satisfy
 * jit_fpr_p().
 * NOTE(review): the statements performing the move are not visible in this
 * excerpt. */
1334 _vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1336 assert(jit_fpr_p(r0) && jit_fpr_p(r1));
/* _vfp_movi_w_f: move the word constant i0 into r0 via a temporary GPR and
 * vfp_movr_w_f().
 * NOTE(review): the load of i0 into the temporary and the register release
 * are not visible in this excerpt. */
1342 _vfp_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1345 reg = jit_get_reg(jit_class_gpr);
1347 vfp_movr_w_f(r0, rn(reg));
/* _vfp_movi_ww_d: move the word pair (i0, i1) into r0 via two temporary GPRs
 * and vfp_movr_ww_d().
 * NOTE(review): the loads of i0 and i1 into the temporaries and the register
 * releases are not visible in this excerpt. */
1352 _vfp_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
1355 t0 = jit_get_reg(jit_class_gpr);
1356 t1 = jit_get_reg(jit_class_gpr);
1359 vfp_movr_ww_d(r0, rn(t0), rn(t1));
/* _vfp_movi_d: load the float64 constant i0 into FPR r0.  The two 32-bit
 * halves u.i[0] and u.i[1] are offered to encode_vfp_double(), first as they
 * are and then complemented with inv set.  The visible fallback loads each
 * half into a GPR with movi() and combines them with VMOV_D_AA.
 * NOTE(review): the body of the big-endian conditional, the action taken when
 * an encoding is found and the register releases are not visible here. */
1365 _vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
1372 jit_int32_t rg0, rg1;
1374 # if __BYTE_ORDER == __BIG_ENDIAN
1379 assert(jit_fpr_p(r0));
1380 if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
1381 (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
1384 rg0 = jit_get_reg(jit_class_gpr);
1385 rg1 = jit_get_reg(jit_class_gpr);
1386 movi(rn(rg0), u.i[0]);
1387 movi(rn(rg1), u.i[1]);
1388 VMOV_D_AA(r0, rn(rg0), rn(rg1));
/* _vfp_extr_d_f: conversion emitted with VCVT_F64_F32.  When r1 satisfies
 * jit_fpr_p() the conversion reads r1 directly, writing either r0 or a
 * temporary FPR that is then copied out with VMOV_A_S.  Otherwise r1 is first
 * brought into a temporary FPR with VMOV_S_A, converted in place, and the
 * result is delivered with VMOV_F32 or VMOV_A_S.
 * NOTE(review): the tests choosing between the direct and temporary variants
 * are not visible here; presumably they check jit_fpr_p(r0). */
1395 _vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1398 if (jit_fpr_p(r1)) {
1400 VCVT_F64_F32(r0, r1);
1402 reg = jit_get_reg(jit_class_fpr);
1403 VCVT_F64_F32(rn(reg), r1);
1404 VMOV_A_S(r0, rn(reg));
1409 reg = jit_get_reg(jit_class_fpr);
1410 VMOV_S_A(rn(reg), r1);
1411 VCVT_F64_F32(rn(reg), rn(reg));
1413 VMOV_F32(r0, rn(reg));
1415 VMOV_A_S(r0, rn(reg));
/* _vfp_extr_f_d: conversion emitted with VCVT_F32_F64.  When r1 satisfies
 * jit_fpr_p() the conversion reads r1 directly, writing either r0 or a
 * temporary FPR that is then copied out to the pair (r0, r0 + 1) with
 * VMOV_AA_D.  Otherwise the pair (r1, r1 + 1) is first brought into a
 * temporary FPR with VMOV_D_AA, converted in place, and the result is
 * delivered with VMOV_F64 or VMOV_AA_D.
 * NOTE(review): the tests choosing between the direct and temporary variants
 * are not visible here; presumably they check jit_fpr_p(r0). */
1421 _vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1424 if (jit_fpr_p(r1)) {
1426 VCVT_F32_F64(r0, r1);
1428 reg = jit_get_reg(jit_class_fpr);
1429 VCVT_F32_F64(rn(reg), r1);
1430 VMOV_AA_D(r0, r0 + 1, rn(reg));
1435 reg = jit_get_reg(jit_class_fpr);
1436 VMOV_D_AA(rn(reg), r1, r1 + 1);
1437 VCVT_F32_F64(rn(reg), rn(reg));
1439 VMOV_F64(r0, rn(reg));
1441 VMOV_AA_D(r0, r0 + 1, rn(reg));
/* Integer to single precision conversion using VCVT_F32_S32.
 * When r0 is a float register the conversion is performed in place on
 * r0.  Otherwise a scratch float register receives r1 through
 * VMOV_V_I32, is converted in place, and is copied to r0 with VMOV_F32. */
1447 _vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1450 if (jit_fpr_p(r0)) {
1452 VCVT_F32_S32(r0, r0);
1455 reg = jit_get_reg(jit_class_fpr);
1456 VMOV_V_I32(rn(reg), r1);
1457 VCVT_F32_S32(rn(reg), rn(reg));
1458 VMOV_F32(r0, rn(reg));
/* Integer to double precision conversion using VCVT_F64_S32.
 * When r0 is a float register the conversion is performed in place on
 * r0.  Otherwise a scratch float register receives r1 through
 * VMOV_V_I32, is converted in place, and is copied to r0 with VMOV_F64. */
1464 _vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1467 if (jit_fpr_p(r0)) {
1469 VCVT_F64_S32(r0, r0);
1472 reg = jit_get_reg(jit_class_fpr);
1473 VMOV_V_I32(rn(reg), r1);
1474 VCVT_F64_S32(rn(reg), rn(reg));
1475 VMOV_F64(r0, rn(reg));
/* Single precision to 32-bit integer conversion using VCVT_S32_F32.
 * The converted value is always produced in a scratch float register and
 * then moved to r0 with VMOV_A_S32.  r1 is either converted directly or
 * first copied into the scratch register with VMOV_V_I32 (presumably
 * when r1 is not a float register). */
1481 _vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1484 reg = jit_get_reg(jit_class_fpr);
1486 VCVT_S32_F32(rn(reg), r1);
1488 VMOV_V_I32(rn(reg), r1);
1489 VCVT_S32_F32(rn(reg), rn(reg));
1491 VMOV_A_S32(r0, rn(reg));
/* Double precision to 32-bit integer conversion using VCVT_S32_F64.
 * The converted value is always produced in a scratch float register and
 * then moved to r0 with VMOV_A_S32.  r1 is either converted directly or
 * first copied into the scratch register with VMOV_V_I32 (presumably
 * when r1 is not a float register). */
1496 _vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1499 reg = jit_get_reg(jit_class_fpr);
1501 VCVT_S32_F64(rn(reg), r1);
1503 VMOV_V_I32(rn(reg), r1);
1504 VCVT_S32_F64(rn(reg), rn(reg));
1506 VMOV_A_S32(r0, rn(reg));
/* Single precision multiply-add: r0 = r1 * r2 + r3.
 * The VFMA_F32 path is guarded by "0 && ...", which is constant false,
 * so it is never taken.  The reachable code multiplies with vfp_mulr_f()
 * and adds r3 with vfp_addr_f(), either accumulating directly in r0 or
 * keeping the product in a scratch float register (presumably when r0
 * aliases r3). */
1511 _vfp_fmar_f(jit_state_t *_jit,
1512 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1516 if (0 && jit_cpu.vfp >= 4) {
1517 if (r0 != r2 && r0 != r3) {
1519 VFMA_F32(r0, r2, r3);
1522 t0 = jit_get_reg(jit_class_fpr);
1523 vfp_movr_f(rn(t0), r1);
1524 VFMA_F32(rn(t0), r2, r3);
1525 vfp_movr_f(r0, rn(t0));
1531 vfp_mulr_f(r0, r1, r2);
1532 vfp_addr_f(r0, r0, r3);
1535 t0 = jit_get_reg(jit_class_fpr);
1536 vfp_mulr_f(rn(t0), r1, r2);
1537 vfp_addr_f(r0, rn(t0), r3);
/* Single precision multiply-subtract: r0 = r1 * r2 - r3.
 * The VFMS_F32 path is guarded by "0 && ...", which is constant false,
 * so it is never taken.  The reachable code multiplies with vfp_mulr_f()
 * and subtracts r3 with vfp_subr_f(), either working directly in r0 or
 * keeping the product in a scratch float register (presumably when r0
 * aliases r3). */
1544 _vfp_fmsr_f(jit_state_t *_jit,
1545 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1549 if (0 && jit_cpu.vfp >= 4) {
1550 if (r0 != r2 && r0 != r3) {
1552 VFMS_F32(r0, r2, r3);
1555 t0 = jit_get_reg(jit_class_fpr);
1556 vfp_movr_f(rn(t0), r1);
1557 VFMS_F32(rn(t0), r2, r3);
1558 vfp_movr_f(r0, rn(t0));
1565 vfp_mulr_f(r0, r1, r2);
1566 vfp_subr_f(r0, r0, r3);
1569 t0 = jit_get_reg(jit_class_fpr);
1570 vfp_mulr_f(rn(t0), r1, r2);
1571 vfp_subr_f(r0, rn(t0), r3);
/* Single precision negated multiply-add: r0 = (-r1) * r2 - r3.
 * The VFNMA_F32 path is guarded by "0 && ...", which is constant false,
 * so it is never taken.  The reachable code negates r1 into a scratch
 * float register with vfp_negr_f(), multiplies it by r2 in place, and
 * subtracts r3 into r0 with vfp_subr_f(). */
1578 _vfp_fnmar_f(jit_state_t *_jit,
1579 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1583 if (0 && jit_cpu.vfp >= 4) {
1584 if (r0 != r2 && r0 != r3) {
1586 VFNMA_F32(r0, r2, r3);
1589 t0 = jit_get_reg(jit_class_fpr);
1590 vfp_movr_f(rn(t0), r1);
1591 VFNMA_F32(rn(t0), r2, r3);
1592 vfp_movr_f(r0, rn(t0));
1597 t0 = jit_get_reg(jit_class_fpr);
1598 vfp_negr_f(rn(t0), r1);
1599 vfp_mulr_f(rn(t0), rn(t0), r2);
1600 vfp_subr_f(r0, rn(t0), r3);
/* Single precision negated multiply-subtract: r0 = (-r1) * r2 + r3.
 * The VFNMS_F32 path is guarded by "0 && ...", which is constant false,
 * so it is never taken.  The reachable code negates r1 into a scratch
 * float register with vfp_negr_f(), multiplies it by r2 in place, and
 * adds r3 into r0 with vfp_addr_f(). */
1606 _vfp_fnmsr_f(jit_state_t *_jit,
1607 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1611 if (0 && jit_cpu.vfp >= 4) {
1612 if (r0 != r2 && r0 != r3) {
1614 VFNMS_F32(r0, r2, r3);
1617 t0 = jit_get_reg(jit_class_fpr);
1618 vfp_movr_f(rn(t0), r1);
1619 VFNMS_F32(rn(t0), r2, r3);
1620 vfp_movr_f(r0, rn(t0));
1626 t0 = jit_get_reg(jit_class_fpr);
1627 vfp_negr_f(rn(t0), r1);
1628 vfp_mulr_f(rn(t0), rn(t0), r2);
1629 vfp_addr_f(r0, rn(t0), r3);
/* Double precision multiply-add: r0 = r1 * r2 + r3.
 * The VFMA_F64 path is guarded by "0 && ...", which is constant false,
 * so it is never taken.  The reachable code multiplies with vfp_mulr_d()
 * and adds r3 with vfp_addr_d(), either accumulating directly in r0 or
 * keeping the product in a scratch float register (presumably when r0
 * aliases r3). */
1635 _vfp_fmar_d(jit_state_t *_jit,
1636 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1640 if (0 && jit_cpu.vfp >= 4) {
1641 if (r0 != r2 && r0 != r3) {
1643 VFMA_F64(r0, r2, r3);
1646 t0 = jit_get_reg(jit_class_fpr);
1647 vfp_movr_d(rn(t0), r1);
1648 VFMA_F64(rn(t0), r2, r3);
1649 vfp_movr_d(r0, rn(t0));
1655 vfp_mulr_d(r0, r1, r2);
1656 vfp_addr_d(r0, r0, r3);
1659 t0 = jit_get_reg(jit_class_fpr);
1660 vfp_mulr_d(rn(t0), r1, r2);
1661 vfp_addr_d(r0, rn(t0), r3);
/* Double precision multiply-subtract: r0 = r1 * r2 - r3.
 * The VFMS_F64 path is guarded by "0 && ...", which is constant false,
 * so it is never taken.  The reachable code multiplies with vfp_mulr_d()
 * and subtracts r3 with vfp_subr_d(), either working directly in r0 or
 * keeping the product in a scratch float register (presumably when r0
 * aliases r3). */
1668 _vfp_fmsr_d(jit_state_t *_jit,
1669 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1673 if (0 && jit_cpu.vfp >= 4) {
1674 if (r0 != r2 && r0 != r3) {
1676 VFMS_F64(r0, r2, r3);
1679 t0 = jit_get_reg(jit_class_fpr);
1680 vfp_movr_d(rn(t0), r1);
1681 VFMS_F64(rn(t0), r2, r3);
1682 vfp_movr_d(r0, rn(t0));
1689 vfp_mulr_d(r0, r1, r2);
1690 vfp_subr_d(r0, r0, r3);
1693 t0 = jit_get_reg(jit_class_fpr);
1694 vfp_mulr_d(rn(t0), r1, r2);
1695 vfp_subr_d(r0, rn(t0), r3);
/* Double precision negated multiply-add: r0 = (-r1) * r2 - r3.
 * The VFNMA_F64 path is guarded by "0 && ...", which is constant false,
 * so it is never taken.  The reachable code negates r1 into a scratch
 * float register with vfp_negr_d(), multiplies it by r2 in place, and
 * subtracts r3 into r0 with vfp_subr_d(). */
1702 _vfp_fnmar_d(jit_state_t *_jit,
1703 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1707 if (0 && jit_cpu.vfp >= 4) {
1708 if (r0 != r2 && r0 != r3) {
1710 VFNMA_F64(r0, r2, r3);
1713 t0 = jit_get_reg(jit_class_fpr);
1714 vfp_movr_d(rn(t0), r1);
1715 VFNMA_F64(rn(t0), r2, r3);
1716 vfp_movr_d(r0, rn(t0));
1721 t0 = jit_get_reg(jit_class_fpr);
1722 vfp_negr_d(rn(t0), r1);
1723 vfp_mulr_d(rn(t0), rn(t0), r2);
1724 vfp_subr_d(r0, rn(t0), r3);
/* Double precision negated multiply-subtract: r0 = (-r1) * r2 + r3.
 * The VFNMS_F64 path is guarded by "0 && ...", which is constant false,
 * so it is never taken.  The reachable code negates r1 into a scratch
 * float register with vfp_negr_d(), multiplies it by r2 in place, and
 * adds r3 into r0 with vfp_addr_d(). */
1730 _vfp_fnmsr_d(jit_state_t *_jit,
1731 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1735 if (0 && jit_cpu.vfp >= 4) {
1736 if (r0 != r2 && r0 != r3) {
1738 VFNMS_F64(r0, r2, r3);
1741 t0 = jit_get_reg(jit_class_fpr);
1742 vfp_movr_d(rn(t0), r1);
1743 VFNMS_F64(rn(t0), r2, r3);
1744 vfp_movr_d(r0, rn(t0));
1750 t0 = jit_get_reg(jit_class_fpr);
1751 vfp_negr_d(rn(t0), r1);
1752 vfp_mulr_d(rn(t0), rn(t0), r2);
1753 vfp_addr_d(r0, rn(t0), r3);
/* fopi(name): generates _vfp_<name>i_f(), the immediate form of a single
 * precision operation.  The generated function loads i0 into a scratch
 * float register with vfp_movi_f(), calls vfp_<name>r_f(r0, r1, scratch)
 * and then releases the scratch register with jit_unget_reg(). */
1758 # define fopi(name) \
1760 _vfp_##name##i_f(jit_state_t *_jit, \
1761 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \
1763 jit_int32_t reg = jit_get_reg(jit_class_fpr); \
1764 vfp_movi_f(rn(reg), i0); \
1765 vfp_##name##r_f(r0, r1, rn(reg)); \
1766 jit_unget_reg(reg); \
/* dopi(name): generates _vfp_<name>i_d(), the immediate form of a double
 * precision operation.  The generated function loads i0 into a scratch
 * float register with vfp_movi_d(), calls vfp_<name>r_d(r0, r1, scratch)
 * and then releases the scratch register with jit_unget_reg(). */
1768 # define dopi(name) \
1770 _vfp_##name##i_d(jit_state_t *_jit, \
1771 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \
1773 jit_int32_t reg = jit_get_reg(jit_class_fpr); \
1774 vfp_movi_d(rn(reg), i0); \
1775 vfp_##name##r_d(r0, r1, rn(reg)); \
1776 jit_unget_reg(reg); \
/* fbopi(name): generates _vfp_b<name>i_f(), the immediate form of a single
 * precision branch operation.  The generated function loads i0 into a
 * scratch float register requested with jit_class_nospill, stores the
 * value returned by vfp_b<name>r_f(r0, r1, scratch) in "word", and
 * releases the scratch register with jit_unget_reg(). */
1778 # define fbopi(name) \
1780 _vfp_b##name##i_f(jit_state_t *_jit, \
1781 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \
1784 jit_int32_t reg = jit_get_reg(jit_class_fpr| \
1785 jit_class_nospill); \
1786 vfp_movi_f(rn(reg), i0); \
1787 word = vfp_b##name##r_f(r0, r1, rn(reg)); \
1788 jit_unget_reg(reg); \
/* dbopi(name): generates _vfp_b<name>i_d(), the immediate form of a double
 * precision branch operation.  The generated function loads i0 into a
 * scratch float register requested with jit_class_nospill, stores the
 * value returned by vfp_b<name>r_d(r0, r1, scratch) in "word", and
 * releases the scratch register with jit_unget_reg(). */
1791 # define dbopi(name) \
1793 _vfp_b##name##i_d(jit_state_t *_jit, \
1794 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \
1797 jit_int32_t reg = jit_get_reg(jit_class_fpr| \
1798 jit_class_nospill); \
1799 vfp_movi_d(rn(reg), i0); \
1800 word = vfp_b##name##r_d(r0, r1, rn(reg)); \
1801 jit_unget_reg(reg); \
/* Emit a single precision VCMP_F32 of r0 against r1.
 * Depending on jit_fpr_p(), an operand may first be copied with VMOV_S_A
 * into a scratch float register (rg0 stands in for r0, rg1 for r1), so
 * the comparison is issued with r0/r1 themselves, with one scratch, or
 * with both scratches. */
1817 _vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1819 jit_int32_t rg0, rg1;
1820 if (jit_fpr_p(r0)) {
1824 rg1 = jit_get_reg(jit_class_fpr);
1825 VMOV_S_A(rn(rg1), r1);
1826 VCMP_F32(r0, rn(rg1));
1831 rg0 = jit_get_reg(jit_class_fpr);
1832 VMOV_S_A(rn(rg0), r0);
1834 VCMP_F32(rn(rg0), r1);
1836 rg1 = jit_get_reg(jit_class_fpr);
1837 VMOV_S_A(rn(rg1), r1);
1838 VCMP_F32(rn(rg0), rn(rg1));
/* Emit a double precision VCMP_F64 of r0 against r1.
 * Depending on jit_fpr_p(), an operand may first be copied with
 * VMOV_D_AA from the register pair rX/rX + 1 into a scratch float
 * register (rg0 stands in for r0, rg1 for r1), so the comparison is
 * issued with r0/r1 themselves, with one scratch, or with both. */
1846 _vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1848 jit_int32_t rg0, rg1;
1849 if (jit_fpr_p(r0)) {
1853 rg1 = jit_get_reg(jit_class_fpr);
1854 VMOV_D_AA(rn(rg1), r1, r1 + 1);
1855 VCMP_F64(r0, rn(rg1));
1860 rg0 = jit_get_reg(jit_class_fpr);
1861 VMOV_D_AA(rn(rg0), r0, r0 + 1);
1863 VCMP_F64(rn(rg0), r1);
1865 rg1 = jit_get_reg(jit_class_fpr);
1866 VMOV_D_AA(rn(rg1), r1, r1 + 1);
1867 VCMP_F64(rn(rg0), rn(rg1));
/* Shared helper of _vcmp01_f() and _vcmp01_d(); takes two condition
 * codes (c0, c1) and the register r0.  In Thumb mode the code generated
 * depends on whether (c0 ^ c1) >> 28 equals 1.
 * NOTE(review): presumably r0 is set to 0 under c0 and to 1 under c1 --
 * confirm against the full body. */
1875 _vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
1878 if (jit_thumb_p()) {
1879 if ((c0 ^ c1) >> 28 == 1) {
/* Single precision front end that finishes by calling
 * vcmp01_x(c0, c1, r0).
 * NOTE(review): presumably r1 and r2 are compared first -- confirm. */
1912 _vcmp01_f(jit_state_t *_jit, int c0, int c1,
1913 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1916 vcmp01_x(c0, c1, r0);
/* Double precision front end that finishes by calling
 * vcmp01_x(c0, c1, r0).
 * NOTE(review): presumably r1 and r2 are compared first -- confirm. */
1920 _vcmp01_d(jit_state_t *_jit, int c0, int c1,
1921 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1924 vcmp01_x(c0, c1, r0);
/* Helper taking one condition code (cc) and the register r0, with
 * separate handling when jit_thumb_p() is true.  It is called by the
 * ordr helpers as vcmp10_x(ARM_CC_VS, r0).
 * NOTE(review): presumably r0 defaults to 1 and becomes 0 when cc
 * holds -- confirm against the full body. */
1928 _vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1930 if (jit_thumb_p()) {
/* Single precision variant taking a condition code and three register
 * numbers.
 * NOTE(review): presumably compares r1 with r2 and leaves the boolean
 * result in r0 -- confirm. */
1951 _vcmp_10_f(jit_state_t *_jit, int cc,
1952 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Double precision variant taking a condition code and three register
 * numbers.
 * NOTE(review): presumably compares r1 with r2 and leaves the boolean
 * result in r0 -- confirm. */
1959 _vcmp_10_d(jit_state_t *_jit, int cc,
1960 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Helper taking only the register r0, with separate handling when
 * jit_thumb_p() is true.  The conditional moves shown set r0 to 0 under
 * NE and then to 1 under EQ and under VS.
 * NOTE(review): presumably the flags come from a preceding VFP compare
 * and VS means "unordered" -- confirm. */
1984 _vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
1987 if (jit_thumb_p()) {
2003 CC_MOVI(ARM_CC_NE, r0, 0);
2004 CC_MOVI(ARM_CC_EQ, r0, 1);
2005 CC_MOVI(ARM_CC_VS, r0, 1);
/* Single precision uneqr entry point taking three register numbers.
 * NOTE(review): presumably compares r1 with r2 and defers to the shared
 * _vfp_uneqr_x() helper to set r0 -- confirm. */
2010 _vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Double precision uneqr entry point taking three register numbers.
 * NOTE(review): presumably compares r1 with r2 and defers to the shared
 * _vfp_uneqr_x() helper to set r0 -- confirm. */
2019 _vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Helper taking one condition code (cc) and the register r0, with
 * separate handling when jit_thumb_p() is true.  It is called by the
 * unordr helpers as vcmp_01_x(ARM_CC_VS, r0).
 * NOTE(review): presumably r0 defaults to 0 and becomes 1 when cc
 * holds -- confirm against the full body. */
2028 _vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
2030 if (jit_thumb_p()) {
/* Single precision variant taking a condition code and three register
 * numbers.
 * NOTE(review): presumably compares r1 with r2 and leaves the boolean
 * result in r0 -- confirm. */
2052 _vcmp_01_f(jit_state_t *_jit, int cc,
2053 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Double precision variant taking a condition code and three register
 * numbers.
 * NOTE(review): presumably compares r1 with r2 and leaves the boolean
 * result in r0 -- confirm. */
2060 _vcmp_01_d(jit_state_t *_jit, int cc,
2061 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Helper taking only the register r0, with separate handling when
 * jit_thumb_p() is true.  The conditional moves shown set r0 to 1 under
 * NE and then to 0 under EQ and under VS.
 * NOTE(review): presumably the flags come from a preceding VFP compare
 * and VS means "unordered" -- confirm. */
2073 _vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
2076 if (jit_thumb_p()) {
2092 CC_MOVI(ARM_CC_NE, r0, 1);
2093 CC_MOVI(ARM_CC_EQ, r0, 0);
2094 CC_MOVI(ARM_CC_VS, r0, 0);
/* Single precision ltgtr entry point taking three register numbers.
 * NOTE(review): presumably compares r1 with r2 and defers to the shared
 * _vfp_ltgtr_x() helper to set r0 -- confirm. */
2099 _vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Double precision ltgtr entry point taking three register numbers.
 * NOTE(review): presumably compares r1 with r2 and defers to the shared
 * _vfp_ltgtr_x() helper to set r0 -- confirm. */
2108 _vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Single precision ordr entry point; the result in r0 is produced by
 * vcmp10_x() with the ARM_CC_VS condition.
 * NOTE(review): presumably r1 and r2 are compared first -- confirm. */
2117 _vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2120 vcmp10_x(ARM_CC_VS, r0);
/* Double precision ordr entry point; the result in r0 is produced by
 * vcmp10_x() with the ARM_CC_VS condition.
 * NOTE(review): presumably r1 and r2 are compared first -- confirm. */
2126 _vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2129 vcmp10_x(ARM_CC_VS, r0);
/* Single precision unordr entry point; the result in r0 is produced by
 * vcmp_01_x() with the ARM_CC_VS condition.
 * NOTE(review): presumably r1 and r2 are compared first -- confirm. */
2135 _vfp_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2138 vcmp_01_x(ARM_CC_VS, r0);
/* Double precision unordr entry point; the result in r0 is produced by
 * vcmp_01_x() with the ARM_CC_VS condition.
 * NOTE(review): presumably r1 and r2 are compared first -- confirm. */
2144 _vfp_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2147 vcmp_01_x(ARM_CC_VS, r0);
/* Emit a branch on condition cc towards i0.
 * The displacement d is derived from (i0 - w): shifted right by 1 and
 * reduced by 2 in Thumb mode (T2_CC_B with encode_thumb_cc_jump), or
 * shifted right by 2 and reduced by 2 otherwise (CC_B with d masked to
 * 24 bits).
 * NOTE(review): presumably w is the address of the branch being emitted
 * and is also the value returned -- confirm. */
2153 _vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
2158 if (jit_thumb_p()) {
2159 d = ((i0 - w) >> 1) - 2;
2161 T2_CC_B(cc, encode_thumb_cc_jump(d));
2164 d = ((i0 - w) >> 2) - 2;
2166 CC_B(cc, d & 0x00ffffff);
/* Single precision compare-and-branch front end; returns the value of
 * vbcmp_x(cc, i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2173 _vbcmp_f(jit_state_t *_jit, int cc,
2174 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2177 return (vbcmp_x(cc, i0));
/* Double precision compare-and-branch front end; returns the value of
 * vbcmp_x(cc, i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2181 _vbcmp_d(jit_state_t *_jit, int cc,
2182 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2185 return (vbcmp_x(cc, i0));
/* Branch helper taking a condition code cc and a target i0.
 * In Thumb mode an unconditional T2_B is emitted towards i0 with a
 * displacement of ((i0 - w) >> 1) - 2; the non-Thumb path uses
 * ((i0 - w) >> 2) - 2.  Finally the instruction recorded at p is patched
 * with patch_at() to the current code position (_jit->pc.w).
 * NOTE(review): presumably p is a branch on cc that skips the
 * unconditional branch, so i0 is reached only when cc does not hold --
 * confirm. */
2189 _vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
2194 if (jit_thumb_p()) {
2197 d = ((i0 - w) >> 1) - 2;
2199 T2_B(encode_thumb_jump(d));
2204 d = ((i0 - w) >> 2) - 2;
2208 patch_at(arm_patch_jump, p, _jit->pc.w);
/* Single precision compare-and-branch front end; returns the value of
 * vbncmp_x(cc, i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2213 _vbncmp_f(jit_state_t *_jit, int cc,
2214 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2217 return (vbncmp_x(cc, i0));
/* Double precision compare-and-branch front end; returns the value of
 * vbncmp_x(cc, i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2221 _vbncmp_d(jit_state_t *_jit, int cc,
2222 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2225 return (vbncmp_x(cc, i0));
/* Branch helper for buneqr, targeting i0.
 * In Thumb mode two conditional branches (VS, then NE) are emitted with
 * a zero placeholder displacement, followed by an unconditional T2_B
 * towards i0 whose displacement is ((i0 - w) >> 1) - 2; the non-Thumb
 * path uses ((i0 - w) >> 2) - 2.  The placeholders recorded in p and q
 * are fixed up with patch_at() to the code position current at the time
 * of each call.
 * NOTE(review): presumably VS jumps straight to the branch-to-i0 and NE
 * jumps past it -- confirm which placeholder is p and which is q. */
2246 _vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
2248 jit_word_t d, p, q, w;
2251 if (jit_thumb_p()) {
2252 T2_CC_B(ARM_CC_VS, 0);
2254 T2_CC_B(ARM_CC_NE, 0);
2255 patch_at(arm_patch_jump, p, _jit->pc.w);
2257 d = ((i0 - w) >> 1) - 2;
2259 T2_B(encode_thumb_jump(d));
2265 patch_at(arm_patch_jump, p, _jit->pc.w);
2267 d = ((i0 - w) >> 2) - 2;
2271 patch_at(arm_patch_jump, q, _jit->pc.w);
/* Single precision buneqr entry point; returns the value of
 * vfp_buneqr_x(i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2276 _vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2279 return (vfp_buneqr_x(i0));
/* Double precision buneqr entry point; returns the value of
 * vfp_buneqr_x(i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2285 _vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2288 return (vfp_buneqr_x(i0));
/* Branch helper for bunger, targeting i0.
 * In Thumb mode a conditional branch on MI is emitted with a zero
 * placeholder displacement, followed by a branch on HS towards i0 with
 * displacement ((i0 - w) >> 1) - 2.  The non-Thumb path emits CC_B on HS
 * with ((i0 - w) >> 2) - 2 masked to 24 bits.  The instruction recorded
 * at p is then patched to the current code position.
 * NOTE(review): presumably p is the MI placeholder, making MI skip the
 * HS branch -- confirm. */
2294 _vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
2299 if (jit_thumb_p()) {
2300 T2_CC_B(ARM_CC_MI, 0);
2302 d = ((i0 - w) >> 1) - 2;
2304 T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
2309 d = ((i0 - w) >> 2) - 2;
2311 CC_B(ARM_CC_HS, d & 0x00ffffff);
2313 patch_at(arm_patch_jump, p, _jit->pc.w);
/* Single precision bunger entry point; returns the value of
 * vfp_bunger_x(i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2318 _vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2321 return (vfp_bunger_x(i0));
/* Double precision bunger entry point; returns the value of
 * vfp_bunger_x(i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2327 _vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2330 return (vfp_bunger_x(i0));
/* Branch helper for bltgtr, targeting i0.
 * In Thumb mode two conditional branches (VS, then EQ) are emitted with
 * a zero placeholder displacement, followed by an unconditional T2_B
 * towards i0 whose displacement is ((i0 - w) >> 1) - 2; the non-Thumb
 * path uses ((i0 - w) >> 2) - 2.  Both placeholders, p and q, are then
 * patched to the same current code position.
 * NOTE(review): presumably this makes VS and EQ skip the branch to i0 --
 * confirm. */
2336 _vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
2338 jit_word_t d, p, q, w;
2341 if (jit_thumb_p()) {
2342 T2_CC_B(ARM_CC_VS, 0);
2344 T2_CC_B(ARM_CC_EQ, 0);
2346 d = ((i0 - w) >> 1) - 2;
2348 T2_B(encode_thumb_jump(d));
2355 d = ((i0 - w) >> 2) - 2;
2359 patch_at(arm_patch_jump, p, _jit->pc.w);
2360 patch_at(arm_patch_jump, q, _jit->pc.w);
/* Single precision bltgtr entry point; returns the value of
 * vfp_bltgtr_x(i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2365 _vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2368 return (vfp_bltgtr_x(i0));
/* Double precision bltgtr entry point; returns the value of
 * vfp_bltgtr_x(i0).
 * NOTE(review): presumably r0 and r1 are compared first -- confirm. */
2376 _vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2379 return (vfp_bltgtr_x(i0));
/* Load a single precision value into r0 given the immediate i0.
 * When r0 is a float register, a scratch general purpose register is
 * used as the base of VLDR_F32 with offset 0.
 * NOTE(review): presumably i0 is an absolute address loaded into the
 * scratch register first -- confirm. */
2389 _vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2392 if (jit_fpr_p(r0)) {
2393 gpr = jit_get_reg(jit_class_gpr);
2395 VLDR_F32(r0, rn(gpr), 0);
/* Load a double precision value into r0 given the immediate i0.
 * A scratch general purpose register serves as the base.  One path
 * uses VLDR_F64 with offset 0; another fetches the word at offset 4 into
 * r0 + 1 with ldxi_i().
 * NOTE(review): presumably the ldxi_i() path handles a non-float r0 that
 * names a register pair -- confirm. */
2403 _vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2406 reg = jit_get_reg(jit_class_gpr);
2409 VLDR_F64(r0, rn(reg), 0);
2412 ldxi_i(r0 + 1, rn(reg), 4);
/* Indexed single precision load into r0 from r1 + r2.
 * When r0 is a float register the sum is formed with addr() in a scratch
 * general purpose register, which is then the base of VLDR_F32 with
 * offset 0. */
2418 _vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2421 if (jit_fpr_p(r0)) {
2422 reg = jit_get_reg(jit_class_gpr);
2423 addr(rn(reg), r1, r2);
2424 VLDR_F32(r0, rn(reg), 0);
/* Indexed double precision load into r0 from r1 + r2.
 * The sum is formed with addr() in a scratch general purpose register.
 * One path uses it as the base of VLDR_F64 with offset 0; another
 * fetches the word at offset 4 into r0 + 1 with ldxi_i().
 * NOTE(review): presumably the ldxi_i() path handles a non-float r0 that
 * names a register pair -- confirm. */
2432 _vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2435 reg = jit_get_reg(jit_class_gpr);
2436 addr(rn(reg), r1, r2);
2438 VLDR_F64(r0, rn(reg), 0);
2441 ldxi_i(r0 + 1, rn(reg), 4);
/* Single precision load into r0 from base r1 with immediate offset i0.
 * For a float register r0 four forms are visible: VLDR_F32 with the
 * offset encoded as i0 >> 2, addi() into a scratch register followed by
 * VLDR_F32 at offset 0, VLDRN_F32 with i0 >> 2, and subi() into a
 * scratch register followed by VLDR_F32 at offset 0.
 * NOTE(review): presumably the VLDRN/subi forms serve negative offsets
 * (with i0 negated beforehand) and the scratch forms serve offsets that
 * do not fit the instruction encoding -- confirm. */
2447 _vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2450 if (jit_fpr_p(r0)) {
2454 VLDR_F32(r0, r1, i0 >> 2);
2456 reg = jit_get_reg(jit_class_gpr);
2457 addi(rn(reg), r1, i0);
2458 VLDR_F32(r0, rn(reg), 0);
2466 VLDRN_F32(r0, r1, i0 >> 2);
2468 reg = jit_get_reg(jit_class_gpr);
2469 subi(rn(reg), r1, i0);
2470 VLDR_F32(r0, rn(reg), 0);
/* Double precision load into r0 from base r1 with immediate offset i0.
 * For a float register r0 four forms are visible: VLDR_F64 with the
 * offset encoded as i0 >> 2, addi() into a scratch register followed by
 * VLDR_F64 at offset 0, VLDRN_F64 with i0 >> 2, and subi() into a
 * scratch register followed by VLDR_F64 at offset 0.  A further path
 * forms r1 + i0 in a scratch register and fetches the word at offset 4
 * into r0 + 1 with ldxi_i().
 * NOTE(review): presumably the VLDRN/subi forms serve negative offsets
 * (with i0 negated beforehand) and the last path serves a non-float r0
 * naming a register pair -- confirm. */
2480 _vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2483 if (jit_fpr_p(r0)) {
2487 VLDR_F64(r0, r1, i0 >> 2);
2489 reg = jit_get_reg(jit_class_gpr);
2490 addi(rn(reg), r1, i0);
2491 VLDR_F64(r0, rn(reg), 0);
2499 VLDRN_F64(r0, r1, i0 >> 2);
2501 reg = jit_get_reg(jit_class_gpr);
2502 subi(rn(reg), r1, i0);
2503 VLDR_F64(r0, rn(reg), 0);
2509 reg = jit_get_reg(jit_class_gpr);
2510 addi(rn(reg), r1, i0);
2512 ldxi_i(r0 + 1, rn(reg), 4);
/* Load helper whose size argument i0 must be 4 or 8 (asserted).
 * When jit_vfp_unaligned_p() holds, the data travels through scratch
 * general purpose registers: one for the 4 byte case, finished with
 * vfp_movr_w_f(r0, r2), and two for the 8 byte case, finished with
 * vfp_movr_ww_d(r0, r2, r3).  jit_unaligned_p() and the byte order
 * select how the words are fetched.
 * NOTE(review): presumably r1 holds the source address and r2/r3 alias
 * the scratch registers t0/t1 -- confirm. */
2518 _vfp_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2522 assert(i0 == 4 || i0 == 8);
2523 if (jit_vfp_unaligned_p()) {
2524 t0 = jit_get_reg(jit_class_gpr);
2527 if (jit_unaligned_p())
2531 vfp_movr_w_f(r0, r2);
2534 t1 = jit_get_reg(jit_class_gpr);
2536 #if __BYTE_ORDER == __LITTLE_ENDIAN
2537 if (jit_unaligned_p()) {
2547 if (jit_unaligned_p()) {
2557 vfp_movr_ww_d(r0, r2, r3);
/* Load helper for the immediate address i0; the size argument i1 must be
 * 4 or 8 (asserted).
 * When jit_vfp_unaligned_p() holds, the data travels through scratch
 * general purpose registers: the 4 byte case finishes with
 * vfp_movr_w_f(r0, r2), the 8 byte case with vfp_movr_ww_d(r0, r3, r2).
 * With jit_unaligned_p(), the word at i0 + 4 is fetched with unldi()
 * into r3 on little endian and into r2 on big endian.
 * NOTE(review): the final vfp_movr_ww_d() passes (r3, r2) whereas
 * _vfp_unldr_x() passes (r2, r3); confirm the word order is intended. */
2571 _vfp_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
2575 assert(i1 == 4 || i1 == 8);
2576 if (jit_vfp_unaligned_p()) {
2577 t0 = jit_get_reg(jit_class_gpr);
2581 vfp_movr_w_f(r0, r2);
2584 t1 = jit_get_reg(jit_class_gpr);
2586 #if __BYTE_ORDER == __LITTLE_ENDIAN
2587 if (jit_unaligned_p()) {
2589 unldi(r3, i0 + 4, 4);
2596 if (jit_unaligned_p()) {
2598 unldi(r2, i0 + 4, 4);
2605 vfp_movr_ww_d(r0, r3, r2);
/* Store the single precision value in r0 given the immediate i0.
 * When r0 is a float register, a scratch general purpose register is
 * used as the base of VSTR_F32 with offset 0.
 * NOTE(review): presumably i0 is an absolute address loaded into the
 * scratch register first -- confirm. */
2619 _vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2622 if (jit_fpr_p(r0)) {
2623 reg = jit_get_reg(jit_class_gpr);
2625 VSTR_F32(r0, rn(reg), 0);
/* Store the double precision value in r0 given the immediate i0.
 * A scratch general purpose register serves as the base.  One path
 * uses VSTR_F64 with offset 0; another writes r0 + 1 at offset 4 with
 * stxi_i().
 * NOTE(review): presumably the stxi_i() path handles a non-float r0 that
 * names a register pair -- confirm. */
2633 _vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2636 reg = jit_get_reg(jit_class_gpr);
2639 VSTR_F64(r0, rn(reg), 0);
2642 stxi_i(4, rn(reg), r0 + 1);
/* Indexed single precision store of r2 to r0 + r1.
 * When r2 is a float register the sum is formed with addr() in a scratch
 * general purpose register, which is then the base of VSTR_F32 with
 * offset 0. */
2648 _vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2651 if (jit_fpr_p(r2)) {
2652 reg = jit_get_reg(jit_class_gpr);
2653 addr(rn(reg), r0, r1);
2654 VSTR_F32(r2, rn(reg), 0);
/* Indexed double precision store of r2 to r0 + r1.
 * The sum is formed with addr() in a scratch general purpose register.
 * One path uses it as the base of VSTR_F64 with offset 0; another
 * writes r2 + 1 at offset 4 with stxi_i().
 * NOTE(review): presumably the stxi_i() path handles a non-float r2 that
 * names a register pair -- confirm. */
2662 _vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2665 reg = jit_get_reg(jit_class_gpr);
2666 addr(rn(reg), r0, r1);
2668 VSTR_F64(r2, rn(reg), 0);
2671 stxi_i(4, rn(reg), r2 + 1);
/* Single precision store of r1 to base r0 with immediate offset i0.
 * For a float register r1 four forms are visible: VSTR_F32 with the
 * offset encoded as i0 >> 2, addi() into a scratch register followed by
 * VSTR_F32 at offset 0, VSTRN_F32 with i0 >> 2, and subi() into a
 * scratch register followed by VSTR_F32 at offset 0.
 * NOTE(review): presumably the VSTRN/subi forms serve negative offsets
 * (with i0 negated beforehand) and the scratch forms serve offsets that
 * do not fit the instruction encoding -- confirm. */
2677 _vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2680 if (jit_fpr_p(r1)) {
2684 VSTR_F32(r1, r0, i0 >> 2);
2686 reg = jit_get_reg(jit_class_gpr);
2687 addi(rn(reg), r0, i0);
2688 VSTR_F32(r1, rn(reg), 0);
2696 VSTRN_F32(r1, r0, i0 >> 2);
2698 reg = jit_get_reg(jit_class_gpr);
2699 subi(rn(reg), r0, i0);
2700 VSTR_F32(r1, rn(reg), 0);
/* Double precision store of r1 to base r0 with immediate offset i0.
 * For a float register r1 four forms are visible: VSTR_F64 with the
 * offset encoded as i0 >> 2, addi() into a scratch register followed by
 * VSTR_F64 at offset 0, VSTRN_F64 with i0 >> 2, and subi() into a
 * scratch register followed by VSTR_F64 at offset 0.  A further path
 * forms r0 + i0 in a scratch register and writes r1 + 1 at offset 4 with
 * stxi_i().
 * NOTE(review): presumably the VSTRN/subi forms serve negative offsets
 * (with i0 negated beforehand) and the last path serves a non-float r1
 * naming a register pair -- confirm. */
2710 _vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2713 if (jit_fpr_p(r1)) {
2717 VSTR_F64(r1, r0, i0 >> 2);
2719 reg = jit_get_reg(jit_class_gpr);
2720 addi(rn(reg), r0, i0);
2721 VSTR_F64(r1, rn(reg), 0);
2729 VSTRN_F64(r1, r0, i0 >> 2);
2731 reg = jit_get_reg(jit_class_gpr);
2732 subi(rn(reg), r0, i0);
2733 VSTR_F64(r1, rn(reg), 0);
2739 reg = jit_get_reg(jit_class_gpr);
2740 addi(rn(reg), r0, i0);
2742 stxi_i(4, rn(reg), r1 + 1);
/* Store helper whose size argument i0 must be 4 or 8 (asserted).
 * When jit_vfp_unaligned_p() holds, the value in r1 is first moved to
 * scratch general purpose registers: vfp_movr_f_w(r2, r1) for 4 bytes,
 * vfp_movr_d_ww(r2, r3, r1) for 8 bytes.  jit_unaligned_p() and the byte
 * order then select how the words are written out.
 * NOTE(review): presumably r0 holds the destination address and r2/r3
 * alias the scratch registers t0/t1 -- confirm. */
2748 _vfp_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2752 assert(i0 == 4 || i0 == 8);
2753 if (jit_vfp_unaligned_p()) {
2754 t0 = jit_get_reg(jit_class_gpr);
2757 vfp_movr_f_w(r2, r1);
2758 if (jit_unaligned_p())
2764 t1 = jit_get_reg(jit_class_gpr);
2766 vfp_movr_d_ww(r2, r3, r1);
2767 #if __BYTE_ORDER == __LITTLE_ENDIAN
2768 if (jit_unaligned_p()) {
2778 if (jit_unaligned_p()) {
/* Store helper for the immediate address i0; the size argument i1 must
 * be 4 or 8 (asserted).
 * When jit_vfp_unaligned_p() holds, the value in r0 is first moved to
 * scratch general purpose registers: vfp_movr_f_w(r2, r0) for 4 bytes,
 * vfp_movr_d_ww(r2, r3, r0) for 8 bytes.  With jit_unaligned_p(), the
 * word at i0 + 4 is written with unsti() from r2 on little endian and
 * from r3 on big endian.
 * NOTE(review): presumably r2/r3 alias the scratch registers t0/t1 --
 * confirm. */
2801 _vfp_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2805 assert(i1 == 4 || i1 == 8);
2806 if (jit_vfp_unaligned_p()) {
2807 t0 = jit_get_reg(jit_class_gpr);
2810 vfp_movr_f_w(r2, r0);
2811 if (jit_unaligned_p())
2817 t1 = jit_get_reg(jit_class_gpr);
2819 vfp_movr_d_ww(r2, r3, r0);
2820 #if __BYTE_ORDER == __LITTLE_ENDIAN
2821 if (jit_unaligned_p()) {
2823 unsti(i0 + 4, r2, 4);
2830 if (jit_unaligned_p()) {
2832 unsti(i0 + 4, r3, 4);
/* Fetch the next double precision variadic argument into r0, using r1 as
 * the argument pointer.  Only valid in a function flagged with
 * jit_call_varargs (asserted).
 * r1 is advanced by (r1 & 7) before the load and by
 * sizeof(jit_float64_t) after it, so the update to r1 is visible to the
 * caller.
 * NOTE(review): adding (r1 & 7) yields an 8 byte aligned pointer only if
 * r1 is already 4 byte aligned -- presumably guaranteed by the calling
 * convention; confirm. */
2852 _vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2856 assert(_jitc->function->self.call & jit_call_varargs);
2858 /* Adjust pointer. */
2859 reg = jit_get_reg(jit_class_gpr);
2860 andi(rn(reg), r1, 7);
2861 addr(r1, r1, rn(reg));
2864 /* Load argument. */
2867 /* Update stack pointer. */
2868 addi(r1, r1, sizeof(jit_float64_t));