Update lightrec 20220910 (#686)
[pcsx_rearmed.git] / deps / lightning / lib / jit_arm-vfp.c
CommitLineData
4a71579b
PC
/*
 * Copyright (C) 2012-2019  Free Software Foundation, Inc.
 *
 * This file is part of GNU lightning.
 *
 * GNU lightning is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU lightning is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * Authors:
 *	Paulo Cesar Pereira de Andrade
 */

20#if PROTO
/* as per vfp_regno macro, required due to "support" to soft float registers
 * or using integer registers as arguments to float operations */
# define _D8_REGNO		32

/* Also defined, with the same value, next to the other ARM_V_* operand
 * bits further down in this file. */
# define ARM_V_Q		0x00000040

/* FPSCR condition and mode bits (31..24) */
# define FPSCR_N		0x80000000	/* Negative flag */
# define FPSCR_Z		0x40000000	/* Zero flag */
# define FPSCR_C		0x20000000	/* Carry flag */
# define FPSCR_V		0x10000000	/* Overflow flag */
# define FPSCR_QC		0x08000000	/* Cumulative saturation */
# define FPSCR_AHP		0x04000000	/* Alt. half-precision */
# define FPSCR_DN		0x02000000	/* Default NaN mode */
# define FPSCR_FZ		0x01000000	/* Flush to zero */

/* FPSCR rounding mode: FPSCR_RMASK selects the two bit field, the
 * FPSCR_R? values below are the four settings of that field. */
# define FPSCR_RMASK		0x00c00000
# define FPSCR_RN		0x00000000	/* Round to Nearest */
# define FPSCR_RP		0x00400000	/* Round to Plus Infinity */
# define FPSCR_RM		0x00800000	/* Round to Minus Infinity */
# define FPSCR_RZ		0x00c00000	/* Round towards Zero */

/* FPSCR field masks between the rounding mode and the trap enables */
# define FPSCR_STRIDE		0x00300000
# define FPSCR_RES1		0x00080000	/* Reserved, UNK/SBZP */
# define FPSCR_LEN		0x00070000

/* FPSCR trap enable bits */
# define FPSCR_IDE		0x00008000	/* Input Denormal trap */
# define FPSCR_IXE		0x00001000	/* Inexact trap */
# define FPSCR_UFE		0x00000800	/* Underflow trap */
# define FPSCR_OFE		0x00000400	/* Overflow trap */
# define FPSCR_DZE		0x00000200	/* Division by zero trap */
# define FPSCR_IOE		0x00000100	/* Invalid Operation trap */

/* FPSCR cumulative exception flags */
# define FPSCR_IDC		0x00000080	/* Input Denormal flag */
# define FPSCR_RES0		0x00000060	/* Reserved, UNK/SBZP */
# define FPSCR_IXC		0x00000010	/* Inexact flag */
# define FPSCR_UFC		0x00000008	/* Underflow flag */
# define FPSCR_OFC		0x00000004	/* Overflow flag */
# define FPSCR_DZC		0x00000002	/* Division by zero flag */
# define FPSCR_IOC		0x00000001	/* Invalid Operation flag */

/* Modifier bits OR-ed into VFP opcodes by the instruction macros */
# define ARM_V_E		0x00000080	/* ARM_VCMP except if NaN */
# define ARM_V_Z		0x00010000	/* ARM_VCMP with zero */
# define ARM_V_F64		0x00000100
/* VFP data-processing opcodes.  The *_F64 instruction macros in this file
 * OR ARM_V_F64 into these values. */
# define ARM_VADD_F		0x0e300a00
# define ARM_VSUB_F		0x0e300a40
# define ARM_VMUL_F		0x0e200a00
# define ARM_VDIV_F		0x0e800a00
# define ARM_VABS_F		0x0eb00ac0
# define ARM_VNEG_F		0x0eb10a40
# define ARM_VSQRT_F		0x0eb10ac0
# define ARM_VMOV_F		0x0eb00a40

/* Transfers between core registers and VFP registers */
# define ARM_VMOV_A_S		0x0e100a10	/* vmov rn, sn */
# define ARM_VMOV_S_A		0x0e000a10	/* vmov sn, rn */
# define ARM_VMOV_AA_D		0x0c500b10	/* vmov rn,rn, dn */
# define ARM_VMOV_D_AA		0x0c400b10	/* vmov dn, rn,rn */

/* Compare and status register access */
# define ARM_VCMP		0x0eb40a40
# define ARM_VMRS		0x0ef10a10
# define ARM_VMSR		0x0ee10a10

/* VCVT base opcode and the bits combined with it below */
# define ARM_VCVT_2I		0x00040000	/* to integer */
# define ARM_VCVT_2S		0x00010000	/* to signed */
# define ARM_VCVT_RS		0x00000080	/* round to zero or signed */
# define ARM_VCVT		0x0eb80a40

/* Composite opcodes.  Each expansion is parenthesized so that the value
 * remains one operand when used next to operators that bind tighter
 * than '|' (for example '&', '==' or '^'). */
# define ARM_VCVT_S32_F32	(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
# define ARM_VCVT_U32_F32	(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS)
# define ARM_VCVT_S32_F64	(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_U32_F64	(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_F32_S32	(ARM_VCVT|ARM_VCVT_RS)
# define ARM_VCVT_F32_U32	(ARM_VCVT)
# define ARM_VCVT_F64_S32	(ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_F64_U32	(ARM_VCVT|ARM_V_F64)

/* Conversion between single and double precision */
# define ARM_VCVT_F		0x0eb70ac0
# define ARM_VCVT_F32_F64	(ARM_VCVT_F)
# define ARM_VCVT_F64_F32	(ARM_VCVT_F|ARM_V_F64)

/* Same as the ARM_VCVT_?32_F?? values above but without ARM_VCVT_RS */
# define ARM_VCVTR_S32_F32	(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S)
# define ARM_VCVTR_U32_F32	(ARM_VCVT|ARM_VCVT_2I)
# define ARM_VCVTR_S32_F64	(ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64)
# define ARM_VCVTR_U32_F64	(ARM_VCVT|ARM_VCVT_2I|ARM_V_F64)
/* Operand/variant bits OR-ed into the opcodes below by the instruction
 * macros of this file. */
# define ARM_V_D		0x00400000
# define ARM_V_N		0x00000080
# define ARM_V_Q		0x00000040
# define ARM_V_M		0x00000020
# define ARM_V_U		0x01000000

/* Element size selectors used with the three register integer opcodes */
# define ARM_V_I16		0x00100000
# define ARM_V_I32		0x00200000
# define ARM_V_I64		0x00300000

/* Element size selectors used with the two register (abs/neg) opcodes */
# define ARM_V_S16		0x00040000
# define ARM_V_S32		0x00080000

/* Integer add */
# define ARM_VADD_I		0x02000800
# define ARM_VQADD_I		0x02000010	/* set flag on over/carry */
# define ARM_VADDL_I		0x02800000	/* q=d+d */
# define ARM_VADDW_I		0x02800100	/* q=q+d */

/* Integer subtract */
# define ARM_VSUB_I		0x03000800
# define ARM_VQSUB_I		0x02000210	/* set flag on over/carry */
# define ARM_VSUBL_I		0x02800200
# define ARM_VSUBW_I		0x02800300

/* Integer multiply */
# define ARM_VMUL_I		0x02000910
# define ARM_VMULL_I		0x02800c00

/* Integer absolute value and negate */
# define ARM_VABS_I		0x03b10300
# define ARM_VQABS_I		0x03b00700	/* sets flag on overflow */
# define ARM_VNEG_I		0x03b10380
# define ARM_VQNEG_I		0x03b00780	/* sets flag on overflow */

/* Bitwise logical */
# define ARM_VAND		0x02000110
# define ARM_VBIC		0x02100110
# define ARM_VORR		0x02200110
# define ARM_VORN		0x02300110
# define ARM_VEOR		0x03000110

/* Lengthening move: source element size selectors and base opcode */
# define ARM_VMOVL_S8		0x00080000
# define ARM_VMOVL_S16		0x00100000
# define ARM_VMOVL_S32		0x00200000
# define ARM_VMOVL_I		0x02800a10

/* Immediate moves */
# define ARM_VMOVI		0x02800010
# define ARM_VMVNI		0x02800030

/* Load/store */
# define ARM_VLDR		0x0d100a00
# define ARM_VSTR		0x0d000a00
# define ARM_VM			0x0c000a00

/* Core register <-> vector element moves */
# define ARM_VMOV_ADV_U		0x00800000	/* zero extend */
# define ARM_VMOV_ADV_8		0x00400000
# define ARM_VMOV_ADV_16	0x00000020
# define ARM_VMOV_A_D		0x0e100b10
# define ARM_VMOV_D_A		0x0e000b10
134
135# define vodi(oi,r0) _vodi(_jit,oi,r0)
136static void _vodi(jit_state_t*,int,int) maybe_unused;
137# define voqi(oi,r0) _voqi(_jit,oi,r0)
138static void _voqi(jit_state_t*,int,int) maybe_unused;
139# define vo_ss(o,r0,r1) _cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
140# define cc_vo_ss(cc,o,r0,r1) _cc_vo_ss(_jit,cc,o,r0,r1)
141static void _cc_vo_ss(jit_state_t*,int,int,int,int);
142# define vo_dd(o,r0,r1) _cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
143# define cc_vo_dd(cc,o,r0,r1) _cc_vo_dd(_jit,cc,o,r0,r1)
144static void _cc_vo_dd(jit_state_t*,int,int,int,int);
145# define vo_qd(o,r0,r1) _cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
146# define cc_vo_qd(cc,o,r0,r1) _cc_vo_qd(_jit,cc,o,r0,r1)
147static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
148# define vo_qq(o,r0,r1) _cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
149# define cc_vo_qq(cc,o,r0,r1) _cc_vo_qq(_jit,cc,o,r0,r1)
150static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
151# define vorr_(o,r0,r1) _cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
152# define cc_vorr_(cc,o,r0,r1) _cc_vorr_(_jit,cc,o,r0,r1)
153static void _cc_vorr_(jit_state_t*,int,int,int,int);
154# define vors_(o,r0,r1) _cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
155# define cc_vors_(cc,o,r0,r1) _cc_vors_(_jit,cc,o,r0,r1)
156static void _cc_vors_(jit_state_t*,int,int,int,int);
157# define vorv_(o,r0,r1) _cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
158# define cc_vorv_(cc,o,r0,r1) _cc_vorv_(_jit,cc,o,r0,r1)
159static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
160# define vori_(o,r0,r1) _cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
161# define cc_vori_(cc,o,r0,r1) _cc_vori_(_jit,cc,o,r0,r1)
162static void _cc_vori_(jit_state_t*,int,int,int,int);
163# define vorrd(o,r0,r1,r2) _cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
164# define cc_vorrd(cc,o,r0,r1,r2) _cc_vorrd(_jit,cc,o,r0,r1,r2)
165static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
166# define vosss(o,r0,r1,r2) _cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
167# define cc_vosss(cc,o,r0,r1,r2) _cc_vosss(_jit,cc,o,r0,r1,r2)
168static void _cc_vosss(jit_state_t*,int,int,int,int,int);
169# define voddd(o,r0,r1,r2) _cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
170# define cc_voddd(cc,o,r0,r1,r2) _cc_voddd(_jit,cc,o,r0,r1,r2)
171static void _cc_voddd(jit_state_t*,int,int,int,int,int);
172# define voqdd(o,r0,r1,r2) _cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
173# define cc_voqdd(cc,o,r0,r1,r2) _cc_voqdd(_jit,cc,o,r0,r1,r2)
174static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
175# define voqqd(o,r0,r1,r2) _cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
176# define cc_voqqd(cc,o,r0,r1,r2) _cc_voqqd(_jit,cc,o,r0,r1,r2)
177static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
178# define voqqq(o,r0,r1,r2) _cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
179# define cc_voqqq(cc,o,r0,r1,r2) _cc_voqqq(_jit,cc,o,r0,r1,r2)
180static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
181# define cc_vldst(cc,o,r0,r1,i0) _cc_vldst(_jit,cc,o,r0,r1,i0)
182static void _cc_vldst(jit_state_t*,int,int,int,int,int);
183# define cc_vorsl(cc,o,r0,r1,i0) _cc_vorsl(_jit,cc,o,r0,r1,i0)
184static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
/*
 * VFP scalar instructions.  Each instruction comes as a pair: CC_NAME()
 * takes an explicit condition code as first argument and NAME() is the
 * same instruction with ARM_CC_AL.  The _F64 forms add ARM_V_F64 to the
 * opcode and go through the cc_vo?dd emitters instead of cc_vo?ss.
 */

/* three operand arithmetic */
# define CC_VADD_F32(cc,r0,r1,r2)	cc_vosss(cc,ARM_VADD_F,r0,r1,r2)
# define VADD_F32(r0,r1,r2)		CC_VADD_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VADD_F64(cc,r0,r1,r2)	cc_voddd(cc,ARM_VADD_F|ARM_V_F64,r0,r1,r2)
# define VADD_F64(r0,r1,r2)		CC_VADD_F64(ARM_CC_AL,r0,r1,r2)
# define CC_VSUB_F32(cc,r0,r1,r2)	cc_vosss(cc,ARM_VSUB_F,r0,r1,r2)
# define VSUB_F32(r0,r1,r2)		CC_VSUB_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VSUB_F64(cc,r0,r1,r2)	cc_voddd(cc,ARM_VSUB_F|ARM_V_F64,r0,r1,r2)
# define VSUB_F64(r0,r1,r2)		CC_VSUB_F64(ARM_CC_AL,r0,r1,r2)
# define CC_VMUL_F32(cc,r0,r1,r2)	cc_vosss(cc,ARM_VMUL_F,r0,r1,r2)
# define VMUL_F32(r0,r1,r2)		CC_VMUL_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VMUL_F64(cc,r0,r1,r2)	cc_voddd(cc,ARM_VMUL_F|ARM_V_F64,r0,r1,r2)
# define VMUL_F64(r0,r1,r2)		CC_VMUL_F64(ARM_CC_AL,r0,r1,r2)
# define CC_VDIV_F32(cc,r0,r1,r2)	cc_vosss(cc,ARM_VDIV_F,r0,r1,r2)
# define VDIV_F32(r0,r1,r2)		CC_VDIV_F32(ARM_CC_AL,r0,r1,r2)
# define CC_VDIV_F64(cc,r0,r1,r2)	cc_voddd(cc,ARM_VDIV_F|ARM_V_F64,r0,r1,r2)
# define VDIV_F64(r0,r1,r2)		CC_VDIV_F64(ARM_CC_AL,r0,r1,r2)

/* two operand arithmetic and register moves */
# define CC_VABS_F32(cc,r0,r1)		cc_vo_ss(cc,ARM_VABS_F,r0,r1)
# define VABS_F32(r0,r1)		CC_VABS_F32(ARM_CC_AL,r0,r1)
# define CC_VABS_F64(cc,r0,r1)		cc_vo_dd(cc,ARM_VABS_F|ARM_V_F64,r0,r1)
# define VABS_F64(r0,r1)		CC_VABS_F64(ARM_CC_AL,r0,r1)
# define CC_VNEG_F32(cc,r0,r1)		cc_vo_ss(cc,ARM_VNEG_F,r0,r1)
# define VNEG_F32(r0,r1)		CC_VNEG_F32(ARM_CC_AL,r0,r1)
# define CC_VNEG_F64(cc,r0,r1)		cc_vo_dd(cc,ARM_VNEG_F|ARM_V_F64,r0,r1)
# define VNEG_F64(r0,r1)		CC_VNEG_F64(ARM_CC_AL,r0,r1)
# define CC_VSQRT_F32(cc,r0,r1)		cc_vo_ss(cc,ARM_VSQRT_F,r0,r1)
# define VSQRT_F32(r0,r1)		CC_VSQRT_F32(ARM_CC_AL,r0,r1)
# define CC_VSQRT_F64(cc,r0,r1)		cc_vo_dd(cc,ARM_VSQRT_F|ARM_V_F64,r0,r1)
# define VSQRT_F64(r0,r1)		CC_VSQRT_F64(ARM_CC_AL,r0,r1)
# define CC_VMOV_F32(cc,r0,r1)		cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
# define VMOV_F32(r0,r1)		CC_VMOV_F32(ARM_CC_AL,r0,r1)
# define CC_VMOV_F64(cc,r0,r1)		cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
# define VMOV_F64(r0,r1)		CC_VMOV_F64(ARM_CC_AL,r0,r1)

/* core register <-> VFP register moves; note that the _D_AA and _S_A
 * forms hand their operands to the emitter in a different order than
 * they receive them */
# define CC_VMOV_AA_D(cc,r0,r1,r2)	cc_vorrd(cc,ARM_VMOV_AA_D,r0,r1,r2)
# define VMOV_AA_D(r0,r1,r2)		CC_VMOV_AA_D(ARM_CC_AL,r0,r1,r2)
# define CC_VMOV_D_AA(cc,r0,r1,r2)	cc_vorrd(cc,ARM_VMOV_D_AA,r1,r2,r0)
# define VMOV_D_AA(r0,r1,r2)		CC_VMOV_D_AA(ARM_CC_AL,r0,r1,r2)
# define CC_VMOV_A_S(cc,r0,r1)		cc_vors_(cc,ARM_VMOV_A_S,r0,r1)
# define VMOV_A_S(r0,r1)		CC_VMOV_A_S(ARM_CC_AL,r0,r1)
# define CC_VMOV_S_A(cc,r0,r1)		cc_vors_(cc,ARM_VMOV_S_A,r1,r0)
# define VMOV_S_A(r0,r1)		CC_VMOV_S_A(ARM_CC_AL,r0,r1)

/* comparisons; the Z forms pass 0 as the second operand */
# define CC_VCMP_F32(cc,r0,r1)		cc_vo_ss(cc,ARM_VCMP,r0,r1)
# define VCMP_F32(r0,r1)		CC_VCMP_F32(ARM_CC_AL,r0,r1)
# define CC_VCMP_F64(cc,r0,r1)		cc_vo_dd(cc,ARM_VCMP|ARM_V_F64,r0,r1)
# define VCMP_F64(r0,r1)		CC_VCMP_F64(ARM_CC_AL,r0,r1)
# define CC_VCMPE_F32(cc,r0,r1)		cc_vo_ss(cc,ARM_VCMP|ARM_V_E,r0,r1)
# define VCMPE_F32(r0,r1)		CC_VCMPE_F32(ARM_CC_AL,r0,r1)
# define CC_VCMPE_F64(cc,r0,r1)		cc_vo_dd(cc,ARM_VCMP|ARM_V_E|ARM_V_F64,r0,r1)
# define VCMPE_F64(r0,r1)		CC_VCMPE_F64(ARM_CC_AL,r0,r1)
# define CC_VCMPZ_F32(cc,r0)		cc_vo_ss(cc,ARM_VCMP|ARM_V_Z,r0,0)
# define VCMPZ_F32(r0)			CC_VCMPZ_F32(ARM_CC_AL,r0)
# define CC_VCMPZ_F64(cc,r0)		cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_F64,r0,0)
# define VCMPZ_F64(r0)			CC_VCMPZ_F64(ARM_CC_AL,r0)
# define CC_VCMPEZ_F32(cc,r0)		cc_vo_ss(cc,ARM_VCMP|ARM_V_Z|ARM_V_E,r0,0)
# define VCMPEZ_F32(r0)			CC_VCMPEZ_F32(ARM_CC_AL,r0)
# define CC_VCMPEZ_F64(cc,r0)		cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_E|ARM_V_F64,r0,0)
# define VCMPEZ_F64(r0)			CC_VCMPEZ_F64(ARM_CC_AL,r0)

/* status register transfers */
# define CC_VMRS(cc,r0)			cc_vorr_(cc,ARM_VMRS,r0,0)
# define VMRS(r0)			CC_VMRS(ARM_CC_AL,r0)
# define CC_VMSR(cc,r0)			cc_vorr_(cc,ARM_VMSR,r0,0)
# define VMSR(r0)			CC_VMSR(ARM_CC_AL,r0)
/*
 * Conversion instructions.  CC_NAME() takes the condition code, NAME()
 * uses ARM_CC_AL.  All of them, including the forms with a double
 * precision operand, are emitted through cc_vo_ss.
 */

/* floating point -> 32 bit integer */
# define CC_VCVT_S32_F32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_S32_F32,r0,r1)
# define VCVT_S32_F32(r0,r1)		CC_VCVT_S32_F32(ARM_CC_AL,r0,r1)
# define CC_VCVT_U32_F32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_U32_F32,r0,r1)
# define VCVT_U32_F32(r0,r1)		CC_VCVT_U32_F32(ARM_CC_AL,r0,r1)
# define CC_VCVT_S32_F64(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_S32_F64,r0,r1)
# define VCVT_S32_F64(r0,r1)		CC_VCVT_S32_F64(ARM_CC_AL,r0,r1)
# define CC_VCVT_U32_F64(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_U32_F64,r0,r1)
# define VCVT_U32_F64(r0,r1)		CC_VCVT_U32_F64(ARM_CC_AL,r0,r1)

/* 32 bit integer -> floating point */
# define CC_VCVT_F32_S32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_F32_S32,r0,r1)
# define VCVT_F32_S32(r0,r1)		CC_VCVT_F32_S32(ARM_CC_AL,r0,r1)
# define CC_VCVT_F32_U32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_F32_U32,r0,r1)
# define VCVT_F32_U32(r0,r1)		CC_VCVT_F32_U32(ARM_CC_AL,r0,r1)
# define CC_VCVT_F64_S32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_F64_S32,r0,r1)
# define VCVT_F64_S32(r0,r1)		CC_VCVT_F64_S32(ARM_CC_AL,r0,r1)
# define CC_VCVT_F64_U32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_F64_U32,r0,r1)
# define VCVT_F64_U32(r0,r1)		CC_VCVT_F64_U32(ARM_CC_AL,r0,r1)

/* single <-> double precision */
# define CC_VCVT_F32_F64(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_F32_F64,r0,r1)
# define VCVT_F32_F64(r0,r1)		CC_VCVT_F32_F64(ARM_CC_AL,r0,r1)
# define CC_VCVT_F64_F32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVT_F64_F32,r0,r1)
# define VCVT_F64_F32(r0,r1)		CC_VCVT_F64_F32(ARM_CC_AL,r0,r1)

/* VCVTR variants of the floating point -> integer conversions */
# define CC_VCVTR_S32_F32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVTR_S32_F32,r0,r1)
# define VCVTR_S32_F32(r0,r1)		CC_VCVTR_S32_F32(ARM_CC_AL,r0,r1)
# define CC_VCVTR_U32_F32(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVTR_U32_F32,r0,r1)
# define VCVTR_U32_F32(r0,r1)		CC_VCVTR_U32_F32(ARM_CC_AL,r0,r1)
# define CC_VCVTR_S32_F64(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVTR_S32_F64,r0,r1)
# define VCVTR_S32_F64(r0,r1)		CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
# define CC_VCVTR_U32_F64(cc,r0,r1)	cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
# define VCVTR_U32_F64(r0,r1)		CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
/*
 * Load/store multiple.  All forms are built on cc_vorsl with ARM_VM plus
 * a combination of the ARM_M_* bits (and ARM_V_F64 for the _F64 forms);
 * the _U_ names are the ones that add ARM_M_U.  CC_NAME() takes the
 * condition code, NAME() uses ARM_CC_AL.
 */
# define CC_VLDMIA_F32(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
# define VLDMIA_F32(r0,r1,i0)		CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VLDMIA_F64(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
# define VLDMIA_F64(r0,r1,i0)		CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
# define CC_VSTMIA_F32(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
# define VSTMIA_F32(r0,r1,i0)		CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VSTMIA_F64(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
# define VSTMIA_F64(r0,r1,i0)		CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)

# define CC_VLDMIA_U_F32(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
# define VLDMIA_U_F32(r0,r1,i0)		CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VLDMIA_U_F64(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
# define VLDMIA_U_F64(r0,r1,i0)		CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
# define CC_VSTMIA_U_F32(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
# define VSTMIA_U_F32(r0,r1,i0)		CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VSTMIA_U_F64(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
# define VSTMIA_U_F64(r0,r1,i0)		CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)

# define CC_VLDMDB_U_F32(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
# define VLDMDB_U_F32(r0,r1,i0)		CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VLDMDB_U_F64(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
# define VLDMDB_U_F64(r0,r1,i0)		CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
# define CC_VSTMDB_U_F32(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
# define VSTMDB_U_F32(r0,r1,i0)		CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VSTMDB_U_F64(cc,r0,r1,i0)	cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
# define VSTMDB_U_F64(r0,r1,i0)		CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)

/* push is VSTMDB_U and pop is VLDMIA_U, both with _SP_REGNO as base */
# define CC_VPUSH_F32(cc,r0,i0)		CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
# define VPUSH_F32(r0,i0)		CC_VPUSH_F32(ARM_CC_AL,r0,i0)
# define CC_VPUSH_F64(cc,r0,i0)		CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
# define VPUSH_F64(r0,i0)		CC_VPUSH_F64(ARM_CC_AL,r0,i0)
# define CC_VPOP_F32(cc,r0,i0)		CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
# define VPOP_F32(r0,i0)		CC_VPOP_F32(ARM_CC_AL,r0,i0)
# define CC_VPOP_F64(cc,r0,i0)		CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
# define VPOP_F64(r0,i0)		CC_VPOP_F64(ARM_CC_AL,r0,i0)
/*
 * Moves between a core register and a vector element.  The VMOV_A_* forms
 * use ARM_VMOV_A_D, the VMOV_V_* forms use ARM_VMOV_D_A and swap their two
 * operands before handing them to the emitter.  8 and 16 bit forms go
 * through cc_vorv_, 32 bit forms through cc_vori_.  The _U* forms add
 * ARM_VMOV_ADV_U (zero extend).
 */
# define CC_VMOV_A_S8(cc,r0,r1)		cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
# define VMOV_A_S8(r0,r1)		CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
# define CC_VMOV_A_U8(cc,r0,r1)		cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
# define VMOV_A_U8(r0,r1)		CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
# define CC_VMOV_A_S16(cc,r0,r1)	cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16,r0,r1)
# define VMOV_A_S16(r0,r1)		CC_VMOV_A_S16(ARM_CC_AL,r0,r1)
# define CC_VMOV_A_U16(cc,r0,r1)	cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16|ARM_VMOV_ADV_U,r0,r1)
# define VMOV_A_U16(r0,r1)		CC_VMOV_A_U16(ARM_CC_AL,r0,r1)
# define CC_VMOV_A_S32(cc,r0,r1)	cc_vori_(cc,ARM_VMOV_A_D,r0,r1)
# define VMOV_A_S32(r0,r1)		CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
# define CC_VMOV_A_U32(cc,r0,r1)	cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
# define VMOV_A_U32(r0,r1)		CC_VMOV_A_U32(ARM_CC_AL,r0,r1)

# define CC_VMOV_V_I8(cc,r0,r1)		cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
# define VMOV_V_I8(r0,r1)		CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
# define CC_VMOV_V_I16(cc,r0,r1)	cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
# define VMOV_V_I16(r0,r1)		CC_VMOV_V_I16(ARM_CC_AL,r0,r1)
# define CC_VMOV_V_I32(cc,r0,r1)	cc_vori_(cc,ARM_VMOV_D_A,r1,r0)
# define VMOV_V_I32(r0,r1)		CC_VMOV_V_I32(ARM_CC_AL,r0,r1)
/*
 * Vector integer add and subtract.  The opcode is the base ARM_V*_I value
 * combined with an element size (none for 8 bit, ARM_V_I16, ARM_V_I32 or
 * ARM_V_I64), ARM_V_U for the unsigned forms and ARM_V_Q for the ...Q_
 * forms.  The emitter follows the operand widths: voddd for the plain
 * forms, voqqq for the ...Q_ forms, voqdd for the L forms and voqqd for
 * the W forms.
 */

/* add */
# define VADD_I8(r0,r1,r2)		voddd(ARM_VADD_I,r0,r1,r2)
# define VADDQ_I8(r0,r1,r2)		voqqq(ARM_VADD_I|ARM_V_Q,r0,r1,r2)
# define VADD_I16(r0,r1,r2)		voddd(ARM_VADD_I|ARM_V_I16,r0,r1,r2)
# define VADDQ_I16(r0,r1,r2)		voqqq(ARM_VADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
# define VADD_I32(r0,r1,r2)		voddd(ARM_VADD_I|ARM_V_I32,r0,r1,r2)
# define VADDQ_I32(r0,r1,r2)		voqqq(ARM_VADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
# define VADD_I64(r0,r1,r2)		voddd(ARM_VADD_I|ARM_V_I64,r0,r1,r2)
# define VADDQ_I64(r0,r1,r2)		voqqq(ARM_VADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)

/* saturating add */
# define VQADD_S8(r0,r1,r2)		voddd(ARM_VQADD_I,r0,r1,r2)
# define VQADDQ_S8(r0,r1,r2)		voqqq(ARM_VQADD_I|ARM_V_Q,r0,r1,r2)
# define VQADD_U8(r0,r1,r2)		voddd(ARM_VQADD_I|ARM_V_U,r0,r1,r2)
# define VQADDQ_U8(r0,r1,r2)		voqqq(ARM_VQADD_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
# define VQADD_S16(r0,r1,r2)		voddd(ARM_VQADD_I|ARM_V_I16,r0,r1,r2)
# define VQADDQ_S16(r0,r1,r2)		voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
# define VQADD_U16(r0,r1,r2)		voddd(ARM_VQADD_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
# define VQADDQ_U16(r0,r1,r2)		voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
# define VQADD_S32(r0,r1,r2)		voddd(ARM_VQADD_I|ARM_V_I32,r0,r1,r2)
# define VQADDQ_S32(r0,r1,r2)		voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
# define VQADD_U32(r0,r1,r2)		voddd(ARM_VQADD_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
# define VQADDQ_U32(r0,r1,r2)		voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
# define VQADD_S64(r0,r1,r2)		voddd(ARM_VQADD_I|ARM_V_I64,r0,r1,r2)
# define VQADDQ_S64(r0,r1,r2)		voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
# define VQADD_U64(r0,r1,r2)		voddd(ARM_VQADD_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
# define VQADDQ_U64(r0,r1,r2)		voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)

/* add long (q=d+d) */
# define VADDL_S8(r0,r1,r2)		voqdd(ARM_VADDL_I,r0,r1,r2)
# define VADDL_U8(r0,r1,r2)		voqdd(ARM_VADDL_I|ARM_V_U,r0,r1,r2)
# define VADDL_S16(r0,r1,r2)		voqdd(ARM_VADDL_I|ARM_V_I16,r0,r1,r2)
# define VADDL_U16(r0,r1,r2)		voqdd(ARM_VADDL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
# define VADDL_S32(r0,r1,r2)		voqdd(ARM_VADDL_I|ARM_V_I32,r0,r1,r2)
# define VADDL_U32(r0,r1,r2)		voqdd(ARM_VADDL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)

/* add wide (q=q+d) */
# define VADDW_S8(r0,r1,r2)		voqqd(ARM_VADDW_I,r0,r1,r2)
# define VADDW_U8(r0,r1,r2)		voqqd(ARM_VADDW_I|ARM_V_U,r0,r1,r2)
# define VADDW_S16(r0,r1,r2)		voqqd(ARM_VADDW_I|ARM_V_I16,r0,r1,r2)
# define VADDW_U16(r0,r1,r2)		voqqd(ARM_VADDW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
# define VADDW_S32(r0,r1,r2)		voqqd(ARM_VADDW_I|ARM_V_I32,r0,r1,r2)
# define VADDW_U32(r0,r1,r2)		voqqd(ARM_VADDW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)

/* subtract */
# define VSUB_I8(r0,r1,r2)		voddd(ARM_VSUB_I,r0,r1,r2)
# define VSUBQ_I8(r0,r1,r2)		voqqq(ARM_VSUB_I|ARM_V_Q,r0,r1,r2)
# define VSUB_I16(r0,r1,r2)		voddd(ARM_VSUB_I|ARM_V_I16,r0,r1,r2)
# define VSUBQ_I16(r0,r1,r2)		voqqq(ARM_VSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
# define VSUB_I32(r0,r1,r2)		voddd(ARM_VSUB_I|ARM_V_I32,r0,r1,r2)
# define VSUBQ_I32(r0,r1,r2)		voqqq(ARM_VSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
# define VSUB_I64(r0,r1,r2)		voddd(ARM_VSUB_I|ARM_V_I64,r0,r1,r2)
# define VSUBQ_I64(r0,r1,r2)		voqqq(ARM_VSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)

/* saturating subtract */
# define VQSUB_S8(r0,r1,r2)		voddd(ARM_VQSUB_I,r0,r1,r2)
# define VQSUBQ_S8(r0,r1,r2)		voqqq(ARM_VQSUB_I|ARM_V_Q,r0,r1,r2)
# define VQSUB_U8(r0,r1,r2)		voddd(ARM_VQSUB_I|ARM_V_U,r0,r1,r2)
# define VQSUBQ_U8(r0,r1,r2)		voqqq(ARM_VQSUB_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
# define VQSUB_S16(r0,r1,r2)		voddd(ARM_VQSUB_I|ARM_V_I16,r0,r1,r2)
# define VQSUBQ_S16(r0,r1,r2)		voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
# define VQSUB_U16(r0,r1,r2)		voddd(ARM_VQSUB_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
# define VQSUBQ_U16(r0,r1,r2)		voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
# define VQSUB_S32(r0,r1,r2)		voddd(ARM_VQSUB_I|ARM_V_I32,r0,r1,r2)
# define VQSUBQ_S32(r0,r1,r2)		voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
# define VQSUB_U32(r0,r1,r2)		voddd(ARM_VQSUB_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
# define VQSUBQ_U32(r0,r1,r2)		voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
# define VQSUB_S64(r0,r1,r2)		voddd(ARM_VQSUB_I|ARM_V_I64,r0,r1,r2)
# define VQSUBQ_S64(r0,r1,r2)		voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
# define VQSUB_U64(r0,r1,r2)		voddd(ARM_VQSUB_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
# define VQSUBQ_U64(r0,r1,r2)		voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)

/* subtract long */
# define VSUBL_S8(r0,r1,r2)		voqdd(ARM_VSUBL_I,r0,r1,r2)
# define VSUBL_U8(r0,r1,r2)		voqdd(ARM_VSUBL_I|ARM_V_U,r0,r1,r2)
# define VSUBL_S16(r0,r1,r2)		voqdd(ARM_VSUBL_I|ARM_V_I16,r0,r1,r2)
# define VSUBL_U16(r0,r1,r2)		voqdd(ARM_VSUBL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
# define VSUBL_S32(r0,r1,r2)		voqdd(ARM_VSUBL_I|ARM_V_I32,r0,r1,r2)
# define VSUBL_U32(r0,r1,r2)		voqdd(ARM_VSUBL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)

/* subtract wide */
# define VSUBW_S8(r0,r1,r2)		voqqd(ARM_VSUBW_I,r0,r1,r2)
# define VSUBW_U8(r0,r1,r2)		voqqd(ARM_VSUBW_I|ARM_V_U,r0,r1,r2)
# define VSUBW_S16(r0,r1,r2)		voqqd(ARM_VSUBW_I|ARM_V_I16,r0,r1,r2)
# define VSUBW_U16(r0,r1,r2)		voqqd(ARM_VSUBW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
# define VSUBW_S32(r0,r1,r2)		voqqd(ARM_VSUBW_I|ARM_V_I32,r0,r1,r2)
# define VSUBW_U32(r0,r1,r2)		voqqd(ARM_VSUBW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/*
 * Vector integer multiply.  VMUL_* operate on doubleword registers
 * (voddd) and VMULQ_* on quadword registers (voqqq).
 *
 * VMULL_* is the long form: like VADDL/VSUBL elsewhere in this file it
 * has a quadword destination and two doubleword sources, so all six
 * variants are emitted through voqdd, the same emitter VADDL/VSUBL use.
 */
# define VMUL_I8(r0,r1,r2)		voddd(ARM_VMUL_I,r0,r1,r2)
# define VMULQ_I8(r0,r1,r2)		voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
# define VMUL_I16(r0,r1,r2)		voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
# define VMULQ_I16(r0,r1,r2)		voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
# define VMUL_I32(r0,r1,r2)		voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
# define VMULQ_I32(r0,r1,r2)		voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)
# define VMULL_S8(r0,r1,r2)		voqdd(ARM_VMULL_I,r0,r1,r2)
# define VMULL_U8(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2)
# define VMULL_S16(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
# define VMULL_U16(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
# define VMULL_S32(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
# define VMULL_U32(r0,r1,r2)		voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
/*
 * Vector integer absolute value and negate, plain and saturating (VQ*).
 * The element size is selected with ARM_V_S16/ARM_V_S32 (nothing for
 * 8 bit); the ...Q_ forms add ARM_V_Q and use vo_qq instead of vo_dd.
 */

/* absolute value */
# define VABS_S8(r0,r1)			vo_dd(ARM_VABS_I,r0,r1)
# define VABSQ_S8(r0,r1)		vo_qq(ARM_VABS_I|ARM_V_Q,r0,r1)
# define VABS_S16(r0,r1)		vo_dd(ARM_VABS_I|ARM_V_S16,r0,r1)
# define VABSQ_S16(r0,r1)		vo_qq(ARM_VABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
# define VABS_S32(r0,r1)		vo_dd(ARM_VABS_I|ARM_V_S32,r0,r1)
# define VABSQ_S32(r0,r1)		vo_qq(ARM_VABS_I|ARM_V_S32|ARM_V_Q,r0,r1)

/* saturating absolute value */
# define VQABS_S8(r0,r1)		vo_dd(ARM_VQABS_I,r0,r1)
# define VQABSQ_S8(r0,r1)		vo_qq(ARM_VQABS_I|ARM_V_Q,r0,r1)
# define VQABS_S16(r0,r1)		vo_dd(ARM_VQABS_I|ARM_V_S16,r0,r1)
# define VQABSQ_S16(r0,r1)		vo_qq(ARM_VQABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
# define VQABS_S32(r0,r1)		vo_dd(ARM_VQABS_I|ARM_V_S32,r0,r1)
# define VQABSQ_S32(r0,r1)		vo_qq(ARM_VQABS_I|ARM_V_S32|ARM_V_Q,r0,r1)

/* negate */
# define VNEG_S8(r0,r1)			vo_dd(ARM_VNEG_I,r0,r1)
# define VNEGQ_S8(r0,r1)		vo_qq(ARM_VNEG_I|ARM_V_Q,r0,r1)
# define VNEG_S16(r0,r1)		vo_dd(ARM_VNEG_I|ARM_V_S16,r0,r1)
# define VNEGQ_S16(r0,r1)		vo_qq(ARM_VNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
# define VNEG_S32(r0,r1)		vo_dd(ARM_VNEG_I|ARM_V_S32,r0,r1)
# define VNEGQ_S32(r0,r1)		vo_qq(ARM_VNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)

/* saturating negate */
# define VQNEG_S8(r0,r1)		vo_dd(ARM_VQNEG_I,r0,r1)
# define VQNEGQ_S8(r0,r1)		vo_qq(ARM_VQNEG_I|ARM_V_Q,r0,r1)
# define VQNEG_S16(r0,r1)		vo_dd(ARM_VQNEG_I|ARM_V_S16,r0,r1)
# define VQNEGQ_S16(r0,r1)		vo_qq(ARM_VQNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
# define VQNEG_S32(r0,r1)		vo_dd(ARM_VQNEG_I|ARM_V_S32,r0,r1)
# define VQNEGQ_S32(r0,r1)		vo_qq(ARM_VQNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
/* Vector bitwise operations; the ...Q forms add ARM_V_Q and use voqqq. */
# define VAND(r0,r1,r2)			voddd(ARM_VAND,r0,r1,r2)
# define VANDQ(r0,r1,r2)		voqqq(ARM_VAND|ARM_V_Q,r0,r1,r2)
# define VBIC(r0,r1,r2)			voddd(ARM_VBIC,r0,r1,r2)
# define VBICQ(r0,r1,r2)		voqqq(ARM_VBIC|ARM_V_Q,r0,r1,r2)
# define VORR(r0,r1,r2)			voddd(ARM_VORR,r0,r1,r2)
# define VORRQ(r0,r1,r2)		voqqq(ARM_VORR|ARM_V_Q,r0,r1,r2)
# define VORN(r0,r1,r2)			voddd(ARM_VORN,r0,r1,r2)
# define VORNQ(r0,r1,r2)		voqqq(ARM_VORN|ARM_V_Q,r0,r1,r2)
# define VEOR(r0,r1,r2)			voddd(ARM_VEOR,r0,r1,r2)
# define VEORQ(r0,r1,r2)		voqqq(ARM_VEOR|ARM_V_Q,r0,r1,r2)

/* A vector register move is an OR of the source register with itself. */
# define VMOV(r0,r1)			VORR(r0,r1,r1)
# define VMOVQ(r0,r1)			VORRQ(r0,r1,r1)

/* Lengthening moves; the unsigned forms add ARM_V_U. */
# define VMOVL_S8(r0,r1)		vo_qd(ARM_VMOVL_I|ARM_VMOVL_S8,r0,r1)
# define VMOVL_U8(r0,r1)		vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S8,r0,r1)
# define VMOVL_S16(r0,r1)		vo_qd(ARM_VMOVL_I|ARM_VMOVL_S16,r0,r1)
# define VMOVL_U16(r0,r1)		vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S16,r0,r1)
# define VMOVL_S32(r0,r1)		vo_qd(ARM_VMOVL_I|ARM_VMOVL_S32,r0,r1)
# define VMOVL_U32(r0,r1)		vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S32,r0,r1)

/* "oi" should be the result of encode_vfp_double */
# define VIMM(oi,r0)			vodi(oi,r0)
/* the argument is parenthesized so any expression can be passed as "oi" */
# define VIMMQ(oi,r0)			voqi((oi)|ARM_V_Q,r0)
/* index is multiplied by four */
/* Single register load/store through cc_vldst.  The ...N_ forms use the
 * bare ARM_VLDR/ARM_VSTR opcode, the others add ARM_P; the _F64 forms add
 * ARM_V_F64.  CC_NAME() takes the condition code, NAME() uses ARM_CC_AL. */
# define CC_VLDRN_F32(cc,r0,r1,i0)	cc_vldst(cc,ARM_VLDR,r0,r1,i0)
# define VLDRN_F32(r0,r1,i0)		CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VLDR_F32(cc,r0,r1,i0)	cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
# define VLDR_F32(r0,r1,i0)		CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VLDRN_F64(cc,r0,r1,i0)	cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
# define VLDRN_F64(r0,r1,i0)		CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
# define CC_VLDR_F64(cc,r0,r1,i0)	cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
# define VLDR_F64(r0,r1,i0)		CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)
# define CC_VSTRN_F32(cc,r0,r1,i0)	cc_vldst(cc,ARM_VSTR,r0,r1,i0)
# define VSTRN_F32(r0,r1,i0)		CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VSTR_F32(cc,r0,r1,i0)	cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
# define VSTR_F32(r0,r1,i0)		CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
# define CC_VSTRN_F64(cc,r0,r1,i0)	cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
# define VSTRN_F64(r0,r1,i0)		CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
# define CC_VSTR_F64(cc,r0,r1,i0)	cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
# define VSTR_F64(r0,r1,i0)		CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
469# define vfp_movr_f(r0,r1) _vfp_movr_f(_jit,r0,r1)
470static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
471# define vfp_movr_d(r0,r1) _vfp_movr_d(_jit,r0,r1)
472static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
473# define vfp_movi_f(r0,i0) _vfp_movi_f(_jit,r0,i0)
474static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
475# define vfp_movi_d(r0,i0) _vfp_movi_d(_jit,r0,i0)
476static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
477# define vfp_extr_f(r0,r1) _vfp_extr_f(_jit,r0,r1)
478static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
479# define vfp_extr_d(r0,r1) _vfp_extr_d(_jit,r0,r1)
480static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
481# define vfp_extr_d_f(r0,r1) _vfp_extr_d_f(_jit,r0,r1)
482static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
483# define vfp_extr_f_d(r0,r1) _vfp_extr_f_d(_jit,r0,r1)
484static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
485# define vfp_truncr_f_i(r0,r1) _vfp_truncr_f_i(_jit,r0,r1)
486static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
487# define vfp_truncr_d_i(r0,r1) _vfp_truncr_d_i(_jit,r0,r1)
488static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
489# define vfp_absr_f(r0,r1) VABS_F32(r0,r1)
490# define vfp_absr_d(r0,r1) VABS_F64(r0,r1)
491# define vfp_negr_f(r0,r1) VNEG_F32(r0,r1)
492# define vfp_negr_d(r0,r1) VNEG_F64(r0,r1)
493# define vfp_sqrtr_f(r0,r1) VSQRT_F32(r0,r1)
494# define vfp_sqrtr_d(r0,r1) VSQRT_F64(r0,r1)
495# define vfp_addr_f(r0,r1,r2) VADD_F32(r0,r1,r2)
496# define vfp_addi_f(r0,r1,i0) _vfp_addi_f(_jit,r0,r1,i0)
497static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
498# define vfp_addr_d(r0,r1,r2) VADD_F64(r0,r1,r2)
499# define vfp_addi_d(r0,r1,i0) _vfp_addi_d(_jit,r0,r1,i0)
500static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
501# define vfp_subr_f(r0,r1,r2) VSUB_F32(r0,r1,r2)
502# define vfp_subi_f(r0,r1,i0) _vfp_subi_f(_jit,r0,r1,i0)
503static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
504# define vfp_subr_d(r0,r1,r2) VSUB_F64(r0,r1,r2)
505# define vfp_subi_d(r0,r1,i0) _vfp_subi_d(_jit,r0,r1,i0)
506static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
507# define vfp_rsbr_f(r0,r1,r2) vfp_subr_f(r0,r2,r1)
508# define vfp_rsbi_f(r0,r1,i0) _vfp_rsbi_f(_jit,r0,r1,i0)
509static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
510# define vfp_rsbr_d(r0,r1,r2) vfp_subr_d(r0,r2,r1)
511# define vfp_rsbi_d(r0,r1,i0) _vfp_rsbi_d(_jit,r0,r1,i0)
512static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
513# define vfp_mulr_f(r0,r1,r2) VMUL_F32(r0,r1,r2)
514# define vfp_muli_f(r0,r1,i0) _vfp_muli_f(_jit,r0,r1,i0)
515static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
516# define vfp_mulr_d(r0,r1,r2) VMUL_F64(r0,r1,r2)
517# define vfp_muli_d(r0,r1,i0) _vfp_muli_d(_jit,r0,r1,i0)
518static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
519# define vfp_divr_f(r0,r1,r2) VDIV_F32(r0,r1,r2)
520# define vfp_divi_f(r0,r1,i0) _vfp_divi_f(_jit,r0,r1,i0)
521static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
522# define vfp_divr_d(r0,r1,r2) VDIV_F64(r0,r1,r2)
523# define vfp_divi_d(r0,r1,i0) _vfp_divi_d(_jit,r0,r1,i0)
524static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
525# define vfp_cmp_f(r0,r1) _vfp_cmp_f(_jit,r0,r1)
526static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
527# define vfp_cmp_d(r0,r1) _vfp_cmp_d(_jit,r0,r1)
528static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
529# define vcmp01_x(c0,c1,r0) _vcmp01_x(_jit,c0,c1,r0)
530static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
531# define vcmp01_f(c0,c1,r0,r1,r2) _vcmp01_f(_jit,c0,c1,r0,r1,r2)
532static void _vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
533# define vcmp01_d(c0,c1,r0,r1,r2) _vcmp01_d(_jit,c0,c1,r0,r1,r2)
534static void _vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
535# define vfp_ltr_f(r0,r1,r2) vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
536# define vfp_lti_f(r0,r1,i0) _vfp_lti_f(_jit,r0,r1,i0)
537static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
538# define vfp_ltr_d(r0,r1,r2) vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
539# define vfp_lti_d(r0,r1,i0) _vfp_lti_d(_jit,r0,r1,i0)
540static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
541# define vfp_ler_f(r0,r1,r2) vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
542# define vfp_lei_f(r0,r1,i0) _vfp_lei_f(_jit,r0,r1,i0)
543static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
544# define vfp_ler_d(r0,r1,r2) vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
545# define vfp_lei_d(r0,r1,i0) _vfp_lei_d(_jit,r0,r1,i0)
546static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
547# define vfp_eqr_f(r0,r1,r2) vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
548# define vfp_eqi_f(r0,r1,i0) _vfp_eqi_f(_jit,r0,r1,i0)
549static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
550# define vfp_eqr_d(r0,r1,r2) vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
551# define vfp_eqi_d(r0,r1,i0) _vfp_eqi_d(_jit,r0,r1,i0)
552static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
553# define vfp_ger_f(r0,r1,r2) vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
554# define vfp_gei_f(r0,r1,i0) _vfp_gei_f(_jit,r0,r1,i0)
555static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
556# define vfp_ger_d(r0,r1,r2) vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
557# define vfp_gei_d(r0,r1,i0) _vfp_gei_d(_jit,r0,r1,i0)
558static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
559# define vfp_gtr_f(r0,r1,r2) vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
560# define vfp_gti_f(r0,r1,i0) _vfp_gti_f(_jit,r0,r1,i0)
561static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
562# define vfp_gtr_d(r0,r1,r2) vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
563# define vfp_gti_d(r0,r1,i0) _vfp_gti_d(_jit,r0,r1,i0)
564static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
565# define vfp_ner_f(r0,r1,r2) vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
566# define vfp_nei_f(r0,r1,i0) _vfp_nei_f(_jit,r0,r1,i0)
567static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
568# define vfp_ner_d(r0,r1,r2) vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
569# define vfp_nei_d(r0,r1,i0) _vfp_nei_d(_jit,r0,r1,i0)
570static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
571# define vcmp10_x(c0,r0) _vcmp10_x(_jit,c0,r0)
572static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
573# define vcmp_10_f(c0,r0,r1,r2) _vcmp_10_f(_jit,c0,r0,r1,r2)
574static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
575# define vcmp_10_d(c0,r0,r1,r2) _vcmp_10_d(_jit,c0,r0,r1,r2)
576static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
577# define vfp_unltr_f(r0,r1,r2) vcmp_10_f(ARM_CC_GE,r0,r1,r2)
578# define vfp_unlti_f(r0,r1,i0) _vfp_unlti_f(_jit,r0,r1,i0)
579static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
580# define vfp_unltr_d(r0,r1,r2) vcmp_10_d(ARM_CC_GE,r0,r1,r2)
581# define vfp_unlti_d(r0,r1,i0) _vfp_unlti_d(_jit,r0,r1,i0)
582static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
583# define vfp_unler_f(r0,r1,r2) vcmp_10_f(ARM_CC_GT,r0,r1,r2)
584# define vfp_unlei_f(r0,r1,i0) _vfp_unlei_f(_jit,r0,r1,i0)
585static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
586# define vfp_unler_d(r0,r1,r2) vcmp_10_d(ARM_CC_GT,r0,r1,r2)
587# define vfp_unlei_d(r0,r1,i0) _vfp_unlei_d(_jit,r0,r1,i0)
588static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
589# define vfp_uneqr_x(r0) _vfp_uneqr_x(_jit,r0)
590static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
591# define vfp_uneqr_f(r0,r1,r2) _vfp_uneqr_f(_jit,r0,r1,r2)
592static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
593# define vfp_uneqi_f(r0,r1,i0) _vfp_uneqi_f(_jit,r0,r1,i0)
594static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
595# define vfp_uneqr_d(r0,r1,r2) _vfp_uneqr_d(_jit,r0,r1,r2)
596static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
597# define vfp_uneqi_d(r0,r1,i0) _vfp_uneqi_d(_jit,r0,r1,i0)
598static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
599# define vcmp_01_x(c0,r0) _vcmp_01_x(_jit,c0,r0)
600static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
601# define vcmp_01_f(c0,r0,r1,r2) _vcmp_01_f(_jit,c0,r0,r1,r2)
602static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
603# define vcmp_01_d(c0,r0,r1,r2) _vcmp_01_d(_jit,c0,r0,r1,r2)
604static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
605# define vfp_unger_f(r0,r1,r2) vcmp_01_f(ARM_CC_CS,r0,r1,r2)
606# define vfp_ungei_f(r0,r1,i0) _vfp_ungei_f(_jit,r0,r1,i0)
607static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
608# define vfp_unger_d(r0,r1,r2) vcmp_01_d(ARM_CC_CS,r0,r1,r2)
609# define vfp_ungei_d(r0,r1,i0) _vfp_ungei_d(_jit,r0,r1,i0)
610static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
611# define vfp_ungtr_f(r0,r1,r2) vcmp_01_f(ARM_CC_HI,r0,r1,r2)
612# define vfp_ungti_f(r0,r1,i0) _vfp_ungti_f(_jit,r0,r1,i0)
613static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
614# define vfp_ungtr_d(r0,r1,r2) vcmp_01_d(ARM_CC_HI,r0,r1,r2)
615# define vfp_ungti_d(r0,r1,i0) _vfp_ungti_d(_jit,r0,r1,i0)
616static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
617# define vfp_ltgtr_x(r0) _vfp_ltgtr_x(_jit,r0)
618static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
619# define vfp_ltgtr_f(r0,r1,r2) _vfp_ltgtr_f(_jit,r0,r1,r2)
620static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
621# define vfp_ltgti_f(r0,r1,i0) _vfp_ltgti_f(_jit,r0,r1,i0)
622static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
623# define vfp_ltgtr_d(r0,r1,r2) _vfp_ltgtr_d(_jit,r0,r1,r2)
624static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
625# define vfp_ltgti_d(r0,r1,i0) _vfp_ltgti_d(_jit,r0,r1,i0)
626static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
627# define vfp_ordr_f(r0,r1,r2) _vfp_ordr_f(_jit,r0,r1,r2)
628static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
629# define vfp_ordi_f(r0,r1,i0) _vfp_ordi_f(_jit,r0,r1,i0)
630static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
631# define vfp_ordr_d(r0,r1,r2) _vfp_ordr_d(_jit,r0,r1,r2)
632static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
633# define vfp_ordi_d(r0,r1,i0) _vfp_ordi_d(_jit,r0,r1,i0)
634static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
635# define vfp_unordr_f(r0,r1,r2) _vfp_unordr_f(_jit,r0,r1,r2)
636static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
637# define vfp_unordi_f(r0,r1,i0) _vfp_unordi_f(_jit,r0,r1,i0)
638static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
639# define vfp_unordr_d(r0,r1,r2) _vfp_unordr_d(_jit,r0,r1,r2)
640static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
641# define vfp_unordi_d(r0,r1,i0) _vfp_unordi_d(_jit,r0,r1,i0)
642static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
643# define vbcmp_x(cc,i0) _vbcmp_x(_jit,cc,i0)
644static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
645# define vbcmp_f(cc,i0,r0,r1) _vbcmp_f(_jit,cc,i0,r0,r1)
646static jit_word_t
647_vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
648# define vbcmp_x(cc,i0) _vbcmp_x(_jit,cc,i0)
649static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
650# define vbcmp_d(cc,i0,r0,r1) _vbcmp_d(_jit,cc,i0,r0,r1)
651static jit_word_t
652_vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
653# define vfp_bltr_f(i0,r0,r1) vbcmp_f(ARM_CC_MI,i0,r0,r1)
654# define vfp_blti_f(i0,r0,i1) _vfp_blti_f(_jit,i0,r0,i1)
655static jit_word_t _vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
656# define vfp_bltr_d(i0,r0,r1) vbcmp_d(ARM_CC_MI,i0,r0,r1)
657static jit_word_t _vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
658# define vfp_blti_d(i0,r0,i1) _vfp_blti_d(_jit,i0,r0,i1)
659# define vfp_bler_f(i0,r0,r1) vbcmp_f(ARM_CC_LS,i0,r0,r1)
660# define vfp_blei_f(i0,r0,i1) _vfp_blei_f(_jit,i0,r0,i1)
661static jit_word_t _vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
662# define vfp_bler_d(i0,r0,r1) vbcmp_d(ARM_CC_LS,i0,r0,r1)
663# define vfp_blei_d(i0,r0,i1) _vfp_blei_d(_jit,i0,r0,i1)
664static jit_word_t _vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
665# define vfp_beqr_f(i0,r0,r1) vbcmp_f(ARM_CC_EQ,i0,r0,r1)
666# define vfp_beqi_f(i0,r0,i1) _vfp_beqi_f(_jit,i0,r0,i1)
667static jit_word_t _vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
668# define vfp_beqr_d(i0,r0,r1) vbcmp_d(ARM_CC_EQ,i0,r0,r1)
669# define vfp_beqi_d(i0,r0,i1) _vfp_beqi_d(_jit,i0,r0,i1)
670static jit_word_t _vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
671# define vfp_bger_f(i0,r0,r1) vbcmp_f(ARM_CC_GE,i0,r0,r1)
672# define vfp_bgei_f(i0,r0,i1) _vfp_bgei_f(_jit,i0,r0,i1)
673static jit_word_t _vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
674# define vfp_bger_d(i0,r0,r1) vbcmp_d(ARM_CC_GE,i0,r0,r1)
675# define vfp_bgei_d(i0,r0,i1) _vfp_bgei_d(_jit,i0,r0,i1)
676static jit_word_t _vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
677# define vfp_bgtr_f(i0,r0,r1) vbcmp_f(ARM_CC_GT,i0,r0,r1)
678# define vfp_bgti_f(i0,r0,i1) _vfp_bgti_f(_jit,i0,r0,i1)
679static jit_word_t _vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
680# define vfp_bgtr_d(i0,r0,r1) vbcmp_d(ARM_CC_GT,i0,r0,r1)
681# define vfp_bgti_d(i0,r0,i1) _vfp_bgti_d(_jit,i0,r0,i1)
682static jit_word_t _vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
683# define vfp_bner_f(i0,r0,r1) vbcmp_f(ARM_CC_NE,i0,r0,r1)
684# define vfp_bnei_f(i0,r0,i1) _vfp_bnei_f(_jit,i0,r0,i1)
685static jit_word_t _vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
686# define vfp_bner_d(i0,r0,r1) vbcmp_d(ARM_CC_NE,i0,r0,r1)
687# define vfp_bnei_d(i0,r0,i1) _vfp_bnei_d(_jit,i0,r0,i1)
688static jit_word_t _vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
689# define vbncmp_x(cc,i0) _vbncmp_x(_jit,cc,i0)
690static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
691# define vbncmp_f(cc,i0,r0,r1) _vbncmp_f(_jit,cc,i0,r0,r1)
692static jit_word_t
693_vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
694# define vbncmp_d(cc,i0,r0,r1) _vbncmp_d(_jit,cc,i0,r0,r1)
695static jit_word_t
696_vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
697# define vfp_bunltr_f(i0,r0,r1) vbncmp_f(ARM_CC_GE,i0,r0,r1)
698# define vfp_bunlti_f(i0,r0,i1) _vfp_bunlti_f(_jit,i0,r0,i1)
699static jit_word_t
700_vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
701# define vfp_bunltr_d(i0,r0,r1) vbncmp_d(ARM_CC_GE,i0,r0,r1)
702# define vfp_bunlti_d(i0,r0,i1) _vfp_bunlti_d(_jit,i0,r0,i1)
703static jit_word_t
704_vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
705# define vfp_bunler_f(i0,r0,r1) vbncmp_f(ARM_CC_GT,i0,r0,r1)
706# define vfp_bunlei_f(i0,r0,i1) _vfp_bunlei_f(_jit,i0,r0,i1)
707static jit_word_t
708_vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
709# define vfp_bunler_d(i0,r0,r1) vbncmp_d(ARM_CC_GT,i0,r0,r1)
710# define vfp_bunlei_d(i0,r0,i1) _vfp_bunlei_d(_jit,i0,r0,i1)
711static jit_word_t
712_vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
713# define vfp_buneqr_x(i0) _vfp_buneqr_x(_jit,i0)
714static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
715# define vfp_buneqr_f(i0,r0,r1) _vfp_buneqr_f(_jit,i0,r0,r1)
716static jit_word_t
717_vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
718# define vfp_buneqi_f(i0,r0,i1) _vfp_buneqi_f(_jit,i0,r0,i1)
719static jit_word_t
720_vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
721# define vfp_buneqr_d(i0,r0,r1) _vfp_buneqr_d(_jit,i0,r0,r1)
722static jit_word_t
723_vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
724# define vfp_buneqi_d(i0,r0,i1) _vfp_buneqi_d(_jit,i0,r0,i1)
725static jit_word_t
726_vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
727# define vfp_bunger_x(i0) _vfp_bunger_x(_jit,i0)
728static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
729# define vfp_bunger_f(i0,r0,r1) _vfp_bunger_f(_jit,i0,r0,r1)
730static jit_word_t
731_vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
732# define vfp_bungei_f(i0,r0,i1) _vfp_bungei_f(_jit,i0,r0,i1)
733static jit_word_t
734_vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
735# define vfp_bunger_d(i0,r0,r1) _vfp_bunger_d(_jit,i0,r0,r1)
736static jit_word_t
737_vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
738# define vfp_bungei_d(i0,r0,i1) _vfp_bungei_d(_jit,i0,r0,i1)
739static jit_word_t
740_vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
741# define vfp_bungtr_f(i0,r0,r1) vbcmp_f(ARM_CC_HI,i0,r0,r1)
742# define vfp_bungti_f(i0,r0,i1) _vfp_bungti_f(_jit,i0,r0,i1)
743static jit_word_t
744_vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
745# define vfp_bungtr_d(i0,r0,r1) vbcmp_d(ARM_CC_HI,i0,r0,r1)
746# define vfp_bungti_d(i0,r0,i1) _vfp_bungti_d(_jit,i0,r0,i1)
747static jit_word_t
748_vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
749# define vfp_bltgtr_x(i0) _vfp_bltgtr_x(_jit,i0)
750static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
751# define vfp_bltgtr_f(i0,r0,r1) _vfp_bltgtr_f(_jit,i0,r0,r1)
752static jit_word_t
753_vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
754# define vfp_bltgti_f(i0,r0,i1) _vfp_bltgti_f(_jit,i0,r0,i1)
755static jit_word_t
756_vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
757# define vfp_bltgtr_d(i0,r0,r1) _vfp_bltgtr_d(_jit,i0,r0,r1)
758static jit_word_t
759_vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
760# define vfp_bltgti_d(i0,r0,i1) _vfp_bltgti_d(_jit,i0,r0,i1)
761static jit_word_t
762_vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
763# define vfp_bordr_f(i0,r0,r1) vbcmp_f(ARM_CC_VC,i0,r0,r1)
764# define vfp_bordi_f(i0,r0,i1) _vfp_bordi_f(_jit,i0,r0,i1)
765static jit_word_t
766_vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
767# define vfp_bordr_d(i0,r0,r1) vbcmp_d(ARM_CC_VC,i0,r0,r1)
768# define vfp_bordi_d(i0,r0,i1) _vfp_bordi_d(_jit,i0,r0,i1)
769static jit_word_t
770_vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
771# define vfp_bunordr_f(i0,r0,r1) vbcmp_f(ARM_CC_VS,i0,r0,r1)
772# define vfp_bunordi_f(i0,r0,i1) _vfp_bunordi_f(_jit,i0,r0,i1)
773static jit_word_t
774_vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
775# define vfp_bunordr_d(i0,r0,r1) vbcmp_d(ARM_CC_VS,i0,r0,r1)
776# define vfp_bunordi_d(i0,r0,i1) _vfp_bunordi_d(_jit,i0,r0,i1)
777static jit_word_t
778_vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
779# define vfp_ldr_f(r0,r1) VLDR_F32(r0,r1,0)
780# define vfp_ldr_d(r0,r1) VLDR_F64(r0,r1,0)
781# define vfp_ldi_f(r0,i0) _vfp_ldi_f(_jit,r0,i0)
782static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
783# define vfp_ldi_d(r0,i0) _vfp_ldi_d(_jit,r0,i0)
784static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
785# define vfp_ldxr_f(r0,r1,r2) _vfp_ldxr_f(_jit,r0,r1,r2)
786static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
787# define vfp_ldxr_d(r0,r1,r2) _vfp_ldxr_d(_jit,r0,r1,r2)
788static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
789# define vfp_ldxi_f(r0,r1,i0) _vfp_ldxi_f(_jit,r0,r1,i0)
790static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
791# define vfp_ldxi_d(r0,r1,i0) _vfp_ldxi_d(_jit,r0,r1,i0)
792static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
793# define vfp_str_f(r0,r1) VSTR_F32(r1,r0,0)
794# define vfp_str_d(r0,r1) VSTR_F64(r1,r0,0)
795# define vfp_sti_f(i0,r0) _vfp_sti_f(_jit,i0,r0)
796static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
797# define vfp_sti_d(i0,r0) _vfp_sti_d(_jit,i0,r0)
798static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
799# define vfp_stxr_f(r0,r1,r2) _vfp_stxr_f(_jit,r0,r1,r2)
800static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
801# define vfp_stxr_d(r0,r1,r2) _vfp_stxr_d(_jit,r0,r1,r2)
802static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
803# define vfp_stxi_f(i0,r0,r1) _vfp_stxi_f(_jit,i0,r0,r1)
804static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
805# define vfp_stxi_d(i0,r0,r1) _vfp_stxi_d(_jit,i0,r0,r1)
806static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
807# define vfp_vaarg_d(r0, r1) _vfp_vaarg_d(_jit, r0, r1)
808static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
809#endif
810
811#if CODE
/* Translate a lightning register number into the value placed in a 4-bit
 * VFP register field: subtract 16, then halve (arithmetic shift).
 * An odd input maps to the same value as the even number below it; the
 * encoders in this file test the low bit separately before calling this. */
# define vfp_regno(regno)		(((regno) - 16) >> 1)
813
814static int
815encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
816{
817 int code, mode, imm, mask;
818
819 if (hi != lo) {
820 if (mov && !inv) {
821 /* (I64)
822 * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
823 */
824 for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
825 imm = lo & mask;
826 if (imm != mask && imm != 0)
827 goto fail;
828 imm = hi & mask;
829 if (imm != mask && imm != 0)
830 goto fail;
831 }
832 mode = 0xe20;
833 imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
834 ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >> 3) |
835 ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
836 ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >> 7));
837 goto success;
838 }
839 goto fail;
840 }
841 /* (I32)
842 * 00000000 00000000 00000000 abcdefgh
843 * 00000000 00000000 abcdefgh 00000000
844 * 00000000 abcdefgh 00000000 00000000
845 * abcdefgh 00000000 00000000 00000000 */
846 for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
847 if ((lo & mask) == lo) {
848 imm = lo >> (mode << 3);
849 mode <<= 9;
850 goto success;
851 }
852 }
853 /* (I16)
854 * 00000000 abcdefgh 00000000 abcdefgh
855 * abcdefgh 00000000 abcdefgh 00000000 */
856 for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
857 if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
858 imm = lo >> (mode << 3);
859 mode = 0x800 | (mode << 9);
860 goto success;
861 }
862 }
863 if (mov) {
864 /* (I32)
865 * 00000000 00000000 abcdefgh 11111111
866 * 00000000 abcdefgh 11111111 11111111 */
867 for (mode = 0, mask = 0xff; mode < 2;
868 mask = (mask << 8) | 0xff, mode++) {
869 if ((lo & mask) == mask &&
870 !((lo & ~mask) >> 8) &&
871 (imm = lo >> (8 + (mode << 8)))) {
872 mode = 0xc00 | (mode << 8);
873 goto success;
874 }
875 }
876 if (!inv) {
877 /* (F32)
878 * aBbbbbbc defgh000 00000000 00000000
879 * from the ARM Architecture Reference Manual:
880 * In this entry, B = NOT(b). The bit pattern represents the
881 * floating-point number (-1)^s* 2^exp * mantissa, where
882 * S = UInt(a),
883 * exp = UInt(NOT(b):c:d)-3 and
884 * mantissa = (16+UInt(e:f:g:h))/16. */
885 if ((lo & 0x7ffff) == 0 &&
886 (((lo & 0x7e000000) == 0x3e000000) ||
887 ((lo & 0x7e000000) == 0x40000000))) {
888 mode = 0xf00;
889 imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
890 goto success;
891 }
892 }
893 }
894
895fail:
896 /* need another approach (load from memory, move from arm register, etc) */
897 return (-1);
898
899success:
900 code = inv ? ARM_VMVNI : ARM_VMOVI;
901 switch ((mode & 0xf00) >> 8) {
902 case 0x0: case 0x2: case 0x4: case 0x6:
903 case 0x8: case 0xa:
904 if (inv) mode |= 0x20;
905 if (!mov) mode |= 0x100;
906 break;
907 case 0x1: case 0x3: case 0x5: case 0x7:
908 /* should actually not reach here */
909 assert(!inv);
910 case 0x9: case 0xb:
911 assert(!mov);
912 break;
913 case 0xc: case 0xd:
914 /* should actually not reach here */
915 assert(inv);
916 case 0xe:
917 assert(mode & 0x20);
918 assert(mov && !inv);
919 break;
920 default:
921 assert(!(mode & 0x20));
922 break;
923 }
924 imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
925 code |= mode | imm;
926 if (jit_thumb_p()) {
927 if (code & 0x1000000)
928 code |= 0xff000000;
929 else
930 code |= 0xef000000;
931 }
932 else
933 code |= ARM_CC_NV;
934 return (code);
935}
936
937static void
938_vodi(jit_state_t *_jit, int oi, int r0)
939{
940 jit_thumb_t thumb;
941 assert(!(oi & 0x0000f000));
942 assert(!(r0 & 1)); r0 = vfp_regno(r0);
943 thumb.i = oi|(_u4(r0)<<12);
944 if (jit_thumb_p())
945 iss(thumb.s[0], thumb.s[1]);
946 else
947 ii(thumb.i);
948}
949
950static void
951_voqi(jit_state_t *_jit, int oi, int r0)
952{
953 jit_thumb_t thumb;
954 assert(!(oi & 0x0000f000));
955 assert(!(r0 & 3)); r0 = vfp_regno(r0);
956 thumb.i = oi|(_u4(r0)<<12);
957 if (jit_thumb_p())
958 iss(thumb.s[0], thumb.s[1]);
959 else
960 ii(thumb.i);
961}
962
963static void
964_cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
965{
966 jit_thumb_t thumb;
967 assert(!(cc & 0x0fffffff));
968 assert(!(o & 0xf000f00f));
969 if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0);
970 if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1);
971 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
972 if (jit_thumb_p())
973 iss(thumb.s[0], thumb.s[1]);
974 else
975 ii(thumb.i);
976}
977
978static void
979_cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
980{
981 jit_thumb_t thumb;
982 assert(!(cc & 0x0fffffff));
983 assert(!(o & 0xf000f00f));
984 assert(!(r0 & 1) && !(r1 & 1));
985 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
986 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
987 if (jit_thumb_p())
988 iss(thumb.s[0], thumb.s[1]);
989 else
990 ii(thumb.i);
991}
992
993static void
994_cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
995{
996 jit_thumb_t thumb;
997 assert(!(cc & 0x0fffffff));
998 assert(!(o & 0xf000f00f));
999 assert(!(r0 & 3) && !(r1 & 1));
1000 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1001 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1002 if (jit_thumb_p())
1003 iss(thumb.s[0], thumb.s[1]);
1004 else
1005 ii(thumb.i);
1006}
1007
1008static void
1009_cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
1010{
1011 jit_thumb_t thumb;
1012 assert(!(cc & 0x0fffffff));
1013 assert(!(o & 0xf000f00f));
1014 assert(!(r0 & 3) && !(r1 & 3));
1015 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1016 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1017 if (jit_thumb_p())
1018 iss(thumb.s[0], thumb.s[1]);
1019 else
1020 ii(thumb.i);
1021}
1022
1023static void
1024_cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1025{
1026 jit_thumb_t thumb;
1027 assert(!(cc & 0x0fffffff));
1028 assert(!(o & 0xf000f00f));
1029 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1030 if (jit_thumb_p())
1031 iss(thumb.s[0], thumb.s[1]);
1032 else
1033 ii(thumb.i);
1034}
1035
1036static void
1037_cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1038{
1039 jit_thumb_t thumb;
1040 assert(!(cc & 0x0fffffff));
1041 assert(!(o & 0xf000f00f));
1042 if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1);
1043 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1044 if (jit_thumb_p())
1045 iss(thumb.s[0], thumb.s[1]);
1046 else
1047 ii(thumb.i);
1048}
1049
1050static void
1051_cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1052{
1053 jit_thumb_t thumb;
1054 assert(!(cc & 0x0fffffff));
1055 assert(!(o & 0xf000f00f));
1056 if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1);
1057 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1058 if (jit_thumb_p())
1059 iss(thumb.s[0], thumb.s[1]);
1060 else
1061 ii(thumb.i);
1062}
1063
1064static void
1065_cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1066{
1067 jit_thumb_t thumb;
1068 assert(!(cc & 0x0fffffff));
1069 assert(!(o & 0xf000f00f));
1070 /* use same bit pattern, to set opc1... */
1071 if (r1 & 1) o |= ARM_V_I32; r1 = vfp_regno(r1);
1072 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1073 if (jit_thumb_p())
1074 iss(thumb.s[0], thumb.s[1]);
1075 else
1076 ii(thumb.i);
1077}
1078
1079static void
1080_cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1081{
1082 jit_thumb_t thumb;
1083 assert(!(cc & 0x0fffffff));
1084 assert(!(o & 0xf00ff00f));
1085 assert(!(r2 & 1));
1086 r2 = vfp_regno(r2);
1087 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1088 if (jit_thumb_p())
1089 iss(thumb.s[0], thumb.s[1]);
1090 else
1091 ii(thumb.i);
1092}
1093
1094static void
1095_cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1096{
1097 jit_thumb_t thumb;
1098 assert(!(cc & 0x0fffffff));
1099 assert(!(o & 0xf00ff00f));
1100 if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0);
1101 if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1);
1102 if (r2 & 1) o |= ARM_V_M; r2 = vfp_regno(r2);
1103 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1104 if (jit_thumb_p())
1105 iss(thumb.s[0], thumb.s[1]);
1106 else
1107 ii(thumb.i);
1108}
1109
1110static void
1111_cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1112{
1113 jit_thumb_t thumb;
1114 assert(!(cc & 0x0fffffff));
1115 assert(!(o & 0xf00ff00f));
1116 assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
1117 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1118 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1119 if (jit_thumb_p())
1120 iss(thumb.s[0], thumb.s[1]);
1121 else
1122 ii(thumb.i);
1123}
1124
1125static void
1126_cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1127{
1128 jit_thumb_t thumb;
1129 assert(!(cc & 0x0fffffff));
1130 assert(!(o & 0xf00ff00f));
1131 assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
1132 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1133 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1134 if (jit_thumb_p())
1135 iss(thumb.s[0], thumb.s[1]);
1136 else
1137 ii(thumb.i);
1138}
1139
1140static void
1141_cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1142{
1143 jit_thumb_t thumb;
1144 assert(!(cc & 0x0fffffff));
1145 assert(!(o & 0xf00ff00f));
1146 assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
1147 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1148 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1149 if (jit_thumb_p())
1150 iss(thumb.s[0], thumb.s[1]);
1151 else
1152 ii(thumb.i);
1153}
1154
1155static void
1156_cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1157{
1158 jit_thumb_t thumb;
1159 assert(!(cc & 0x0fffffff));
1160 assert(!(o & 0xf00ff00f));
1161 assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
1162 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1163 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1164 if (jit_thumb_p())
1165 iss(thumb.s[0], thumb.s[1]);
1166 else
1167 ii(thumb.i);
1168}
1169
1170static void
1171_cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1172{
1173 jit_thumb_t thumb;
1174 /* i0 << 2 is byte offset */
1175 assert(!(cc & 0x0fffffff));
1176 assert(!(o & 0xf00ff0ff));
1177 if (r0 & 1) {
1178 assert(!(o & ARM_V_F64));
1179 o |= ARM_V_D;
1180 }
1181 r0 = vfp_regno(r0);
1182 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0);
1183 if (jit_thumb_p())
1184 iss(thumb.s[0], thumb.s[1]);
1185 else
1186 ii(thumb.i);
1187}
1188
1189static void
1190_cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1191{
1192 jit_thumb_t thumb;
1193 assert(!(cc & 0x0fffffff));
1194 assert(!(o & 0xf00ff0ff));
1195 /* save i0 double precision registers */
1196 if (o & ARM_V_F64) i0 <<= 1;
1197 /* if (r1 & 1) cc & ARM_V_F64 must be false */
1198 if (r1 & 1) o |= ARM_V_D; r1 = vfp_regno(r1);
1199 assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
1200 thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
1201 if (jit_thumb_p())
1202 iss(thumb.s[0], thumb.s[1]);
1203 else
1204 ii(thumb.i);
1205}
1206
1207static void
1208_vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1209{
1210 if (r0 != r1) {
1211 if (jit_fpr_p(r1)) {
1212 if (jit_fpr_p(r0))
1213 VMOV_F32(r0, r1);
1214 else
1215 VMOV_A_S(r0, r1);
1216 }
1217 else if (jit_fpr_p(r0))
1218 VMOV_S_A(r0, r1);
1219 else
1220 movr(r0, r1);
1221 }
1222}
1223
1224static void
1225_vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1226{
1227 if (r0 != r1) {
1228 if (jit_fpr_p(r1)) {
1229 if (jit_fpr_p(r0))
1230 VMOV_F64(r0, r1);
1231 else
1232 VMOV_AA_D(r0, r0 + 1, r1);
1233 }
1234 else if (jit_fpr_p(r0))
1235 VMOV_D_AA(r0, r1, r1 + 1);
1236 else {
1237 /* minor consistency check */
1238 assert(r0 + 1 != r1 && r0 -1 != r1);
1239 movr(r0, r1);
1240 movr(r0 + 1, r1 + 1);
1241 }
1242 }
1243}
1244
1245static void
1246_vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
1247{
1248 union {
1249 jit_int32_t i;
1250 jit_float32_t f;
1251 } u;
1252 jit_int32_t reg;
1253 jit_int32_t code;
1254 u.f = i0;
1255 if (jit_fpr_p(r0)) {
1256 /* float arguments are packed, for others,
1257 * lightning only address even registers */
1258 if (!(r0 & 1) && (r0 - 16) >= 0 &&
1259 ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
1260 (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
1261 VIMM(code, r0);
1262 else {
1263 reg = jit_get_reg(jit_class_gpr);
1264 movi(rn(reg), u.i);
1265 VMOV_S_A(r0, rn(reg));
1266 jit_unget_reg(reg);
1267 }
1268 }
1269 else
1270 movi(r0, u.i);
1271}
1272
1273static void
1274_vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
1275{
1276 union {
1277 jit_int32_t i[2];
1278 jit_float64_t d;
1279 } u;
1280 jit_int32_t code;
1281 jit_int32_t rg0, rg1;
1282 u.d = i0;
1283 if (jit_fpr_p(r0)) {
1284 if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
1285 (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
1286 VIMM(code, r0);
1287 else {
1288 rg0 = jit_get_reg(jit_class_gpr);
1289 rg1 = jit_get_reg(jit_class_gpr);
1290 movi(rn(rg0), u.i[0]);
1291 movi(rn(rg1), u.i[1]);
1292 VMOV_D_AA(r0, rn(rg0), rn(rg1));
1293 jit_unget_reg(rg1);
1294 jit_unget_reg(rg0);
1295 }
1296 }
1297 else {
1298 movi(r0, u.i[0]);
1299 movi(r0 + 1, u.i[1]);
1300 }
1301}
1302
1303static void
1304_vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1305{
1306 jit_int32_t reg;
1307 if (jit_fpr_p(r1)) {
1308 if (jit_fpr_p(r0))
1309 VCVT_F64_F32(r0, r1);
1310 else {
1311 reg = jit_get_reg(jit_class_fpr);
1312 VCVT_F64_F32(rn(reg), r1);
1313 VMOV_A_S(r0, rn(reg));
1314 jit_unget_reg(reg);
1315 }
1316 }
1317 else {
1318 reg = jit_get_reg(jit_class_fpr);
1319 VMOV_S_A(rn(reg), r1);
1320 VCVT_F64_F32(rn(reg), rn(reg));
1321 if (jit_fpr_p(r0))
1322 VMOV_F32(r0, rn(reg));
1323 else
1324 VMOV_A_S(r0, rn(reg));
1325 jit_unget_reg(reg);
1326 }
1327}
1328
1329static void
1330_vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1331{
1332 jit_int32_t reg;
1333 if (jit_fpr_p(r1)) {
1334 if (jit_fpr_p(r0))
1335 VCVT_F32_F64(r0, r1);
1336 else {
1337 reg = jit_get_reg(jit_class_fpr);
1338 VCVT_F32_F64(rn(reg), r1);
1339 VMOV_AA_D(r0, r0 + 1, rn(reg));
1340 jit_unget_reg(reg);
1341 }
1342 }
1343 else {
1344 reg = jit_get_reg(jit_class_fpr);
1345 VMOV_D_AA(rn(reg), r1, r1 + 1);
1346 VCVT_F32_F64(rn(reg), rn(reg));
1347 if (jit_fpr_p(r0))
1348 VMOV_F64(r0, rn(reg));
1349 else
1350 VMOV_AA_D(r0, r0 + 1, rn(reg));
1351 jit_unget_reg(reg);
1352 }
1353}
1354
1355static void
1356_vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1357{
1358 jit_int32_t reg;
1359 if (jit_fpr_p(r0)) {
1360 VMOV_V_I32(r0, r1);
1361 VCVT_F32_S32(r0, r0);
1362 }
1363 else {
1364 reg = jit_get_reg(jit_class_fpr);
1365 VMOV_V_I32(rn(reg), r1);
1366 VCVT_F32_S32(rn(reg), rn(reg));
1367 VMOV_F32(r0, rn(reg));
1368 jit_unget_reg(reg);
1369 }
1370}
1371
1372static void
1373_vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1374{
1375 jit_int32_t reg;
1376 if (jit_fpr_p(r0)) {
1377 VMOV_V_I32(r0, r1);
1378 VCVT_F64_S32(r0, r0);
1379 }
1380 else {
1381 reg = jit_get_reg(jit_class_fpr);
1382 VMOV_V_I32(rn(reg), r1);
1383 VCVT_F64_S32(rn(reg), rn(reg));
1384 VMOV_F64(r0, rn(reg));
1385 jit_unget_reg(reg);
1386 }
1387}
1388
1389static void
1390_vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1391{
1392 jit_int32_t reg;
1393 reg = jit_get_reg(jit_class_fpr);
1394 if (jit_fpr_p(r1))
1395 VCVT_S32_F32(rn(reg), r1);
1396 else {
1397 VMOV_V_I32(rn(reg), r1);
1398 VCVT_S32_F32(rn(reg), rn(reg));
1399 }
1400 VMOV_A_S32(r0, rn(reg));
1401 jit_unget_reg(reg);
1402}
1403
1404static void
1405_vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1406{
1407 jit_int32_t reg;
1408 reg = jit_get_reg(jit_class_fpr);
1409 if (jit_fpr_p(r1))
1410 VCVT_S32_F64(rn(reg), r1);
1411 else {
1412 VMOV_V_I32(rn(reg), r1);
1413 VCVT_S32_F64(rn(reg), rn(reg));
1414 }
1415 VMOV_A_S32(r0, rn(reg));
1416 jit_unget_reg(reg);
1417}
1418
/*
 * fopi(name): define _vfp_<name>i_f(), the immediate-operand form of
 * vfp_<name>r_f().  The float immediate i0 is loaded into a scratch FPR
 * with vfp_movi_f() and passed as the last operand of the register form.
 */
# define fopi(name) \
static void \
_vfp_##name##i_f(jit_state_t *_jit, \
                 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \
{ \
    jit_int32_t imm; \
    imm = jit_get_reg(jit_class_fpr); \
    vfp_movi_f(rn(imm), i0); \
    vfp_##name##r_f(r0, r1, rn(imm)); \
    jit_unget_reg(imm); \
}
/*
 * dopi(name): define _vfp_<name>i_d(), the immediate-operand form of
 * vfp_<name>r_d().  The double immediate i0 is loaded into a scratch FPR
 * with vfp_movi_d() and passed as the last operand of the register form.
 */
# define dopi(name) \
static void \
_vfp_##name##i_d(jit_state_t *_jit, \
                 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \
{ \
    jit_int32_t imm; \
    imm = jit_get_reg(jit_class_fpr); \
    vfp_movi_d(rn(imm), i0); \
    vfp_##name##r_d(r0, r1, rn(imm)); \
    jit_unget_reg(imm); \
}
/*
 * fbopi(name): define _vfp_b<name>i_f(), the immediate-operand form of the
 * branch helper vfp_b<name>r_f().  The float immediate i0 is loaded into a
 * scratch FPR requested with jit_class_nospill, and the value returned by
 * the register form is passed back to the caller unchanged.
 */
# define fbopi(name) \
static jit_word_t \
_vfp_b##name##i_f(jit_state_t *_jit, \
                  jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \
{ \
    jit_word_t  where; \
    jit_int32_t imm; \
    imm = jit_get_reg(jit_class_fpr | jit_class_nospill); \
    vfp_movi_f(rn(imm), i0); \
    where = vfp_b##name##r_f(r0, r1, rn(imm)); \
    jit_unget_reg(imm); \
    return (where); \
}
/*
 * dbopi(name): define _vfp_b<name>i_d(), the immediate-operand form of the
 * branch helper vfp_b<name>r_d().  The double immediate i0 is loaded into a
 * scratch FPR requested with jit_class_nospill, and the value returned by
 * the register form is passed back to the caller unchanged.
 */
# define dbopi(name) \
static jit_word_t \
_vfp_b##name##i_d(jit_state_t *_jit, \
                  jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \
{ \
    jit_word_t  where; \
    jit_int32_t imm; \
    imm = jit_get_reg(jit_class_fpr | jit_class_nospill); \
    vfp_movi_d(rn(imm), i0); \
    where = vfp_b##name##r_d(r0, r1, rn(imm)); \
    jit_unget_reg(imm); \
    return (where); \
}
1465
1466fopi(add)
1467dopi(add)
1468fopi(sub)
1469fopi(rsb)
1470dopi(rsb)
1471dopi(sub)
1472fopi(mul)
1473dopi(mul)
1474fopi(div)
1475dopi(div)
1476
1477static void
1478_vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1479{
1480 jit_int32_t rg0, rg1;
1481 if (jit_fpr_p(r0)) {
1482 if (jit_fpr_p(r1))
1483 VCMP_F32(r0, r1);
1484 else {
1485 rg1 = jit_get_reg(jit_class_fpr);
1486 VMOV_S_A(rn(rg1), r1);
1487 VCMP_F32(r0, rn(rg1));
1488 jit_unget_reg(rg1);
1489 }
1490 }
1491 else {
1492 rg0 = jit_get_reg(jit_class_fpr);
1493 VMOV_S_A(rn(rg0), r0);
1494 if (jit_fpr_p(r1))
1495 VCMP_F32(rn(rg0), r1);
1496 else {
1497 rg1 = jit_get_reg(jit_class_fpr);
1498 VMOV_S_A(rn(rg1), r1);
1499 VCMP_F32(rn(rg0), rn(rg1));
1500 jit_unget_reg(rg1);
1501 }
1502 jit_unget_reg(rg0);
1503 }
1504}
1505
1506static void
1507_vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1508{
1509 jit_int32_t rg0, rg1;
1510 if (jit_fpr_p(r0)) {
1511 if (jit_fpr_p(r1))
1512 VCMP_F64(r0, r1);
1513 else {
1514 rg1 = jit_get_reg(jit_class_fpr);
1515 VMOV_D_AA(rn(rg1), r1, r1 + 1);
1516 VCMP_F64(r0, rn(rg1));
1517 jit_unget_reg(rg1);
1518 }
1519 }
1520 else {
1521 rg0 = jit_get_reg(jit_class_fpr);
1522 VMOV_D_AA(rn(rg0), r0, r0 + 1);
1523 if (jit_fpr_p(r1))
1524 VCMP_F64(rn(rg0), r1);
1525 else {
1526 rg1 = jit_get_reg(jit_class_fpr);
1527 VMOV_D_AA(rn(rg1), r1, r1 + 1);
1528 VCMP_F64(rn(rg0), rn(rg1));
1529 jit_unget_reg(rg1);
1530 }
1531 jit_unget_reg(rg0);
1532 }
1533}
1534
1535static void
1536_vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
1537{
1538 VMRS(_R15_REGNO);
1539 if (jit_thumb_p()) {
1540 if ((c0 ^ c1) >> 28 == 1) {
1541 ITE(c0);
1542 if (r0 < 8) {
1543 T1_MOVI(r0, 0);
1544 T1_MOVI(r0, 1);
1545 }
1546 else {
1547 T2_MOVI(r0, 0);
1548 T2_MOVI(r0, 1);
1549 }
1550 }
1551 else {
1552 if (r0 < 8) {
1553 IT(c0);
1554 T1_MOVI(r0, 0);
1555 IT(c1);
1556 T1_MOVI(r0, 1);
1557 }
1558 else {
1559 IT(c0);
1560 T2_MOVI(r0, 0);
1561 IT(c1);
1562 T2_MOVI(r0, 1);
1563 }
1564 }
1565 }
1566 else {
1567 CC_MOVI(c0, r0, 0);
1568 CC_MOVI(c1, r0, 1);
1569 }
1570}
1571
1572static void
1573_vcmp01_f(jit_state_t *_jit, int c0, int c1,
1574 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1575{
1576 vfp_cmp_f(r1, r2);
1577 vcmp01_x(c0, c1, r0);
1578}
1579
1580static void
1581_vcmp01_d(jit_state_t *_jit, int c0, int c1,
1582 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1583{
1584 vfp_cmp_d(r1, r2);
1585 vcmp01_x(c0, c1, r0);
1586}
1587
1588static void
1589_vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1590{
1591 if (jit_thumb_p()) {
1592 if (r0 < 8) {
1593 T1_MOVI(r0, 1);
1594 VMRS(_R15_REGNO);
1595 IT(cc);
1596 T1_MOVI(r0, 0);
1597 }
1598 else {
1599 T2_MOVI(r0, 1);
1600 VMRS(_R15_REGNO);
1601 IT(cc);
1602 T2_MOVI(r0, 0);
1603 }
1604 }
1605 else {
1606 VMRS(_R15_REGNO);
1607 MOVI(r0, 1);
1608 CC_MOVI(cc, r0, 0);
1609 }
1610}
1611static void
1612_vcmp_10_f(jit_state_t *_jit, int cc,
1613 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1614{
1615 vfp_cmp_f(r1, r2);
1616 vcmp10_x(cc, r0);
1617}
1618
1619static void
1620_vcmp_10_d(jit_state_t *_jit, int cc,
1621 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1622{
1623 vfp_cmp_d(r1, r2);
1624 vcmp10_x(cc, r0);
1625}
1626
1627fopi(lt)
1628dopi(lt)
1629fopi(le)
1630dopi(le)
1631fopi(eq)
1632dopi(eq)
1633fopi(ge)
1634dopi(ge)
1635fopi(gt)
1636dopi(gt)
1637fopi(ne)
1638dopi(ne)
1639fopi(unlt)
1640dopi(unlt)
1641fopi(unle)
1642dopi(unle)
1643
1644static void
1645_vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
1646{
1647 VMRS(_R15_REGNO);
1648 if (jit_thumb_p()) {
1649 ITE(ARM_CC_NE);
1650 if (r0 < 8) {
1651 T1_MOVI(r0, 0);
1652 T1_MOVI(r0, 1);
1653 IT(ARM_CC_VS);
1654 T1_MOVI(r0, 1);
1655 }
1656 else {
1657 T2_MOVI(r0, 0);
1658 T2_MOVI(r0, 1);
1659 IT(ARM_CC_VS);
1660 T2_MOVI(r0, 1);
1661 }
1662 }
1663 else {
1664 CC_MOVI(ARM_CC_NE, r0, 0);
1665 CC_MOVI(ARM_CC_EQ, r0, 1);
1666 CC_MOVI(ARM_CC_VS, r0, 1);
1667 }
1668}
1669
1670static void
1671_vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1672{
1673 vfp_cmp_f(r1, r2);
1674 vfp_uneqr_x(r0);
1675}
1676
1677fopi(uneq)
1678
1679static void
1680_vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1681{
1682 vfp_cmp_d(r1, r2);
1683 vfp_uneqr_x(r0);
1684}
1685
1686dopi(uneq)
1687
1688static void
1689_vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1690{
1691 if (jit_thumb_p()) {
1692 if (r0 < 8) {
1693 T1_MOVI(r0, 0);
1694 VMRS(_R15_REGNO);
1695 IT(cc);
1696 T1_MOVI(r0, 1);
1697 }
1698 else {
1699 T2_MOVI(r0, 0);
1700 VMRS(_R15_REGNO);
1701 IT(cc);
1702 T2_MOVI(r0, 1);
1703 }
1704 }
1705 else {
1706 MOVI(r0, 0);
1707 VMRS(_R15_REGNO);
1708 CC_MOVI(cc, r0, 1);
1709 }
1710}
1711
1712static void
1713_vcmp_01_f(jit_state_t *_jit, int cc,
1714 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1715{
1716 vfp_cmp_f(r1, r2);
1717 vcmp_01_x(cc, r0);
1718}
1719
1720static void
1721_vcmp_01_d(jit_state_t *_jit, int cc,
1722 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1723{
1724 vfp_cmp_d(r1, r2);
1725 vcmp_01_x(cc, r0);
1726}
1727
1728fopi(unge)
1729dopi(unge)
1730fopi(ungt)
1731dopi(ungt)
1732
1733static void
1734_vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
1735{
1736 VMRS(_R15_REGNO);
1737 if (jit_thumb_p()) {
1738 ITE(ARM_CC_NE);
1739 if (r0 < 8) {
1740 T1_MOVI(r0, 1);
1741 T1_MOVI(r0, 0);
1742 IT(ARM_CC_VS);
1743 T1_MOVI(r0, 0);
1744 }
1745 else {
1746 T2_MOVI(r0, 1);
1747 T2_MOVI(r0, 0);
1748 IT(ARM_CC_VS);
1749 T2_MOVI(r0, 0);
1750 }
1751 }
1752 else {
1753 CC_MOVI(ARM_CC_NE, r0, 1);
1754 CC_MOVI(ARM_CC_EQ, r0, 0);
1755 CC_MOVI(ARM_CC_VS, r0, 0);
1756 }
1757}
1758
1759static void
1760_vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1761{
1762 vfp_cmp_f(r1, r2);
1763 vfp_ltgtr_x(r0);
1764}
1765
1766fopi(ltgt)
1767
1768static void
1769_vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1770{
1771 vfp_cmp_d(r1, r2);
1772 vfp_ltgtr_x(r0);
1773}
1774
1775dopi(ltgt)
1776
1777static void
1778_vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1779{
1780 vfp_cmp_f(r1, r2);
1781 vcmp10_x(ARM_CC_VS, r0);
1782}
1783
1784fopi(ord)
1785
1786static void
1787_vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1788{
1789 vfp_cmp_d(r1, r2);
1790 vcmp10_x(ARM_CC_VS, r0);
1791}
1792
1793dopi(ord)
1794
1795static void
1796_vfp_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1797{
1798 vfp_cmp_f(r1, r2);
1799 vcmp_01_x(ARM_CC_VS, r0);
1800}
1801
1802fopi(unord)
1803
1804static void
1805_vfp_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1806{
1807 vfp_cmp_d(r1, r2);
1808 vcmp_01_x(ARM_CC_VS, r0);
1809}
1810
1811dopi(unord)
1812
1813static jit_word_t
1814_vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
1815{
1816 jit_word_t d, w;
1817 VMRS(_R15_REGNO);
1818 w = _jit->pc.w;
1819 if (jit_thumb_p()) {
1820 d = ((i0 - w) >> 1) - 2;
1821 assert(_s20P(d));
1822 T2_CC_B(cc, encode_thumb_cc_jump(d));
1823 }
1824 else {
1825 d = ((i0 - w) >> 2) - 2;
1826 assert(_s24P(d));
1827 CC_B(cc, d & 0x00ffffff);
1828 }
1829 return (w);
1830}
1831
1832
1833static jit_word_t
1834_vbcmp_f(jit_state_t *_jit, int cc,
1835 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1836{
1837 vfp_cmp_f(r0, r1);
1838 return (vbcmp_x(cc, i0));
1839}
1840
1841static jit_word_t
1842_vbcmp_d(jit_state_t *_jit, int cc,
1843 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1844{
1845 vfp_cmp_d(r0, r1);
1846 return (vbcmp_x(cc, i0));
1847}
1848
1849static jit_word_t
1850_vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
1851{
1852 jit_word_t d, p, w;
1853 VMRS(_R15_REGNO);
1854 p = _jit->pc.w;
1855 if (jit_thumb_p()) {
1856 T2_CC_B(cc, 0);
1857 w = _jit->pc.w;
1858 d = ((i0 - w) >> 1) - 2;
1859 assert(_s20P(d));
1860 T2_B(encode_thumb_jump(d));
1861 }
1862 else {
1863 CC_B(cc, 0);
1864 w = _jit->pc.w;
1865 d = ((i0 - w) >> 2) - 2;
1866 assert(_s24P(d));
1867 B(d & 0x00ffffff);
1868 }
1869 patch_at(arm_patch_jump, p, _jit->pc.w);
1870 return (w);
1871}
1872
1873static jit_word_t
1874_vbncmp_f(jit_state_t *_jit, int cc,
1875 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1876{
1877 vfp_cmp_f(r0, r1);
1878 return (vbncmp_x(cc, i0));
1879}
1880
1881static jit_word_t
1882_vbncmp_d(jit_state_t *_jit, int cc,
1883 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1884{
1885 vfp_cmp_d(r0, r1);
1886 return (vbncmp_x(cc, i0));
1887}
1888
1889fbopi(lt)
1890dbopi(lt)
1891fbopi(le)
1892dbopi(le)
1893fbopi(eq)
1894dbopi(eq)
1895fbopi(ge)
1896dbopi(ge)
1897fbopi(gt)
1898dbopi(gt)
1899fbopi(ne)
1900dbopi(ne)
1901fbopi(unlt)
1902dbopi(unlt)
1903fbopi(unle)
1904dbopi(unle)
1905
1906static jit_word_t
1907_vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
1908{
1909 jit_word_t d, p, q, w;
1910 VMRS(_R15_REGNO);
1911 p = _jit->pc.w;
1912 if (jit_thumb_p()) {
1913 T2_CC_B(ARM_CC_VS, 0);
1914 q = _jit->pc.w;
1915 T2_CC_B(ARM_CC_NE, 0);
1916 patch_at(arm_patch_jump, p, _jit->pc.w);
1917 w = _jit->pc.w;
1918 d = ((i0 - w) >> 1) - 2;
1919 assert(_s20P(d));
1920 T2_B(encode_thumb_jump(d));
1921 }
1922 else {
1923 CC_B(ARM_CC_VS, 0);
1924 q = _jit->pc.w;
1925 CC_B(ARM_CC_NE, 0);
1926 patch_at(arm_patch_jump, p, _jit->pc.w);
1927 w = _jit->pc.w;
1928 d = ((i0 - w) >> 2) - 2;
1929 assert(_s24P(d));
1930 B(d & 0x00ffffff);
1931 }
1932 patch_at(arm_patch_jump, q, _jit->pc.w);
1933 return (w);
1934}
1935
1936static jit_word_t
1937_vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1938{
1939 vfp_cmp_f(r0, r1);
1940 return (vfp_buneqr_x(i0));
1941}
1942
1943fbopi(uneq)
1944
1945static jit_word_t
1946_vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1947{
1948 vfp_cmp_d(r0, r1);
1949 return (vfp_buneqr_x(i0));
1950}
1951
1952dbopi(uneq)
1953
1954static jit_word_t
1955_vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
1956{
1957 jit_word_t d, p, w;
1958 VMRS(_R15_REGNO);
1959 p = _jit->pc.w;
1960 if (jit_thumb_p()) {
1961 T2_CC_B(ARM_CC_MI, 0);
1962 w = _jit->pc.w;
1963 d = ((i0 - w) >> 1) - 2;
1964 assert(_s20P(d));
1965 T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
1966 }
1967 else {
1968 CC_B(ARM_CC_MI, 0);
1969 w = _jit->pc.w;
1970 d = ((i0 - w) >> 2) - 2;
1971 assert(_s24P(d));
1972 CC_B(ARM_CC_HS, d & 0x00ffffff);
1973 }
1974 patch_at(arm_patch_jump, p, _jit->pc.w);
1975 return (w);
1976}
1977
1978static jit_word_t
1979_vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1980{
1981 vfp_cmp_f(r0, r1);
1982 return (vfp_bunger_x(i0));
1983}
1984
1985fbopi(unge)
1986
1987static jit_word_t
1988_vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1989{
1990 vfp_cmp_d(r0, r1);
1991 return (vfp_bunger_x(i0));
1992}
1993
1994dbopi(unge)
1995
1996static jit_word_t
1997_vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
1998{
1999 jit_word_t d, p, q, w;
2000 VMRS(_R15_REGNO);
2001 p = _jit->pc.w;
2002 if (jit_thumb_p()) {
2003 T2_CC_B(ARM_CC_VS, 0);
2004 q = _jit->pc.w;
2005 T2_CC_B(ARM_CC_EQ, 0);
2006 w = _jit->pc.w;
2007 d = ((i0 - w) >> 1) - 2;
2008 assert(_s20P(d));
2009 T2_B(encode_thumb_jump(d));
2010 }
2011 else {
2012 CC_B(ARM_CC_VS, 0);
2013 q = _jit->pc.w;
2014 CC_B(ARM_CC_EQ, 0);
2015 w = _jit->pc.w;
2016 d = ((i0 - w) >> 2) - 2;
2017 assert(_s24P(d));
2018 B(d & 0x00ffffff);
2019 }
2020 patch_at(arm_patch_jump, p, _jit->pc.w);
2021 patch_at(arm_patch_jump, q, _jit->pc.w);
2022 return (w);
2023}
2024
2025static jit_word_t
2026_vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2027{
2028 vfp_cmp_f(r0, r1);
2029 return (vfp_bltgtr_x(i0));
2030}
2031
2032fbopi(ungt)
2033dbopi(ungt)
2034fbopi(ltgt)
2035
2036static jit_word_t
2037_vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2038{
2039 vfp_cmp_d(r0, r1);
2040 return (vfp_bltgtr_x(i0));
2041}
2042
2043dbopi(ltgt)
2044fbopi(ord)
2045dbopi(ord)
2046fbopi(unord)
2047dbopi(unord)
2048
2049static void
2050_vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2051{
2052 jit_int32_t gpr;
2053 if (jit_fpr_p(r0)) {
2054 gpr = jit_get_reg(jit_class_gpr);
2055 movi(rn(gpr), i0);
2056 VLDR_F32(r0, rn(gpr), 0);
2057 jit_unget_reg(gpr);
2058 }
2059 else
2060 ldi_i(r0, i0);
2061}
2062
2063static void
2064_vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2065{
2066 jit_int32_t reg;
2067 reg = jit_get_reg(jit_class_gpr);
2068 movi(rn(reg), i0);
2069 if (jit_fpr_p(r0))
2070 VLDR_F64(r0, rn(reg), 0);
2071 else {
2072 ldr_i(r0, rn(reg));
2073 ldxi_i(r0 + 1, rn(reg), 4);
2074 }
2075 jit_unget_reg(reg);
2076}
2077
2078static void
2079_vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2080{
2081 jit_int32_t reg;
2082 if (jit_fpr_p(r0)) {
2083 reg = jit_get_reg(jit_class_gpr);
2084 addr(rn(reg), r1, r2);
2085 VLDR_F32(r0, rn(reg), 0);
2086 jit_unget_reg(reg);
2087 }
2088 else
2089 ldxr_i(r0, r1, r2);
2090}
2091
2092static void
2093_vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2094{
2095 jit_int32_t reg;
2096 reg = jit_get_reg(jit_class_gpr);
2097 addr(rn(reg), r1, r2);
2098 if (jit_fpr_p(r0))
2099 VLDR_F64(r0, rn(reg), 0);
2100 else {
2101 ldr_i(r0, rn(reg));
2102 ldxi_i(r0 + 1, rn(reg), 4);
2103 }
2104 jit_unget_reg(reg);
2105}
2106
2107static void
2108_vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2109{
2110 jit_int32_t reg;
2111 if (jit_fpr_p(r0)) {
2112 if (i0 >= 0) {
2113 assert(!(i0 & 3));
2114 if (i0 < 1024)
2115 VLDR_F32(r0, r1, i0 >> 2);
2116 else {
2117 reg = jit_get_reg(jit_class_gpr);
2118 addi(rn(reg), r1, i0);
2119 VLDR_F32(r0, rn(reg), 0);
2120 jit_unget_reg(reg);
2121 }
2122 }
2123 else {
2124 i0 = -i0;
2125 assert(!(i0 & 3));
2126 if (i0 < 1024)
2127 VLDRN_F32(r0, r1, i0 >> 2);
2128 else {
2129 reg = jit_get_reg(jit_class_gpr);
2130 subi(rn(reg), r1, i0);
2131 VLDR_F32(r0, rn(reg), 0);
2132 jit_unget_reg(reg);
2133 }
2134 }
2135 }
2136 else
2137 ldxi_i(r0, r1, i0);
2138}
2139
2140static void
2141_vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2142{
2143 jit_int32_t reg;
2144 if (jit_fpr_p(r0)) {
2145 if (i0 >= 0) {
2146 assert(!(i0 & 3));
2147 if (i0 < 1024)
2148 VLDR_F64(r0, r1, i0 >> 2);
2149 else {
2150 reg = jit_get_reg(jit_class_gpr);
2151 addi(rn(reg), r1, i0);
2152 VLDR_F64(r0, rn(reg), 0);
2153 jit_unget_reg(reg);
2154 }
2155 }
2156 else {
2157 i0 = -i0;
2158 assert(!(i0 & 3));
2159 if (i0 < 1024)
2160 VLDRN_F64(r0, r1, i0 >> 2);
2161 else {
2162 reg = jit_get_reg(jit_class_gpr);
2163 subi(rn(reg), r1, i0);
2164 VLDR_F64(r0, rn(reg), 0);
2165 jit_unget_reg(reg);
2166 }
2167 }
2168 }
2169 else {
2170 reg = jit_get_reg(jit_class_gpr);
2171 addi(rn(reg), r1, i0);
2172 ldr_i(r0, rn(reg));
2173 ldxi_i(r0 + 1, rn(reg), 4);
2174 jit_unget_reg(reg);
2175 }
2176}
2177
2178static void
2179_vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2180{
2181 jit_int32_t reg;
2182 if (jit_fpr_p(r0)) {
2183 reg = jit_get_reg(jit_class_gpr);
2184 movi(rn(reg), i0);
2185 VSTR_F32(r0, rn(reg), 0);
2186 jit_unget_reg(reg);
2187 }
2188 else
2189 sti_i(i0, r0);
2190}
2191
2192static void
2193_vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2194{
2195 jit_int32_t reg;
2196 reg = jit_get_reg(jit_class_gpr);
2197 movi(rn(reg), i0);
2198 if (jit_fpr_p(r0))
2199 VSTR_F64(r0, rn(reg), 0);
2200 else {
2201 str_i(rn(reg), r0);
2202 stxi_i(4, rn(reg), r0 + 1);
2203 }
2204 jit_unget_reg(reg);
2205}
2206
2207static void
2208_vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2209{
2210 jit_int32_t reg;
2211 if (jit_fpr_p(r2)) {
2212 reg = jit_get_reg(jit_class_gpr);
2213 addr(rn(reg), r0, r1);
2214 VSTR_F32(r2, rn(reg), 0);
2215 jit_unget_reg(reg);
2216 }
2217 else
2218 stxr_i(r0, r1, r2);
2219}
2220
2221static void
2222_vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2223{
2224 jit_int32_t reg;
2225 reg = jit_get_reg(jit_class_gpr);
2226 addr(rn(reg), r0, r1);
2227 if (jit_fpr_p(r2))
2228 VSTR_F64(r2, rn(reg), 0);
2229 else {
2230 str_i(rn(reg), r2);
2231 stxi_i(4, rn(reg), r2 + 1);
2232 }
2233 jit_unget_reg(reg);
2234}
2235
2236static void
2237_vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2238{
2239 jit_int32_t reg;
2240 if (jit_fpr_p(r1)) {
2241 if (i0 >= 0) {
2242 assert(!(i0 & 3));
2243 if (i0 < 1024)
2244 VSTR_F32(r1, r0, i0 >> 2);
2245 else {
2246 reg = jit_get_reg(jit_class_gpr);
2247 addi(rn(reg), r0, i0);
2248 VSTR_F32(r1, rn(reg), 0);
2249 jit_unget_reg(reg);
2250 }
2251 }
2252 else {
2253 i0 = -i0;
2254 assert(!(i0 & 3));
2255 if (i0 < 1024)
2256 VSTRN_F32(r1, r0, i0 >> 2);
2257 else {
2258 reg = jit_get_reg(jit_class_gpr);
2259 subi(rn(reg), r0, i0);
2260 VSTR_F32(r1, rn(reg), 0);
2261 jit_unget_reg(reg);
2262 }
2263 }
2264 }
2265 else
2266 stxi_i(i0, r0, r1);
2267}
2268
2269static void
2270_vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2271{
2272 jit_int32_t reg;
2273 if (jit_fpr_p(r1)) {
2274 if (i0 >= 0) {
2275 assert(!(i0 & 3));
2276 if (i0 < 0124)
2277 VSTR_F64(r1, r0, i0 >> 2);
2278 else {
2279 reg = jit_get_reg(jit_class_gpr);
2280 addi(rn(reg), r0, i0);
2281 VSTR_F64(r1, rn(reg), 0);
2282 jit_unget_reg(reg);
2283 }
2284 }
2285 else {
2286 i0 = -i0;
2287 assert(!(i0 & 3));
2288 if (i0 < 1024)
2289 VSTRN_F64(r1, r0, i0 >> 2);
2290 else {
2291 reg = jit_get_reg(jit_class_gpr);
2292 subi(rn(reg), r0, i0);
2293 VSTR_F64(r1, rn(reg), 0);
2294 jit_unget_reg(reg);
2295 }
2296 }
2297 }
2298 else {
2299 reg = jit_get_reg(jit_class_gpr);
2300 addi(rn(reg), r0, i0);
2301 str_i(rn(reg), r1);
2302 stxi_i(4, rn(reg), r1 + 1);
2303 jit_unget_reg(reg);
2304 }
2305}
2306
2307static void
2308_vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2309{
2310 jit_int32_t reg;
2311
2312 assert(_jitc->function->self.call & jit_call_varargs);
2313
2314 /* Adjust pointer. */
2315 reg = jit_get_reg(jit_class_gpr);
2316 andi(rn(reg), r1, 7);
2317 addr(r1, r1, rn(reg));
2318 jit_unget_reg(reg);
2319
2320 /* Load argument. */
2321 vfp_ldr_d(r0, r1);
2322
2323 /* Update stack pointer. */
2324 addi(r1, r1, sizeof(jit_float64_t));
2325}
/* The generator macros are local to this file section. */
# undef fopi
# undef dopi
# undef fbopi
# undef dbopi
2330#endif