git subrepo pull (merge) --force deps/lightning
[pcsx_rearmed.git] / deps / lightning / lib / jit_arm-vfp.c
1 /*
2  * Copyright (C) 2012-2023  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #if PROTO
21 /* as per vfp_regno macro, required due to "support" to soft float registers
22  * or using integer registers as arguments to float operations */
23 #  define _D8_REGNO                     32
24 #  define ARM_V_Q                       0x00000040 /* defined a second time, with the same value, next to the other ARM_V_* vector flags */
/* FPSCR bit masks; each name follows the field or flag it selects. */
25 #  define FPSCR_N                       0x80000000 /* Negative flag */
26 #  define FPSCR_Z                       0x40000000 /* Zero flag */
27 #  define FPSCR_C                       0x20000000 /* Carry flag */
28 #  define FPSCR_V                       0x10000000 /* Overflow flag */
29 #  define FPSCR_QC                      0x08000000 /* Cumulative saturation */
30 #  define FPSCR_AHP                     0x04000000 /* Alt. half-precision */
31 #  define FPSCR_DN                      0x02000000 /* Default NaN mode */
32 #  define FPSCR_FZ                      0x01000000 /* Flush to zero */
33 #  define FPSCR_RMASK                   0x00c00000 /* Rounding mode field; holds one of the four values below */
34 #    define FPSCR_RN                    0x00000000 /* Round to Nearest */
35 #    define FPSCR_RP                    0x00400000 /* Round to Plus Infinity */
36 #    define FPSCR_RM                    0x00800000 /* Round to Minus Infinity */
37 #    define FPSCR_RZ                    0x00c00000 /* Round towards Zero */
38 #  define FPSCR_STRIDE                  0x00300000 /* Vector stride field */
39 #  define FPSCR_RES1                    0x00080000 /* Reserved, UNK/SBZP */
40 #  define FPSCR_LEN                     0x00070000 /* Vector length field */
41 #  define FPSCR_IDE                     0x00008000 /* Input Denormal trap */
42 #  define FPSCR_IXE                     0x00001000 /* Inexact trap */
43 #  define FPSCR_UFE                     0x00000800 /* Underflow trap */
44 #  define FPSCR_OFE                     0x00000400 /* Overflow trap */
45 #  define FPSCR_DZE                     0x00000200 /* Division by zero trap */
46 #  define FPSCR_IOE                     0x00000100 /* Invalid Operation trap */
47 #  define FPSCR_IDC                     0x00000080 /* Input Denormal flag */
48 #  define FPSCR_RES0                    0x00000060 /* Reserved, UNK/SBZP */
49 #  define FPSCR_IXC                     0x00000010 /* Inexact flag */
50 #  define FPSCR_UFC                     0x00000008 /* Underflow flag */
51 #  define FPSCR_OFC                     0x00000004 /* Overflow flag */
52 #  define FPSCR_DZC                     0x00000002 /* Division by zero flag */
53 #  define FPSCR_IOC                     0x00000001 /* Invalid Operation flag */
/* Modifier bits that the instruction macros OR into an opcode template. */
54 #  define ARM_V_E                       0x00000080 /* ARM_VCMP: VCMPE form, exception if NaN */
55 #  define ARM_V_Z                       0x00010000 /* ARM_VCMP with zero */
56 #  define ARM_V_F64                     0x00000100 /* OR-ed in by every *_F64 instruction macro */
/* VFP opcode templates.  The instruction macros defined later in this
 * section pass them (optionally OR-ed with ARM_V_F64 or, for ARM_VCMP,
 * with ARM_V_E / ARM_V_Z) to the cc_vo* emitters together with the
 * condition code and the register operands. */
57 #  define ARM_VADD_F                    0x0e300a00
58 #  define ARM_VSUB_F                    0x0e300a40
59 #  define ARM_VMUL_F                    0x0e200a00
60 #  define ARM_VDIV_F                    0x0e800a00
61 #  define ARM_VABS_F                    0x0eb00ac0
62 #  define ARM_VNEG_F                    0x0eb10a40
63 #  define ARM_VSQRT_F                   0x0eb10ac0
64 #  define ARM_VMOV_F                    0x0eb00a40
65 #  define ARM_VMOV_A_S                  0x0e100a10 /* vmov rn, sn */
66 #  define ARM_VMOV_S_A                  0x0e000a10 /* vmov sn, rn */
67 #  define ARM_VMOV_AA_D                 0x0c500b10 /* vmov rn,rn, dn */
68 #  define ARM_VMOV_D_AA                 0x0c400b10 /* vmov dn, rn,rn */
69 #  define ARM_VCMP                      0x0eb40a40
70 #  define ARM_VMRS                      0x0ef10a10 /* used by CC_VMRS */
71 #  define ARM_VMSR                      0x0ee10a10 /* used by CC_VMSR */
/*
 * VCVT opcode construction.  ARM_VCVT (int <-> float) and ARM_VCVT_F
 * (float <-> float) are the base templates; ARM_VCVT_2I, ARM_VCVT_2S and
 * ARM_VCVT_RS select the variant and ARM_V_F64 the double precision form.
 *
 * Every composite value is wrapped in parentheses so that it stays a
 * single operand wherever it is expanded; without them an expression such
 * as `ARM_VCVT_F32_S32 & mask' would apply the mask to the last term only,
 * because `&', `==' and `+' all bind tighter than `|'.
 */
#  define ARM_VCVT_2I                   0x00040000 /* to integer */
#  define ARM_VCVT_2S                   0x00010000 /* to signed */
#  define ARM_VCVT_RS                   0x00000080 /* round to zero or signed */
#  define ARM_VCVT                      0x0eb80a40
#  define ARM_VCVT_S32_F32              (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
#  define ARM_VCVT_U32_F32              (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS)
#  define ARM_VCVT_S32_F64              (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_U32_F64              (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_F32_S32              (ARM_VCVT|ARM_VCVT_RS)
#  define ARM_VCVT_F32_U32              ARM_VCVT
#  define ARM_VCVT_F64_S32              (ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_F64_U32              (ARM_VCVT|ARM_V_F64)
#  define ARM_VCVT_F                    0x0eb70ac0
#  define ARM_VCVT_F32_F64              ARM_VCVT_F
#  define ARM_VCVT_F64_F32              (ARM_VCVT_F|ARM_V_F64)
/* The VCVTR forms are the same opcodes without ARM_VCVT_RS. */
#  define ARM_VCVTR_S32_F32             (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S)
#  define ARM_VCVTR_U32_F32             (ARM_VCVT|ARM_VCVT_2I)
#  define ARM_VCVTR_S32_F64             (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64)
#  define ARM_VCVTR_U32_F64             (ARM_VCVT|ARM_VCVT_2I|ARM_V_F64)
/* Opcode templates used by the VFMA / VFMS / VFNMA / VFNMS instruction
 * macros of this section; the *_F64 forms OR in ARM_V_F64. */
91 #  define ARM_VFMA                      0x0ea00a00
92 #  define ARM_VFMS                      0x0ea00a40
93 #  define ARM_VFNMA                     0x0e900a00
94 #  define ARM_VFNMS                     0x0e900a40
/* Flag bits for the vector opcodes.  Only the uses visible in the
 * instruction macros of this section are described here. */
95 #  define ARM_V_D                       0x00400000
96 #  define ARM_V_N                       0x00000080
97 #  define ARM_V_Q                       0x00000040 /* OR-ed in by the *Q macros that use the voqqq / vo_qq emitters */
98 #  define ARM_V_M                       0x00000020
99 #  define ARM_V_U                       0x01000000 /* OR-ed in by the _U8/_U16/_U32/_U64 macro variants */
100 #  define ARM_V_I16                     0x00100000 /* element size selectors of the three operand integer macros */
101 #  define ARM_V_I32                     0x00200000
102 #  define ARM_V_I64                     0x00300000
103 #  define ARM_V_S16                     0x00040000 /* element size selectors of the VABS / VNEG families */
104 #  define ARM_V_S32                     0x00080000
/* Integer vector opcode templates. */
105 #  define ARM_VADD_I                    0x02000800
106 #  define ARM_VQADD_I                   0x02000010 /* set flag on over/carry */
107 #  define ARM_VADDL_I                   0x02800000 /* q=d+d */
108 #  define ARM_VADDW_I                   0x02800100 /* q=q+d */
109 #  define ARM_VSUB_I                    0x03000800
110 #  define ARM_VQSUB_I                   0x02000210 /* set flag on over/carry */
111 #  define ARM_VSUBL_I                   0x02800200
112 #  define ARM_VSUBW_I                   0x02800300
113 #  define ARM_VMUL_I                    0x02000910
114 #  define ARM_VMULL_I                   0x02800c00
115 #  define ARM_VABS_I                    0x03b10300
116 #  define ARM_VQABS_I                   0x03b00700 /* sets flag on overflow */
117 #  define ARM_VNEG_I                    0x03b10380
118 #  define ARM_VQNEG_I                   0x03b00780 /* sets flag on overflow */
/* Bitwise vector opcode templates. */
119 #  define ARM_VAND                      0x02000110
120 #  define ARM_VBIC                      0x02100110
121 #  define ARM_VORR                      0x02200110
122 #  define ARM_VORN                      0x02300110
123 #  define ARM_VEOR                      0x03000110
124 #  define ARM_VMOVL_S8                  0x00080000
125 #  define ARM_VMOVL_S16                 0x00100000
126 #  define ARM_VMOVL_S32                 0x00200000
127 #  define ARM_VMOVL_I                   0x02800a10
128 #  define ARM_VMOVI                     0x02800010
129 #  define ARM_VMVNI                     0x02800030
/* Load/store templates: ARM_VM is combined with ARM_M_* bits by the
 * VLDM* / VSTM* macros of this section. */
130 #  define ARM_VLDR                      0x0d100a00
131 #  define ARM_VSTR                      0x0d000a00
132 #  define ARM_VM                        0x0c000a00
/* Modifiers OR-ed into ARM_VMOV_A_D / ARM_VMOV_D_A by the VMOV_A_* and
 * VMOV_V_* macros. */
133 #  define ARM_VMOV_ADV_U                0x00800000 /* zero extend */
134 #  define ARM_VMOV_ADV_8                0x00400000
135 #  define ARM_VMOV_ADV_16               0x00000020
136 #  define ARM_VMOV_A_D                  0x0e100b10
137 #  define ARM_VMOV_D_A                  0x0e000b10
138 #  define ARM_VCNT                      0x03b00500
/*
 * Low level emitters.
 *
 * Each static function receives the jit state first; the _cc_* ones then
 * take a condition code, an opcode template and the operands (named cc, o,
 * r0, r1, r2, i0 in the wrapper macros).  The lower case macros supply the
 * implicit _jit argument; the macros without the cc_ prefix additionally
 * pass ARM_CC_NV as the condition code.
 *
 * NOTE(review): maybe_unused is defined outside this section; presumably
 * it silences unused-function warnings for emitters that no macro user
 * reaches -- confirm.
 */
139 #  define vodi(oi,r0)                   _vodi(_jit,oi,r0)
140 static void _vodi(jit_state_t*,int,int) maybe_unused;
141 #  define voqi(oi,r0)                   _voqi(_jit,oi,r0)
142 static void _voqi(jit_state_t*,int,int) maybe_unused;
143 #  define vo_ss(o,r0,r1)                _cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
144 #  define cc_vo_ss(cc,o,r0,r1)          _cc_vo_ss(_jit,cc,o,r0,r1)
145 static void _cc_vo_ss(jit_state_t*,int,int,int,int);
146 #  define vo_dd(o,r0,r1)                _cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
147 #  define cc_vo_dd(cc,o,r0,r1)          _cc_vo_dd(_jit,cc,o,r0,r1)
148 static void _cc_vo_dd(jit_state_t*,int,int,int,int);
149 #  define vo_qd(o,r0,r1)                _cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
150 #  define cc_vo_qd(cc,o,r0,r1)          _cc_vo_qd(_jit,cc,o,r0,r1)
151 static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
152 #  define vo_qq(o,r0,r1)                _cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
153 #  define cc_vo_qq(cc,o,r0,r1)          _cc_vo_qq(_jit,cc,o,r0,r1)
154 static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
155 #  define vorr_(o,r0,r1)                _cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
156 #  define cc_vorr_(cc,o,r0,r1)          _cc_vorr_(_jit,cc,o,r0,r1)
157 static void _cc_vorr_(jit_state_t*,int,int,int,int);
158 #  define vors_(o,r0,r1)                _cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
159 #  define cc_vors_(cc,o,r0,r1)          _cc_vors_(_jit,cc,o,r0,r1)
160 static void _cc_vors_(jit_state_t*,int,int,int,int);
161 #  define vorv_(o,r0,r1)                _cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
162 #  define cc_vorv_(cc,o,r0,r1)          _cc_vorv_(_jit,cc,o,r0,r1)
163 static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
/* vo_vv has no cc_ variant in this section. */
164 #  define vo_vv(o,r0,r1)                _cc_vo_vv(_jit,ARM_CC_NV,o,r0,r1)
165 static void _cc_vo_vv(jit_state_t*,int,int,int,int) maybe_unused;
166 #  define vori_(o,r0,r1)                _cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
167 #  define cc_vori_(cc,o,r0,r1)          _cc_vori_(_jit,cc,o,r0,r1)
168 static void _cc_vori_(jit_state_t*,int,int,int,int);
/* Three operand emitters. */
169 #  define vorrd(o,r0,r1,r2)             _cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
170 #  define cc_vorrd(cc,o,r0,r1,r2)       _cc_vorrd(_jit,cc,o,r0,r1,r2)
171 static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
172 #  define vosss(o,r0,r1,r2)             _cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
173 #  define cc_vosss(cc,o,r0,r1,r2)       _cc_vosss(_jit,cc,o,r0,r1,r2)
174 static void _cc_vosss(jit_state_t*,int,int,int,int,int);
175 #  define voddd(o,r0,r1,r2)             _cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
176 #  define cc_voddd(cc,o,r0,r1,r2)       _cc_voddd(_jit,cc,o,r0,r1,r2)
177 static void _cc_voddd(jit_state_t*,int,int,int,int,int);
178 #  define voqdd(o,r0,r1,r2)             _cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
179 #  define cc_voqdd(cc,o,r0,r1,r2)       _cc_voqdd(_jit,cc,o,r0,r1,r2)
180 static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
181 #  define voqqd(o,r0,r1,r2)             _cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
182 #  define cc_voqqd(cc,o,r0,r1,r2)       _cc_voqqd(_jit,cc,o,r0,r1,r2)
183 static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
184 #  define voqqq(o,r0,r1,r2)             _cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
185 #  define cc_voqqq(cc,o,r0,r1,r2)       _cc_voqqq(_jit,cc,o,r0,r1,r2)
186 static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
/* Emitters that take an extra i0 argument; only cc_ variants exist. */
187 #  define cc_vldst(cc,o,r0,r1,i0)       _cc_vldst(_jit,cc,o,r0,r1,i0)
188 static void _cc_vldst(jit_state_t*,int,int,int,int,int);
189 #  define cc_vorsl(cc,o,r0,r1,i0)       _cc_vorsl(_jit,cc,o,r0,r1,i0)
190 static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
/*
 * VFP arithmetic instruction macros.
 *
 * CC_<op>(cc, ...) takes an explicit condition code; <op>(...) is the same
 * instruction with ARM_CC_AL.  The _F32 forms go through cc_vosss (three
 * operands) or cc_vo_ss (two operands); the _F64 forms OR ARM_V_F64 into
 * the opcode and go through cc_voddd / cc_vo_dd.
 */
191 #  define CC_VADD_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VADD_F,r0,r1,r2)
192 #  define VADD_F32(r0,r1,r2)            CC_VADD_F32(ARM_CC_AL,r0,r1,r2)
193 #  define CC_VADD_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VADD_F|ARM_V_F64,r0,r1,r2)
194 #  define VADD_F64(r0,r1,r2)            CC_VADD_F64(ARM_CC_AL,r0,r1,r2)
195 #  define CC_VSUB_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VSUB_F,r0,r1,r2)
196 #  define VSUB_F32(r0,r1,r2)            CC_VSUB_F32(ARM_CC_AL,r0,r1,r2)
197 #  define CC_VSUB_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VSUB_F|ARM_V_F64,r0,r1,r2)
198 #  define VSUB_F64(r0,r1,r2)            CC_VSUB_F64(ARM_CC_AL,r0,r1,r2)
199 #  define CC_VMUL_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VMUL_F,r0,r1,r2)
200 #  define VMUL_F32(r0,r1,r2)            CC_VMUL_F32(ARM_CC_AL,r0,r1,r2)
201 #  define CC_VMUL_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VMUL_F|ARM_V_F64,r0,r1,r2)
202 #  define VMUL_F64(r0,r1,r2)            CC_VMUL_F64(ARM_CC_AL,r0,r1,r2)
203 #  define CC_VDIV_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VDIV_F,r0,r1,r2)
204 #  define VDIV_F32(r0,r1,r2)            CC_VDIV_F32(ARM_CC_AL,r0,r1,r2)
205 #  define CC_VDIV_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VDIV_F|ARM_V_F64,r0,r1,r2)
206 #  define VDIV_F64(r0,r1,r2)            CC_VDIV_F64(ARM_CC_AL,r0,r1,r2)
207 #  define CC_VABS_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VABS_F,r0,r1)
208 #  define VABS_F32(r0,r1)               CC_VABS_F32(ARM_CC_AL,r0,r1)
209 #  define CC_VABS_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VABS_F|ARM_V_F64,r0,r1)
210 #  define VABS_F64(r0,r1)               CC_VABS_F64(ARM_CC_AL,r0,r1)
211 #  define CC_VNEG_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VNEG_F,r0,r1)
212 #  define VNEG_F32(r0,r1)               CC_VNEG_F32(ARM_CC_AL,r0,r1)
213 #  define CC_VNEG_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VNEG_F|ARM_V_F64,r0,r1)
214 #  define VNEG_F64(r0,r1)               CC_VNEG_F64(ARM_CC_AL,r0,r1)
215 #  define CC_VSQRT_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VSQRT_F,r0,r1)
216 #  define VSQRT_F32(r0,r1)              CC_VSQRT_F32(ARM_CC_AL,r0,r1)
217 #  define CC_VSQRT_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VSQRT_F|ARM_V_F64,r0,r1)
218 #  define VSQRT_F64(r0,r1)              CC_VSQRT_F64(ARM_CC_AL,r0,r1)
/*
 * Fused multiply instruction macros (VFMA, VFMS, VFNMA, VFNMS).
 *
 * CC_<op>(cc, r0, r1, r2) takes an explicit condition code; <op>(...) is
 * the same instruction with ARM_CC_AL.
 *
 * The _F32 forms go through cc_vosss, the single precision three operand
 * emitter, exactly like VADD_F32 / VSUB_F32 / VMUL_F32 / VDIV_F32; the
 * _F64 forms OR in ARM_V_F64 and go through cc_voddd.  Sending an opcode
 * without ARM_V_F64 to the double register emitter would encode the
 * operands as d registers for an instruction that works on s registers.
 *
 * NOTE(review): the emitter bodies and the users of these macros are not
 * visible from this section; confirm that _cc_vosss produces the same
 * encoding as _cc_voddd for the register numbers the callers pass.
 */
#  define CC_VFMA_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VFMA,r0,r1,r2)
#  define VFMA_F32(r0,r1,r2)            CC_VFMA_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VFMA_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VFMA|ARM_V_F64,r0,r1,r2)
#  define VFMA_F64(r0,r1,r2)            CC_VFMA_F64(ARM_CC_AL,r0,r1,r2)
#  define CC_VFMS_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VFMS,r0,r1,r2)
#  define VFMS_F32(r0,r1,r2)            CC_VFMS_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VFMS_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VFMS|ARM_V_F64,r0,r1,r2)
#  define VFMS_F64(r0,r1,r2)            CC_VFMS_F64(ARM_CC_AL,r0,r1,r2)
#  define CC_VFNMA_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VFNMA,r0,r1,r2)
#  define VFNMA_F32(r0,r1,r2)           CC_VFNMA_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VFNMA_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VFNMA|ARM_V_F64,r0,r1,r2)
#  define VFNMA_F64(r0,r1,r2)           CC_VFNMA_F64(ARM_CC_AL,r0,r1,r2)
#  define CC_VFNMS_F32(cc,r0,r1,r2)     cc_vosss(cc,ARM_VFNMS,r0,r1,r2)
#  define VFNMS_F32(r0,r1,r2)           CC_VFNMS_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VFNMS_F64(cc,r0,r1,r2)     cc_voddd(cc,ARM_VFNMS|ARM_V_F64,r0,r1,r2)
#  define VFNMS_F64(r0,r1,r2)           CC_VFNMS_F64(ARM_CC_AL,r0,r1,r2)
/*
 * Register move and compare instruction macros.
 *
 * CC_<op>(cc, ...) takes an explicit condition code; <op>(...) uses
 * ARM_CC_AL.  For the moves between core and VFP registers the macro
 * arguments follow the operand order given on the ARM_VMOV_* opcode
 * comments (destination first); VMOV_D_AA and VMOV_S_A therefore hand
 * their arguments to the emitter in rotated / swapped order.
 */
235 #  define CC_VMOV_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
236 #  define VMOV_F32(r0,r1)               CC_VMOV_F32(ARM_CC_AL,r0,r1)
237 #  define CC_VMOV_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
238 #  define VMOV_F64(r0,r1)               CC_VMOV_F64(ARM_CC_AL,r0,r1)
239 #  define CC_VMOV_AA_D(cc,r0,r1,r2)     cc_vorrd(cc,ARM_VMOV_AA_D,r0,r1,r2)
240 #  define VMOV_AA_D(r0,r1,r2)           CC_VMOV_AA_D(ARM_CC_AL,r0,r1,r2)
241 #  define CC_VMOV_D_AA(cc,r0,r1,r2)     cc_vorrd(cc,ARM_VMOV_D_AA,r1,r2,r0)
242 #  define VMOV_D_AA(r0,r1,r2)           CC_VMOV_D_AA(ARM_CC_AL,r0,r1,r2)
243 #  define CC_VMOV_A_S(cc,r0,r1)         cc_vors_(cc,ARM_VMOV_A_S,r0,r1)
244 #  define VMOV_A_S(r0,r1)               CC_VMOV_A_S(ARM_CC_AL,r0,r1)
245 #  define CC_VMOV_S_A(cc,r0,r1)         cc_vors_(cc,ARM_VMOV_S_A,r1,r0)
246 #  define VMOV_S_A(r0,r1)               CC_VMOV_S_A(ARM_CC_AL,r0,r1)
/* Compares: ARM_V_E selects the VCMPE form, ARM_V_Z the compare with
 * zero form, for which 0 is passed as the second register argument. */
247 #  define CC_VCMP_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VCMP,r0,r1)
248 #  define VCMP_F32(r0,r1)               CC_VCMP_F32(ARM_CC_AL,r0,r1)
249 #  define CC_VCMP_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VCMP|ARM_V_F64,r0,r1)
250 #  define VCMP_F64(r0,r1)               CC_VCMP_F64(ARM_CC_AL,r0,r1)
251 #  define CC_VCMPE_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VCMP|ARM_V_E,r0,r1)
252 #  define VCMPE_F32(r0,r1)              CC_VCMPE_F32(ARM_CC_AL,r0,r1)
253 #  define CC_VCMPE_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VCMP|ARM_V_E|ARM_V_F64,r0,r1)
254 #  define VCMPE_F64(r0,r1)              CC_VCMPE_F64(ARM_CC_AL,r0,r1)
255 #  define CC_VCMPZ_F32(cc,r0)           cc_vo_ss(cc,ARM_VCMP|ARM_V_Z,r0,0)
256 #  define VCMPZ_F32(r0)                 CC_VCMPZ_F32(ARM_CC_AL,r0)
257 #  define CC_VCMPZ_F64(cc,r0)           cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_F64,r0,0)
258 #  define VCMPZ_F64(r0)                 CC_VCMPZ_F64(ARM_CC_AL,r0)
259 #  define CC_VCMPEZ_F32(cc,r0)          cc_vo_ss(cc,ARM_VCMP|ARM_V_Z|ARM_V_E,r0,0)
260 #  define VCMPEZ_F32(r0)                CC_VCMPEZ_F32(ARM_CC_AL,r0)
261 #  define CC_VCMPEZ_F64(cc,r0)          cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_E|ARM_V_F64,r0,0)
262 #  define VCMPEZ_F64(r0)                CC_VCMPEZ_F64(ARM_CC_AL,r0)
/*
 * VMRS / VMSR and VCVT instruction macros.
 *
 * CC_<op>(cc, ...) takes an explicit condition code; <op>(...) uses
 * ARM_CC_AL.  VMRS and VMSR take a single register and pass 0 as the
 * second register argument of cc_vorr_.
 */
263 #  define CC_VMRS(cc,r0)                cc_vorr_(cc,ARM_VMRS,r0,0)
264 #  define VMRS(r0)                      CC_VMRS(ARM_CC_AL,r0)
265 #  define CC_VMSR(cc,r0)                cc_vorr_(cc,ARM_VMSR,r0,0)
266 #  define VMSR(r0)                      CC_VMSR(ARM_CC_AL,r0)
/* Conversions are named VCVT_<to>_<from>.  All of them, including the
 * ones with an F64 operand, are emitted through cc_vo_ss.
 * NOTE(review): presumably intentional because the two operands have
 * different widths -- confirm against _cc_vo_ss. */
267 #  define CC_VCVT_S32_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_S32_F32,r0,r1)
268 #  define VCVT_S32_F32(r0,r1)           CC_VCVT_S32_F32(ARM_CC_AL,r0,r1)
269 #  define CC_VCVT_U32_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_U32_F32,r0,r1)
270 #  define VCVT_U32_F32(r0,r1)           CC_VCVT_U32_F32(ARM_CC_AL,r0,r1)
271 #  define CC_VCVT_S32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_S32_F64,r0,r1)
272 #  define VCVT_S32_F64(r0,r1)           CC_VCVT_S32_F64(ARM_CC_AL,r0,r1)
273 #  define CC_VCVT_U32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_U32_F64,r0,r1)
274 #  define VCVT_U32_F64(r0,r1)           CC_VCVT_U32_F64(ARM_CC_AL,r0,r1)
275 #  define CC_VCVT_F32_S32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_S32,r0,r1)
276 #  define VCVT_F32_S32(r0,r1)           CC_VCVT_F32_S32(ARM_CC_AL,r0,r1)
277 #  define CC_VCVT_F32_U32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_U32,r0,r1)
278 #  define VCVT_F32_U32(r0,r1)           CC_VCVT_F32_U32(ARM_CC_AL,r0,r1)
279 #  define CC_VCVT_F64_S32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_S32,r0,r1)
280 #  define VCVT_F64_S32(r0,r1)           CC_VCVT_F64_S32(ARM_CC_AL,r0,r1)
281 #  define CC_VCVT_F64_U32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_U32,r0,r1)
282 #  define VCVT_F64_U32(r0,r1)           CC_VCVT_F64_U32(ARM_CC_AL,r0,r1)
283 #  define CC_VCVT_F32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_F64,r0,r1)
284 #  define VCVT_F32_F64(r0,r1)           CC_VCVT_F32_F64(ARM_CC_AL,r0,r1)
285 #  define CC_VCVT_F64_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_F32,r0,r1)
286 #  define VCVT_F64_F32(r0,r1)           CC_VCVT_F64_F32(ARM_CC_AL,r0,r1)
/* VCVTR: same conversions built from the opcodes without ARM_VCVT_RS. */
287 #  define CC_VCVTR_S32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_S32_F32,r0,r1)
288 #  define VCVTR_S32_F32(r0,r1)          CC_VCVTR_S32_F32(ARM_CC_AL,r0,r1)
289 #  define CC_VCVTR_U32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_U32_F32,r0,r1)
290 #  define VCVTR_U32_F32(r0,r1)          CC_VCVTR_U32_F32(ARM_CC_AL,r0,r1)
291 #  define CC_VCVTR_S32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_S32_F64,r0,r1)
292 #  define VCVTR_S32_F64(r0,r1)          CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
293 #  define CC_VCVTR_U32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
294 #  define VCVTR_U32_F64(r0,r1)          CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
/*
 * Load / store multiple instruction macros, all emitted through cc_vorsl
 * with the ARM_VM template.
 *
 * The ARM_M_* bits come from outside this section; as used here the load
 * forms add ARM_M_L, the IA forms ARM_M_I, the DB forms ARM_M_B and the
 * _U forms ARM_M_U.  _F64 forms additionally OR in ARM_V_F64.
 * CC_<op>(cc, ...) takes an explicit condition code; <op>(...) uses
 * ARM_CC_AL.
 */
295 #  define CC_VLDMIA_F32(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
296 #  define VLDMIA_F32(r0,r1,i0)          CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
297 #  define CC_VLDMIA_F64(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
298 #  define VLDMIA_F64(r0,r1,i0)          CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
299 #  define CC_VSTMIA_F32(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
300 #  define VSTMIA_F32(r0,r1,i0)          CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
301 #  define CC_VSTMIA_F64(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
302 #  define VSTMIA_F64(r0,r1,i0)          CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)
303 #  define CC_VLDMIA_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
304 #  define VLDMIA_U_F32(r0,r1,i0)        CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
305 #  define CC_VLDMIA_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
306 #  define VLDMIA_U_F64(r0,r1,i0)        CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
307 #  define CC_VSTMIA_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
308 #  define VSTMIA_U_F32(r0,r1,i0)        CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
309 #  define CC_VSTMIA_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
310 #  define VSTMIA_U_F64(r0,r1,i0)        CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)
311 #  define CC_VLDMDB_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
312 #  define VLDMDB_U_F32(r0,r1,i0)        CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
313 #  define CC_VLDMDB_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
314 #  define VLDMDB_U_F64(r0,r1,i0)        CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
315 #  define CC_VSTMDB_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
316 #  define VSTMDB_U_F32(r0,r1,i0)        CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
317 #  define CC_VSTMDB_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
318 #  define VSTMDB_U_F64(r0,r1,i0)        CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)
/* VPUSH is VSTMDB_U and VPOP is VLDMIA_U with _SP_REGNO as first register. */
319 #  define CC_VPUSH_F32(cc,r0,i0)        CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
320 #  define VPUSH_F32(r0,i0)              CC_VPUSH_F32(ARM_CC_AL,r0,i0)
321 #  define CC_VPUSH_F64(cc,r0,i0)        CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
322 #  define VPUSH_F64(r0,i0)              CC_VPUSH_F64(ARM_CC_AL,r0,i0)
323 #  define CC_VPOP_F32(cc,r0,i0)         CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
324 #  define VPOP_F32(r0,i0)               CC_VPOP_F32(ARM_CC_AL,r0,i0)
325 #  define CC_VPOP_F64(cc,r0,i0)         CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
326 #  define VPOP_F64(r0,i0)               CC_VPOP_F64(ARM_CC_AL,r0,i0)
/*
 * Macros built on ARM_VMOV_A_D / ARM_VMOV_D_A, plus VCNT.
 *
 * The 8 and 16 bit forms OR in ARM_VMOV_ADV_8 / ARM_VMOV_ADV_16 and use
 * cc_vorv_; the 32 bit forms use cc_vori_.  The _U forms add
 * ARM_VMOV_ADV_U (zero extend).  The VMOV_V_* macros hand their two
 * arguments to the emitter in swapped order (r1,r0).
 * NOTE(review): by analogy with ARM_VMOV_A_S / ARM_VMOV_S_A the _A_ forms
 * presumably move vector -> core register and the _V_ forms core register
 * -> vector -- confirm against the emitters.
 * CC_<op>(cc, ...) takes an explicit condition code; <op>(...) uses
 * ARM_CC_AL.
 */
327 #  define CC_VMOV_A_S8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
328 #  define VMOV_A_S8(r0,r1)              CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
329 #  define CC_VMOV_A_U8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
330 #  define VMOV_A_U8(r0,r1)              CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
331 #  define CC_VMOV_A_S16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16,r0,r1)
332 #  define VMOV_A_S16(r0,r1)             CC_VMOV_A_S16(ARM_CC_AL,r0,r1)
333 #  define CC_VMOV_A_U16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16|ARM_VMOV_ADV_U,r0,r1)
334 #  define VMOV_A_U16(r0,r1)             CC_VMOV_A_U16(ARM_CC_AL,r0,r1)
335 #  define CC_VMOV_A_S32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_A_D,r0,r1)
336 #  define VMOV_A_S32(r0,r1)             CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
337 #  define CC_VMOV_A_U32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
338 #  define VMOV_A_U32(r0,r1)             CC_VMOV_A_U32(ARM_CC_AL,r0,r1)
339 #  define CC_VMOV_V_I8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
340 #  define VMOV_V_I8(r0,r1)              CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
341 #  define CC_VMOV_V_I16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
342 #  define VMOV_V_I16(r0,r1)             CC_VMOV_V_I16(ARM_CC_AL,r0,r1)
343 #  define CC_VMOV_V_I32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_D_A,r1,r0)
344 #  define VMOV_V_I32(r0,r1)             CC_VMOV_V_I32(ARM_CC_AL,r0,r1)
/* VCNT goes through vo_vv, i.e. with ARM_CC_NV as condition argument. */
345 #  define VCNT(r0,r1)                   vo_vv(ARM_VCNT,r0,r1)
/*
 * Integer vector additions (VADD, VQADD, VADDL, VADDW).
 *
 * Naming scheme visible in the expansions: the plain forms use voddd; a Q
 * appended to the mnemonic (VADDQ, VQADDQ) ORs in ARM_V_Q and uses voqqq;
 * the _I16/_I32/_I64 bits select the element size (8 bit forms add none);
 * _U variants OR in ARM_V_U.  VADDL uses voqdd (q=d+d) and VADDW uses
 * voqqd (q=q+d), as noted on the opcode templates.
 * All of these use the emitter macros without cc_ prefix, so ARM_CC_NV is
 * passed as the condition argument.
 */
346 #  define VADD_I8(r0,r1,r2)             voddd(ARM_VADD_I,r0,r1,r2)
347 #  define VADDQ_I8(r0,r1,r2)            voqqq(ARM_VADD_I|ARM_V_Q,r0,r1,r2)
348 #  define VADD_I16(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I16,r0,r1,r2)
349 #  define VADDQ_I16(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
350 #  define VADD_I32(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I32,r0,r1,r2)
351 #  define VADDQ_I32(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
352 #  define VADD_I64(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I64,r0,r1,r2)
353 #  define VADDQ_I64(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
354 #  define VQADD_S8(r0,r1,r2)            voddd(ARM_VQADD_I,r0,r1,r2)
355 #  define VQADDQ_S8(r0,r1,r2)           voqqq(ARM_VQADD_I|ARM_V_Q,r0,r1,r2)
356 #  define VQADD_U8(r0,r1,r2)            voddd(ARM_VQADD_I|ARM_V_U,r0,r1,r2)
357 #  define VQADDQ_U8(r0,r1,r2)           voqqq(ARM_VQADD_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
358 #  define VQADD_S16(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I16,r0,r1,r2)
359 #  define VQADDQ_S16(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
360 #  define VQADD_U16(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
361 #  define VQADDQ_U16(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
362 #  define VQADD_S32(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I32,r0,r1,r2)
363 #  define VQADDQ_S32(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
364 #  define VQADD_U32(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
365 #  define VQADDQ_U32(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
366 #  define VQADD_S64(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I64,r0,r1,r2)
367 #  define VQADDQ_S64(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
368 #  define VQADD_U64(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
369 #  define VQADDQ_U64(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
370 #  define VADDL_S8(r0,r1,r2)            voqdd(ARM_VADDL_I,r0,r1,r2)
371 #  define VADDL_U8(r0,r1,r2)            voqdd(ARM_VADDL_I|ARM_V_U,r0,r1,r2)
372 #  define VADDL_S16(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I16,r0,r1,r2)
373 #  define VADDL_U16(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
374 #  define VADDL_S32(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I32,r0,r1,r2)
375 #  define VADDL_U32(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
376 #  define VADDW_S8(r0,r1,r2)            voqqd(ARM_VADDW_I,r0,r1,r2)
377 #  define VADDW_U8(r0,r1,r2)            voqqd(ARM_VADDW_I|ARM_V_U,r0,r1,r2)
378 #  define VADDW_S16(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I16,r0,r1,r2)
379 #  define VADDW_U16(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
380 #  define VADDW_S32(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I32,r0,r1,r2)
381 #  define VADDW_U32(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/*
 * Integer vector subtractions (VSUB, VQSUB, VSUBL, VSUBW).
 *
 * Same layout as the addition macros: plain forms use voddd, mnemonics
 * with an appended Q OR in ARM_V_Q and use voqqq, the _I16/_I32/_I64 bits
 * select the element size and _U variants OR in ARM_V_U.  VSUBL uses
 * voqdd and VSUBW uses voqqd.  ARM_CC_NV is passed as the condition
 * argument by all of them.
 */
382 #  define VSUB_I8(r0,r1,r2)             voddd(ARM_VSUB_I,r0,r1,r2)
383 #  define VSUBQ_I8(r0,r1,r2)            voqqq(ARM_VSUB_I|ARM_V_Q,r0,r1,r2)
384 #  define VSUB_I16(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I16,r0,r1,r2)
385 #  define VSUBQ_I16(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
386 #  define VSUB_I32(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I32,r0,r1,r2)
387 #  define VSUBQ_I32(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
388 #  define VSUB_I64(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I64,r0,r1,r2)
389 #  define VSUBQ_I64(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
390 #  define VQSUB_S8(r0,r1,r2)            voddd(ARM_VQSUB_I,r0,r1,r2)
391 #  define VQSUBQ_S8(r0,r1,r2)           voqqq(ARM_VQSUB_I|ARM_V_Q,r0,r1,r2)
392 #  define VQSUB_U8(r0,r1,r2)            voddd(ARM_VQSUB_I|ARM_V_U,r0,r1,r2)
393 #  define VQSUBQ_U8(r0,r1,r2)           voqqq(ARM_VQSUB_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
394 #  define VQSUB_S16(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I16,r0,r1,r2)
395 #  define VQSUBQ_S16(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
396 #  define VQSUB_U16(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
397 #  define VQSUBQ_U16(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
398 #  define VQSUB_S32(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I32,r0,r1,r2)
399 #  define VQSUBQ_S32(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
400 #  define VQSUB_U32(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
401 #  define VQSUBQ_U32(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
402 #  define VQSUB_S64(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I64,r0,r1,r2)
403 #  define VQSUBQ_S64(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
404 #  define VQSUB_U64(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
405 #  define VQSUBQ_U64(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
406 #  define VSUBL_S8(r0,r1,r2)            voqdd(ARM_VSUBL_I,r0,r1,r2)
407 #  define VSUBL_U8(r0,r1,r2)            voqdd(ARM_VSUBL_I|ARM_V_U,r0,r1,r2)
408 #  define VSUBL_S16(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I16,r0,r1,r2)
409 #  define VSUBL_U16(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
410 #  define VSUBL_S32(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I32,r0,r1,r2)
411 #  define VSUBL_U32(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
412 #  define VSUBW_S8(r0,r1,r2)            voqqd(ARM_VSUBW_I,r0,r1,r2)
413 #  define VSUBW_U8(r0,r1,r2)            voqqd(ARM_VSUBW_I|ARM_V_U,r0,r1,r2)
414 #  define VSUBW_S16(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I16,r0,r1,r2)
415 #  define VSUBW_U16(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
416 #  define VSUBW_S32(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I32,r0,r1,r2)
417 #  define VSUBW_U32(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/*
 * Integer vector multiplications.
 *
 * VMUL_I*  : d = d * d, emitted through voddd.
 * VMULQ_I* : q = q * q, ORs in ARM_V_Q and is emitted through voqqq.
 * VMULL_*  : widening multiply, q = d * d.  Like VADDL and VSUBL, the
 *            result is a q register and both sources are d registers, so
 *            every VMULL form is emitted through voqdd.  (The signed forms
 *            used voddd and the unsigned ones voqqq, neither of which
 *            matches the q,d,d operand shape.)
 *
 * The _I16/_I32 bits select the element size and ARM_V_U the unsigned
 * form.  ARM_CC_NV is passed as the condition argument by all of them.
 */
#  define VMUL_I8(r0,r1,r2)             voddd(ARM_VMUL_I,r0,r1,r2)
#  define VMULQ_I8(r0,r1,r2)            voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
#  define VMUL_I16(r0,r1,r2)            voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
#  define VMULQ_I16(r0,r1,r2)           voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
#  define VMUL_I32(r0,r1,r2)            voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
#  define VMULQ_I32(r0,r1,r2)           voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)
#  define VMULL_S8(r0,r1,r2)            voqdd(ARM_VMULL_I,r0,r1,r2)
#  define VMULL_U8(r0,r1,r2)            voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2)
#  define VMULL_S16(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
#  define VMULL_U16(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
#  define VMULL_S32(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
#  define VMULL_U32(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
/*
 * Integer vector absolute value and negate (VABS, VQABS, VNEG, VQNEG).
 *
 * Two operand forms: the plain macros use vo_dd, the ones with a Q
 * appended to the mnemonic OR in ARM_V_Q and use vo_qq.  Element size is
 * selected with ARM_V_S16 / ARM_V_S32 (the 8 bit forms add no size bit).
 * The VQ* opcode templates carry the note "sets flag on overflow".
 * ARM_CC_NV is passed as the condition argument by all of them.
 */
430 #  define VABS_S8(r0,r1)                vo_dd(ARM_VABS_I,r0,r1)
431 #  define VABSQ_S8(r0,r1)               vo_qq(ARM_VABS_I|ARM_V_Q,r0,r1)
432 #  define VABS_S16(r0,r1)               vo_dd(ARM_VABS_I|ARM_V_S16,r0,r1)
433 #  define VABSQ_S16(r0,r1)              vo_qq(ARM_VABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
434 #  define VABS_S32(r0,r1)               vo_dd(ARM_VABS_I|ARM_V_S32,r0,r1)
435 #  define VABSQ_S32(r0,r1)              vo_qq(ARM_VABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
436 #  define VQABS_S8(r0,r1)               vo_dd(ARM_VQABS_I,r0,r1)
437 #  define VQABSQ_S8(r0,r1)              vo_qq(ARM_VQABS_I|ARM_V_Q,r0,r1)
438 #  define VQABS_S16(r0,r1)              vo_dd(ARM_VQABS_I|ARM_V_S16,r0,r1)
439 #  define VQABSQ_S16(r0,r1)             vo_qq(ARM_VQABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
440 #  define VQABS_S32(r0,r1)              vo_dd(ARM_VQABS_I|ARM_V_S32,r0,r1)
441 #  define VQABSQ_S32(r0,r1)             vo_qq(ARM_VQABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
442 #  define VNEG_S8(r0,r1)                vo_dd(ARM_VNEG_I,r0,r1)
443 #  define VNEGQ_S8(r0,r1)               vo_qq(ARM_VNEG_I|ARM_V_Q,r0,r1)
444 #  define VNEG_S16(r0,r1)               vo_dd(ARM_VNEG_I|ARM_V_S16,r0,r1)
445 #  define VNEGQ_S16(r0,r1)              vo_qq(ARM_VNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
446 #  define VNEG_S32(r0,r1)               vo_dd(ARM_VNEG_I|ARM_V_S32,r0,r1)
447 #  define VNEGQ_S32(r0,r1)              vo_qq(ARM_VNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
448 #  define VQNEG_S8(r0,r1)               vo_dd(ARM_VQNEG_I,r0,r1)
449 #  define VQNEGQ_S8(r0,r1)              vo_qq(ARM_VQNEG_I|ARM_V_Q,r0,r1)
450 #  define VQNEG_S16(r0,r1)              vo_dd(ARM_VQNEG_I|ARM_V_S16,r0,r1)
451 #  define VQNEGQ_S16(r0,r1)             vo_qq(ARM_VQNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
452 #  define VQNEG_S32(r0,r1)              vo_dd(ARM_VQNEG_I|ARM_V_S32,r0,r1)
453 #  define VQNEGQ_S32(r0,r1)             vo_qq(ARM_VQNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
/* Bitwise encoders (VAND, VBIC, VORR, VORN, VEOR).  The plain macros go
 * through voddd; the Q-suffixed macros OR in ARM_V_Q and go through voqqq.
 * VMOV/VMOVQ are not separate opcodes here: a register move is emitted as
 * VORR/VORRQ with the source register used for both operands. */
454 #  define VAND(r0,r1,r2)                voddd(ARM_VAND,r0,r1,r2)
455 #  define VANDQ(r0,r1,r2)               voqqq(ARM_VAND|ARM_V_Q,r0,r1,r2)
456 #  define VBIC(r0,r1,r2)                voddd(ARM_VBIC,r0,r1,r2)
457 #  define VBICQ(r0,r1,r2)               voqqq(ARM_VBIC|ARM_V_Q,r0,r1,r2)
458 #  define VORR(r0,r1,r2)                voddd(ARM_VORR,r0,r1,r2)
459 #  define VORRQ(r0,r1,r2)               voqqq(ARM_VORR|ARM_V_Q,r0,r1,r2)
460 #  define VORN(r0,r1,r2)                voddd(ARM_VORN,r0,r1,r2)
461 #  define VORNQ(r0,r1,r2)               voqqq(ARM_VORN|ARM_V_Q,r0,r1,r2)
462 #  define VEOR(r0,r1,r2)                voddd(ARM_VEOR,r0,r1,r2)
463 #  define VEORQ(r0,r1,r2)               voqqq(ARM_VEOR|ARM_V_Q,r0,r1,r2)
464 #  define VMOV(r0,r1)                   VORR(r0,r1,r1)
465 #  define VMOVQ(r0,r1)                  VORRQ(r0,r1,r1)
/* Move long (VMOVL) encoders, all going through vo_qd.  The source element
 * size is selected with ARM_VMOVL_S8/ARM_VMOVL_S16/ARM_VMOVL_S32 (these
 * size flags are used for the unsigned macros too); ARM_V_U is ORed in for
 * the unsigned variants. */
466 #  define VMOVL_S8(r0,r1)               vo_qd(ARM_VMOVL_I|ARM_VMOVL_S8,r0,r1)
467 #  define VMOVL_U8(r0,r1)               vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S8,r0,r1)
468 #  define VMOVL_S16(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_VMOVL_S16,r0,r1)
469 #  define VMOVL_U16(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S16,r0,r1)
470 #  define VMOVL_S32(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_VMOVL_S32,r0,r1)
471 #  define VMOVL_U32(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S32,r0,r1)
/* Immediate-load encoders: "oi" carries the already encoded opcode and
 * immediate bits, r0 is the register argument.  VIMMQ additionally ORs
 * ARM_V_Q into "oi" and uses voqi instead of vodi. */
472 /* "oi" should be the result of encode_vfp_double */
473 #  define VIMM(oi,r0)                   vodi(oi,r0)
474 #  define VIMMQ(oi,r0)                  voqi(oi|ARM_V_Q,r0)
/* VFP load/store encoders built on cc_vldst(cc, opcode, r0, r1, i0).
 * The CC_ forms take an explicit condition code; the unprefixed forms pass
 * ARM_CC_AL.  ARM_V_F64 is ORed in for the double precision variants.
 * The VLDR_ and VSTR_ forms OR in ARM_P while the VLDRN_ and VSTRN_ forms omit it.
 * NOTE(review): ARM_P presumably selects adding the offset and the "N"
 * forms subtracting it -- confirm against the ARM_P definition. */
475 /* index is multiplied by four */
476 #  define CC_VLDRN_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR,r0,r1,i0)
477 #  define VLDRN_F32(r0,r1,i0)           CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
478 #  define CC_VLDR_F32(cc,r0,r1,i0)      cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
479 #  define VLDR_F32(r0,r1,i0)            CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
480 #  define CC_VLDRN_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
481 #  define VLDRN_F64(r0,r1,i0)           CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
482 #  define CC_VLDR_F64(cc,r0,r1,i0)      cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
483 #  define VLDR_F64(r0,r1,i0)            CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)
484 #  define CC_VSTRN_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR,r0,r1,i0)
485 #  define VSTRN_F32(r0,r1,i0)           CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
486 #  define CC_VSTR_F32(cc,r0,r1,i0)      cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
487 #  define VSTR_F32(r0,r1,i0)            CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
488 #  define CC_VSTRN_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
489 #  define VSTRN_F64(r0,r1,i0)           CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
490 #  define CC_VSTR_F64(cc,r0,r1,i0)      cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
491 #  define VSTR_F64(r0,r1,i0)            CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
/* Population count and move helpers.
 * Each "vfp_xxx" macro either forwards to an out-of-line "_vfp_xxx"
 * function, supplying the implicit _jit state as first argument, or
 * expands directly to an instruction macro (VMOV_S_A, VMOV_A_S,
 * VMOV_D_AA, VMOV_AA_D).  The movi_f/movi_d forms take the constant as a
 * jit_float32_t/jit_float64_t; the movi_w_f/movi_ww_d forms take it as one
 * or two jit_word_t values. */
492 #  define vfp_popcntr(r0,r1)            _vfp_popcntr(_jit,r0,r1)
493 static void _vfp_popcntr(jit_state_t*,jit_int32_t,jit_int32_t);
494 #  define vfp_movr_f(r0,r1)             _vfp_movr_f(_jit,r0,r1)
495 static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
496 #  define vfp_movi_f(r0,i0)             _vfp_movi_f(_jit,r0,i0)
497 static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
498 #  define vfp_movr_w_f(r0, r1)          VMOV_S_A(r0, r1)
499 #  define vfp_movr_f_w(r0, r1)          VMOV_A_S(r0, r1)
500 #  define vfp_movi_w_f(r0, i0)          _vfp_movi_w_f(_jit, r0, i0)
501 static void _vfp_movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
502 #  define vfp_movr_d(r0,r1)             _vfp_movr_d(_jit,r0,r1)
503 static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
504 #  define vfp_movi_d(r0,i0)             _vfp_movi_d(_jit,r0,i0)
505 static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
506 #  define vfp_movr_ww_d(r0, r1, r2)     VMOV_D_AA(r0, r1, r2)
507 #  define vfp_movr_d_ww(r0, r1, r2)     VMOV_AA_D(r0, r1, r2)
/* NOTE(review): the vfp_movr_d_ww macro above expands to VMOV_AA_D and
 * never references _vfp_movr_d_ww, so this prototype looks stale --
 * confirm whether a definition or another user exists. */
508 static void _vfp_movr_d_ww(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
509 #  define vfp_movi_ww_d(r0, i0, i1)    _vfp_movi_ww_d(_jit, r0, i0, i1)
510 static void _vfp_movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
/* Conversion helpers.  Every macro forwards to an out-of-line function,
 * supplying the implicit _jit state, and takes two register numbers.
 * NOTE(review): by the naming, extr_f/extr_d presumably convert an integer
 * to float/double, extr_d_f and extr_f_d convert between the two float
 * widths, and truncr_*_i truncate to an integer -- confirm against the
 * definitions. */
511 #  define vfp_extr_f(r0,r1)             _vfp_extr_f(_jit,r0,r1)
512 static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
513 #  define vfp_extr_d(r0,r1)             _vfp_extr_d(_jit,r0,r1)
514 static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
515 #  define vfp_extr_d_f(r0,r1)           _vfp_extr_d_f(_jit,r0,r1)
516 static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
517 #  define vfp_extr_f_d(r0,r1)           _vfp_extr_f_d(_jit,r0,r1)
518 static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
519 #  define vfp_truncr_f_i(r0,r1)         _vfp_truncr_f_i(_jit,r0,r1)
520 static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
521 #  define vfp_truncr_d_i(r0,r1)         _vfp_truncr_d_i(_jit,r0,r1)
522 static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
/* Unary and multiply-accumulate helpers.
 * absr/negr/sqrtr expand directly to the VABS/VNEG/VSQRT F32/F64
 * instruction macros.  The fmar/fmsr/fnmar/fnmsr helpers are out-of-line
 * functions taking four register numbers, for both the _f (single) and
 * _d (double) variants.
 * NOTE(review): the exact operand roles and sign conventions of the
 * fma-style helpers are not visible here -- see their definitions. */
523 #  define vfp_absr_f(r0,r1)             VABS_F32(r0,r1)
524 #  define vfp_absr_d(r0,r1)             VABS_F64(r0,r1)
525 #  define vfp_negr_f(r0,r1)             VNEG_F32(r0,r1)
526 #  define vfp_negr_d(r0,r1)             VNEG_F64(r0,r1)
527 #  define vfp_sqrtr_f(r0,r1)            VSQRT_F32(r0,r1)
528 #  define vfp_fmar_f(r0,r1,r2,r3)       _vfp_fmar_f(_jit,r0,r1,r2,r3)
529 static void _vfp_fmar_f(jit_state_t*,
530                         jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
531 #  define vfp_fmsr_f(r0,r1,r2,r3)       _vfp_fmsr_f(_jit,r0,r1,r2,r3)
532 static void _vfp_fmsr_f(jit_state_t*,
533                         jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
534 #  define vfp_fnmar_f(r0,r1,r2,r3)      _vfp_fnmar_f(_jit,r0,r1,r2,r3)
535 static void _vfp_fnmar_f(jit_state_t*,
536                          jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
537 #  define vfp_fnmsr_f(r0,r1,r2,r3)      _vfp_fnmsr_f(_jit,r0,r1,r2,r3)
538 static void _vfp_fnmsr_f(jit_state_t*,
539                          jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
540 #  define vfp_sqrtr_d(r0,r1)            VSQRT_F64(r0,r1)
541 #  define vfp_fmar_d(r0,r1,r2,r3)       _vfp_fmar_d(_jit,r0,r1,r2,r3)
542 static void _vfp_fmar_d(jit_state_t*,
543                         jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
544 #  define vfp_fmsr_d(r0,r1,r2,r3)       _vfp_fmsr_d(_jit,r0,r1,r2,r3)
545 static void _vfp_fmsr_d(jit_state_t*,
546                         jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
547 #  define vfp_fnmar_d(r0,r1,r2,r3)      _vfp_fnmar_d(_jit,r0,r1,r2,r3)
548 static void _vfp_fnmar_d(jit_state_t*,
549                          jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
550 #  define vfp_fnmsr_d(r0,r1,r2,r3)      _vfp_fnmsr_d(_jit,r0,r1,r2,r3)
551 static void _vfp_fnmsr_d(jit_state_t*,
552                          jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/* Arithmetic helpers (add, sub, reverse sub, mul, div) for single (_f)
 * and double (_d) precision.
 * The register-register forms (xxxr) expand directly to the
 * VADD/VSUB/VMUL/VDIV F32/F64 instruction macros.  The reverse subtract
 * forms (rsbr) reuse subr with the two source operands swapped.  The
 * immediate forms (xxxi) are out-of-line functions that take the constant
 * as a jit_float32_t or jit_float64_t. */
553 #  define vfp_addr_f(r0,r1,r2)          VADD_F32(r0,r1,r2)
554 #  define vfp_addi_f(r0,r1,i0)          _vfp_addi_f(_jit,r0,r1,i0)
555 static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
556 #  define vfp_addr_d(r0,r1,r2)          VADD_F64(r0,r1,r2)
557 #  define vfp_addi_d(r0,r1,i0)          _vfp_addi_d(_jit,r0,r1,i0)
558 static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
559 #  define vfp_subr_f(r0,r1,r2)          VSUB_F32(r0,r1,r2)
560 #  define vfp_subi_f(r0,r1,i0)          _vfp_subi_f(_jit,r0,r1,i0)
561 static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
562 #  define vfp_subr_d(r0,r1,r2)          VSUB_F64(r0,r1,r2)
563 #  define vfp_subi_d(r0,r1,i0)          _vfp_subi_d(_jit,r0,r1,i0)
564 static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
565 #  define vfp_rsbr_f(r0,r1,r2)          vfp_subr_f(r0,r2,r1)
566 #  define vfp_rsbi_f(r0,r1,i0)          _vfp_rsbi_f(_jit,r0,r1,i0)
567 static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
568 #  define vfp_rsbr_d(r0,r1,r2)          vfp_subr_d(r0,r2,r1)
569 #  define vfp_rsbi_d(r0,r1,i0)          _vfp_rsbi_d(_jit,r0,r1,i0)
570 static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
571 #  define vfp_mulr_f(r0,r1,r2)          VMUL_F32(r0,r1,r2)
572 #  define vfp_muli_f(r0,r1,i0)          _vfp_muli_f(_jit,r0,r1,i0)
573 static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
574 #  define vfp_mulr_d(r0,r1,r2)          VMUL_F64(r0,r1,r2)
575 #  define vfp_muli_d(r0,r1,i0)          _vfp_muli_d(_jit,r0,r1,i0)
576 static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
577 #  define vfp_divr_f(r0,r1,r2)          VDIV_F32(r0,r1,r2)
578 #  define vfp_divi_f(r0,r1,i0)          _vfp_divi_f(_jit,r0,r1,i0)
579 static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
580 #  define vfp_divr_d(r0,r1,r2)          VDIV_F64(r0,r1,r2)
581 #  define vfp_divi_d(r0,r1,i0)          _vfp_divi_d(_jit,r0,r1,i0)
582 static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Compare helpers producing a result in a register: lt, le, eq, ge, gt
 * and ne, for single (_f) and double (_d) precision.
 * The register-register forms call vcmp01_f/vcmp01_d with a pair of ARM
 * condition codes; in every use below the first code is the logical
 * inverse of the second (e.g. PL/MI for lt, NE/EQ for eq).  The immediate
 * forms are out-of-line functions taking the constant as a float.
 * NOTE(review): presumably c0 is the condition under which 0 is stored and
 * c1 the condition under which 1 is stored -- confirm in _vcmp01_x. */
583 #  define vfp_cmp_f(r0,r1)              _vfp_cmp_f(_jit,r0,r1)
584 static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
585 #  define vfp_cmp_d(r0,r1)              _vfp_cmp_d(_jit,r0,r1)
586 static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
587 #  define vcmp01_x(c0,c1,r0)            _vcmp01_x(_jit,c0,c1,r0)
588 static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
589 #  define vcmp01_f(c0,c1,r0,r1,r2)      _vcmp01_f(_jit,c0,c1,r0,r1,r2)
590 static void _vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
591 #  define vcmp01_d(c0,c1,r0,r1,r2)      _vcmp01_d(_jit,c0,c1,r0,r1,r2)
592 static void _vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
593 #  define vfp_ltr_f(r0,r1,r2)           vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
594 #  define vfp_lti_f(r0,r1,i0)           _vfp_lti_f(_jit,r0,r1,i0)
595 static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
596 #  define vfp_ltr_d(r0,r1,r2)           vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
597 #  define vfp_lti_d(r0,r1,i0)           _vfp_lti_d(_jit,r0,r1,i0)
598 static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
599 #  define vfp_ler_f(r0,r1,r2)           vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
600 #  define vfp_lei_f(r0,r1,i0)           _vfp_lei_f(_jit,r0,r1,i0)
601 static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
602 #  define vfp_ler_d(r0,r1,r2)           vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
603 #  define vfp_lei_d(r0,r1,i0)           _vfp_lei_d(_jit,r0,r1,i0)
604 static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
605 #  define vfp_eqr_f(r0,r1,r2)           vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
606 #  define vfp_eqi_f(r0,r1,i0)           _vfp_eqi_f(_jit,r0,r1,i0)
607 static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
608 #  define vfp_eqr_d(r0,r1,r2)           vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
609 #  define vfp_eqi_d(r0,r1,i0)           _vfp_eqi_d(_jit,r0,r1,i0)
610 static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
611 #  define vfp_ger_f(r0,r1,r2)           vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
612 #  define vfp_gei_f(r0,r1,i0)           _vfp_gei_f(_jit,r0,r1,i0)
613 static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
614 #  define vfp_ger_d(r0,r1,r2)           vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
615 #  define vfp_gei_d(r0,r1,i0)           _vfp_gei_d(_jit,r0,r1,i0)
616 static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
617 #  define vfp_gtr_f(r0,r1,r2)           vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
618 #  define vfp_gti_f(r0,r1,i0)           _vfp_gti_f(_jit,r0,r1,i0)
619 static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
620 #  define vfp_gtr_d(r0,r1,r2)           vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
621 #  define vfp_gti_d(r0,r1,i0)           _vfp_gti_d(_jit,r0,r1,i0)
622 static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
623 #  define vfp_ner_f(r0,r1,r2)           vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
624 #  define vfp_nei_f(r0,r1,i0)           _vfp_nei_f(_jit,r0,r1,i0)
625 static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
626 #  define vfp_ner_d(r0,r1,r2)           vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
627 #  define vfp_nei_d(r0,r1,i0)           _vfp_nei_d(_jit,r0,r1,i0)
628 static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* "Unordered or ..." compare helpers producing a result in a register:
 * unlt, unle, uneq, unge and ungt, for single (_f) and double (_d)
 * precision.
 * unlt/unle call vcmp_10_f/vcmp_10_d and unge/ungt call
 * vcmp_01_f/vcmp_01_d, each with a single ARM condition code; uneq has
 * dedicated out-of-line functions.  Immediate forms are out-of-line and
 * take the constant as a float.
 * NOTE(review): the "10"/"01" in the helper names presumably give the
 * default value and the value stored when the condition holds -- confirm
 * in _vcmp10_x and _vcmp_01_x. */
629 #  define vcmp10_x(c0,r0)               _vcmp10_x(_jit,c0,r0)
630 static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
631 #  define vcmp_10_f(c0,r0,r1,r2)        _vcmp_10_f(_jit,c0,r0,r1,r2)
632 static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
633 #  define vcmp_10_d(c0,r0,r1,r2)        _vcmp_10_d(_jit,c0,r0,r1,r2)
634 static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
635 #  define vfp_unltr_f(r0,r1,r2)         vcmp_10_f(ARM_CC_GE,r0,r1,r2)
636 #  define vfp_unlti_f(r0,r1,i0)         _vfp_unlti_f(_jit,r0,r1,i0)
637 static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
638 #  define vfp_unltr_d(r0,r1,r2)         vcmp_10_d(ARM_CC_GE,r0,r1,r2)
639 #  define vfp_unlti_d(r0,r1,i0)         _vfp_unlti_d(_jit,r0,r1,i0)
640 static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
641 #  define vfp_unler_f(r0,r1,r2)         vcmp_10_f(ARM_CC_GT,r0,r1,r2)
642 #  define vfp_unlei_f(r0,r1,i0)         _vfp_unlei_f(_jit,r0,r1,i0)
643 static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
644 #  define vfp_unler_d(r0,r1,r2)         vcmp_10_d(ARM_CC_GT,r0,r1,r2)
645 #  define vfp_unlei_d(r0,r1,i0)         _vfp_unlei_d(_jit,r0,r1,i0)
646 static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
647 #  define vfp_uneqr_x(r0)               _vfp_uneqr_x(_jit,r0)
648 static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
649 #  define vfp_uneqr_f(r0,r1,r2)         _vfp_uneqr_f(_jit,r0,r1,r2)
650 static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
651 #  define vfp_uneqi_f(r0,r1,i0)         _vfp_uneqi_f(_jit,r0,r1,i0)
652 static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
653 #  define vfp_uneqr_d(r0,r1,r2)         _vfp_uneqr_d(_jit,r0,r1,r2)
654 static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
655 #  define vfp_uneqi_d(r0,r1,i0)         _vfp_uneqi_d(_jit,r0,r1,i0)
656 static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
657 #  define vcmp_01_x(c0,r0)              _vcmp_01_x(_jit,c0,r0)
658 static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
659 #  define vcmp_01_f(c0,r0,r1,r2)        _vcmp_01_f(_jit,c0,r0,r1,r2)
660 static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
661 #  define vcmp_01_d(c0,r0,r1,r2)        _vcmp_01_d(_jit,c0,r0,r1,r2)
662 static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
663 #  define vfp_unger_f(r0,r1,r2)         vcmp_01_f(ARM_CC_CS,r0,r1,r2)
664 #  define vfp_ungei_f(r0,r1,i0)         _vfp_ungei_f(_jit,r0,r1,i0)
665 static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
666 #  define vfp_unger_d(r0,r1,r2)         vcmp_01_d(ARM_CC_CS,r0,r1,r2)
667 #  define vfp_ungei_d(r0,r1,i0)         _vfp_ungei_d(_jit,r0,r1,i0)
668 static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
669 #  define vfp_ungtr_f(r0,r1,r2)         vcmp_01_f(ARM_CC_HI,r0,r1,r2)
670 #  define vfp_ungti_f(r0,r1,i0)         _vfp_ungti_f(_jit,r0,r1,i0)
671 static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
672 #  define vfp_ungtr_d(r0,r1,r2)         vcmp_01_d(ARM_CC_HI,r0,r1,r2)
673 #  define vfp_ungti_d(r0,r1,i0)         _vfp_ungti_d(_jit,r0,r1,i0)
674 static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* ltgt, ord and unord compare helpers producing a result in a register.
 * Unlike the condition-code driven comparisons, all of these, register and
 * immediate forms alike, are out-of-line functions; the macros only
 * supply the implicit _jit state.  vfp_ltgtr_x takes just one register
 * number.
 * NOTE(review): presumably vfp_ltgtr_x is the shared tail used by the _f
 * and _d variants after the compare -- confirm in the definitions. */
675 #  define vfp_ltgtr_x(r0)               _vfp_ltgtr_x(_jit,r0)
676 static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
677 #  define vfp_ltgtr_f(r0,r1,r2)         _vfp_ltgtr_f(_jit,r0,r1,r2)
678 static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
679 #  define vfp_ltgti_f(r0,r1,i0)         _vfp_ltgti_f(_jit,r0,r1,i0)
680 static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
681 #  define vfp_ltgtr_d(r0,r1,r2)         _vfp_ltgtr_d(_jit,r0,r1,r2)
682 static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
683 #  define vfp_ltgti_d(r0,r1,i0)         _vfp_ltgti_d(_jit,r0,r1,i0)
684 static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
685 #  define vfp_ordr_f(r0,r1,r2)          _vfp_ordr_f(_jit,r0,r1,r2)
686 static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
687 #  define vfp_ordi_f(r0,r1,i0)          _vfp_ordi_f(_jit,r0,r1,i0)
688 static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
689 #  define vfp_ordr_d(r0,r1,r2)          _vfp_ordr_d(_jit,r0,r1,r2)
690 static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
691 #  define vfp_ordi_d(r0,r1,i0)          _vfp_ordi_d(_jit,r0,r1,i0)
692 static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
693 #  define vfp_unordr_f(r0,r1,r2)        _vfp_unordr_f(_jit,r0,r1,r2)
694 static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
695 #  define vfp_unordi_f(r0,r1,i0)        _vfp_unordi_f(_jit,r0,r1,i0)
696 static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
697 #  define vfp_unordr_d(r0,r1,r2)        _vfp_unordr_d(_jit,r0,r1,r2)
698 static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
699 #  define vfp_unordi_d(r0,r1,i0)        _vfp_unordi_d(_jit,r0,r1,i0)
700 static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Compare-and-branch primitives.  vbcmp_x takes an ARM condition code and
 * a jit_word_t (i0); vbcmp_f and vbcmp_d additionally take the two
 * register numbers to compare, for single and double precision.  All
 * three return a jit_word_t.
 * The vbcmp_x macro and its prototype used to be declared twice (once
 * before vbcmp_f and again before vbcmp_d); the redundant second copy has
 * been dropped.
 * NOTE(review): i0 is presumably the branch target and the return value
 * the address of the emitted branch -- confirm in the definitions. */
701 #  define vbcmp_x(cc,i0)                _vbcmp_x(_jit,cc,i0)
702 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
703 #  define vbcmp_f(cc,i0,r0,r1)          _vbcmp_f(_jit,cc,i0,r0,r1)
704 static jit_word_t
705 _vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
708 #  define vbcmp_d(cc,i0,r0,r1)          _vbcmp_d(_jit,cc,i0,r0,r1)
709 static jit_word_t
710 _vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
/* Compare-and-branch helpers for lt, le, eq, ge, gt and ne, single (_f)
 * and double (_d) precision.
 * The register-register forms call vbcmp_f/vbcmp_d with the ARM condition
 * code to branch on (MI, LS, EQ, GE, GT, NE).  The immediate forms are
 * out-of-line functions that take the constant as a float and return a
 * jit_word_t.
 * Note that for vfp_blti_d the prototype precedes the macro, the reverse
 * of every other pair; this is harmless. */
711 #  define vfp_bltr_f(i0,r0,r1)          vbcmp_f(ARM_CC_MI,i0,r0,r1)
712 #  define vfp_blti_f(i0,r0,i1)          _vfp_blti_f(_jit,i0,r0,i1)
713 static jit_word_t _vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
714 #  define vfp_bltr_d(i0,r0,r1)          vbcmp_d(ARM_CC_MI,i0,r0,r1)
715 static jit_word_t _vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
716 #  define vfp_blti_d(i0,r0,i1)          _vfp_blti_d(_jit,i0,r0,i1)
717 #  define vfp_bler_f(i0,r0,r1)          vbcmp_f(ARM_CC_LS,i0,r0,r1)
718 #  define vfp_blei_f(i0,r0,i1)          _vfp_blei_f(_jit,i0,r0,i1)
719 static jit_word_t _vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
720 #  define vfp_bler_d(i0,r0,r1)          vbcmp_d(ARM_CC_LS,i0,r0,r1)
721 #  define vfp_blei_d(i0,r0,i1)          _vfp_blei_d(_jit,i0,r0,i1)
722 static jit_word_t _vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
723 #  define vfp_beqr_f(i0,r0,r1)          vbcmp_f(ARM_CC_EQ,i0,r0,r1)
724 #  define vfp_beqi_f(i0,r0,i1)          _vfp_beqi_f(_jit,i0,r0,i1)
725 static jit_word_t _vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
726 #  define vfp_beqr_d(i0,r0,r1)          vbcmp_d(ARM_CC_EQ,i0,r0,r1)
727 #  define vfp_beqi_d(i0,r0,i1)          _vfp_beqi_d(_jit,i0,r0,i1)
728 static jit_word_t _vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
729 #  define vfp_bger_f(i0,r0,r1)          vbcmp_f(ARM_CC_GE,i0,r0,r1)
730 #  define vfp_bgei_f(i0,r0,i1)          _vfp_bgei_f(_jit,i0,r0,i1)
731 static jit_word_t _vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
732 #  define vfp_bger_d(i0,r0,r1)          vbcmp_d(ARM_CC_GE,i0,r0,r1)
733 #  define vfp_bgei_d(i0,r0,i1)          _vfp_bgei_d(_jit,i0,r0,i1)
734 static jit_word_t _vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
735 #  define vfp_bgtr_f(i0,r0,r1)          vbcmp_f(ARM_CC_GT,i0,r0,r1)
736 #  define vfp_bgti_f(i0,r0,i1)          _vfp_bgti_f(_jit,i0,r0,i1)
737 static jit_word_t _vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
738 #  define vfp_bgtr_d(i0,r0,r1)          vbcmp_d(ARM_CC_GT,i0,r0,r1)
739 #  define vfp_bgti_d(i0,r0,i1)          _vfp_bgti_d(_jit,i0,r0,i1)
740 static jit_word_t _vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
741 #  define vfp_bner_f(i0,r0,r1)          vbcmp_f(ARM_CC_NE,i0,r0,r1)
742 #  define vfp_bnei_f(i0,r0,i1)          _vfp_bnei_f(_jit,i0,r0,i1)
743 static jit_word_t _vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
744 #  define vfp_bner_d(i0,r0,r1)          vbcmp_d(ARM_CC_NE,i0,r0,r1)
745 #  define vfp_bnei_d(i0,r0,i1)          _vfp_bnei_d(_jit,i0,r0,i1)
746 static jit_word_t _vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* Compare-and-branch helpers for the unordered-aware conditions: unlt,
 * unle, uneq, unge, ungt, ltgt, ord and unord, single (_f) and double
 * (_d) precision.  All of them return a jit_word_t.
 *  - bunltr/bunler go through vbncmp_f/vbncmp_d with ARM_CC_GE/ARM_CC_GT.
 *  - bungtr, bordr and bunordr go through vbcmp_f/vbcmp_d with
 *    ARM_CC_HI, ARM_CC_VC and ARM_CC_VS respectively.
 *  - buneq, bunge and bltgt have dedicated out-of-line functions, plus an
 *    _x helper that takes only the jit_word_t argument.
 *  - every immediate form is out-of-line and takes the constant as float.
 * NOTE(review): vbncmp_* presumably branch when the given condition does
 * NOT hold (unlt == not ge, unle == not gt) -- confirm in _vbncmp_x. */
747 #  define vbncmp_x(cc,i0)               _vbncmp_x(_jit,cc,i0)
748 static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
749 #  define vbncmp_f(cc,i0,r0,r1)         _vbncmp_f(_jit,cc,i0,r0,r1)
750 static jit_word_t
751 _vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
752 #  define vbncmp_d(cc,i0,r0,r1)         _vbncmp_d(_jit,cc,i0,r0,r1)
753 static jit_word_t
754 _vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
755 #  define vfp_bunltr_f(i0,r0,r1)        vbncmp_f(ARM_CC_GE,i0,r0,r1)
756 #  define vfp_bunlti_f(i0,r0,i1)        _vfp_bunlti_f(_jit,i0,r0,i1)
757 static jit_word_t
758 _vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
759 #  define vfp_bunltr_d(i0,r0,r1)        vbncmp_d(ARM_CC_GE,i0,r0,r1)
760 #  define vfp_bunlti_d(i0,r0,i1)        _vfp_bunlti_d(_jit,i0,r0,i1)
761 static jit_word_t
762 _vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
763 #  define vfp_bunler_f(i0,r0,r1)        vbncmp_f(ARM_CC_GT,i0,r0,r1)
764 #  define vfp_bunlei_f(i0,r0,i1)        _vfp_bunlei_f(_jit,i0,r0,i1)
765 static jit_word_t
766 _vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
767 #  define vfp_bunler_d(i0,r0,r1)        vbncmp_d(ARM_CC_GT,i0,r0,r1)
768 #  define vfp_bunlei_d(i0,r0,i1)        _vfp_bunlei_d(_jit,i0,r0,i1)
769 static jit_word_t
770 _vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
771 #  define vfp_buneqr_x(i0)              _vfp_buneqr_x(_jit,i0)
772 static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
773 #  define vfp_buneqr_f(i0,r0,r1)        _vfp_buneqr_f(_jit,i0,r0,r1)
774 static jit_word_t
775 _vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
776 #  define vfp_buneqi_f(i0,r0,i1)        _vfp_buneqi_f(_jit,i0,r0,i1)
777 static jit_word_t
778 _vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
779 #  define vfp_buneqr_d(i0,r0,r1)        _vfp_buneqr_d(_jit,i0,r0,r1)
780 static jit_word_t
781 _vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
782 #  define vfp_buneqi_d(i0,r0,i1)        _vfp_buneqi_d(_jit,i0,r0,i1)
783 static jit_word_t
784 _vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
785 #  define vfp_bunger_x(i0)              _vfp_bunger_x(_jit,i0)
786 static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
787 #  define vfp_bunger_f(i0,r0,r1)        _vfp_bunger_f(_jit,i0,r0,r1)
788 static jit_word_t
789 _vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
790 #  define vfp_bungei_f(i0,r0,i1)        _vfp_bungei_f(_jit,i0,r0,i1)
791 static jit_word_t
792 _vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
793 #  define vfp_bunger_d(i0,r0,r1)        _vfp_bunger_d(_jit,i0,r0,r1)
794 static jit_word_t
795 _vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
796 #  define vfp_bungei_d(i0,r0,i1)        _vfp_bungei_d(_jit,i0,r0,i1)
797 static jit_word_t
798 _vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
799 #  define vfp_bungtr_f(i0,r0,r1)        vbcmp_f(ARM_CC_HI,i0,r0,r1)
800 #  define vfp_bungti_f(i0,r0,i1)        _vfp_bungti_f(_jit,i0,r0,i1)
801 static jit_word_t
802 _vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
803 #  define vfp_bungtr_d(i0,r0,r1)        vbcmp_d(ARM_CC_HI,i0,r0,r1)
804 #  define vfp_bungti_d(i0,r0,i1)        _vfp_bungti_d(_jit,i0,r0,i1)
805 static jit_word_t
806 _vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
807 #  define vfp_bltgtr_x(i0)              _vfp_bltgtr_x(_jit,i0)
808 static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
809 #  define vfp_bltgtr_f(i0,r0,r1)        _vfp_bltgtr_f(_jit,i0,r0,r1)
810 static jit_word_t
811 _vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
812 #  define vfp_bltgti_f(i0,r0,i1)        _vfp_bltgti_f(_jit,i0,r0,i1)
813 static jit_word_t
814 _vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
815 #  define vfp_bltgtr_d(i0,r0,r1)        _vfp_bltgtr_d(_jit,i0,r0,r1)
816 static jit_word_t
817 _vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
818 #  define vfp_bltgti_d(i0,r0,i1)        _vfp_bltgti_d(_jit,i0,r0,i1)
819 static jit_word_t
820 _vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
821 #  define vfp_bordr_f(i0,r0,r1)         vbcmp_f(ARM_CC_VC,i0,r0,r1)
822 #  define vfp_bordi_f(i0,r0,i1)         _vfp_bordi_f(_jit,i0,r0,i1)
823 static jit_word_t
824 _vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
825 #  define vfp_bordr_d(i0,r0,r1)         vbcmp_d(ARM_CC_VC,i0,r0,r1)
826 #  define vfp_bordi_d(i0,r0,i1)         _vfp_bordi_d(_jit,i0,r0,i1)
827 static jit_word_t
828 _vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
829 #  define vfp_bunordr_f(i0,r0,r1)       vbcmp_f(ARM_CC_VS,i0,r0,r1)
830 #  define vfp_bunordi_f(i0,r0,i1)       _vfp_bunordi_f(_jit,i0,r0,i1)
831 static jit_word_t
832 _vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
833 #  define vfp_bunordr_d(i0,r0,r1)       vbcmp_d(ARM_CC_VS,i0,r0,r1)
834 #  define vfp_bunordi_d(i0,r0,i1)       _vfp_bunordi_d(_jit,i0,r0,i1)
835 static jit_word_t
836 _vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
/* Load helpers.
 * vfp_ldr_f/vfp_ldr_d expand directly to VLDR_F32/VLDR_F64 with a zero
 * offset.  The remaining forms are out-of-line functions: ldi takes a
 * jit_word_t instead of a base register, ldxr takes a third register
 * number, and ldxi takes a base register plus a jit_word_t.
 * NOTE(review): vfp_unldr_x/vfp_unldi_x are presumably the unaligned load
 * variants, with the last jit_word_t giving the access size -- confirm in
 * the definitions. */
837 #  define vfp_ldr_f(r0,r1)              VLDR_F32(r0,r1,0)
838 #  define vfp_ldr_d(r0,r1)              VLDR_F64(r0,r1,0)
839 #  define vfp_ldi_f(r0,i0)              _vfp_ldi_f(_jit,r0,i0)
840 static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
841 #  define vfp_ldi_d(r0,i0)              _vfp_ldi_d(_jit,r0,i0)
842 static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
843 #  define vfp_ldxr_f(r0,r1,r2)          _vfp_ldxr_f(_jit,r0,r1,r2)
844 static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
845 #  define vfp_ldxr_d(r0,r1,r2)          _vfp_ldxr_d(_jit,r0,r1,r2)
846 static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
847 #  define vfp_ldxi_f(r0,r1,i0)          _vfp_ldxi_f(_jit,r0,r1,i0)
848 static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
849 #  define vfp_ldxi_d(r0,r1,i0)          _vfp_ldxi_d(_jit,r0,r1,i0)
850 static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
851 #  define vfp_unldr_x(r0, r1, i0)       _vfp_unldr_x(_jit, r0, r1, i0)
852 static void _vfp_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
853 #  define vfp_unldi_x(r0, i0, i1)       _vfp_unldi_x(_jit, r0, i0, i1)
854 static void _vfp_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
/* Store helpers.
 * vfp_str_f/vfp_str_d expand directly to VSTR_F32/VSTR_F64 with a zero
 * offset; note that the two register arguments are swapped when passed on
 * (vfp_str_x(r0,r1) becomes VSTR_x(r1,r0,0)).  The remaining forms are
 * out-of-line functions: sti takes a jit_word_t first and a register
 * number second, stxr takes three register numbers, and stxi takes a
 * jit_word_t followed by two register numbers. */
855 #  define vfp_str_f(r0,r1)              VSTR_F32(r1,r0,0)
856 #  define vfp_str_d(r0,r1)              VSTR_F64(r1,r0,0)
857 #  define vfp_sti_f(i0,r0)              _vfp_sti_f(_jit,i0,r0)
858 static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
859 #  define vfp_sti_d(i0,r0)              _vfp_sti_d(_jit,i0,r0)
860 static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
861 #  define vfp_stxr_f(r0,r1,r2)          _vfp_stxr_f(_jit,r0,r1,r2)
862 static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
863 #  define vfp_stxr_d(r0,r1,r2)          _vfp_stxr_d(_jit,r0,r1,r2)
864 static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
865 #  define vfp_stxi_f(i0,r0,r1)          _vfp_stxi_f(_jit,i0,r0,r1)
866 static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
867 #  define vfp_stxi_d(i0,r0,r1)          _vfp_stxi_d(_jit,i0,r0,r1)
868 static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/* Out-of-line store helpers taking an extra jit_word_t argument; the
 * macros only supply the implicit _jit state.  The directives use the
 * "#  define" nesting indent like every other definition inside this
 * conditional section.
 * NOTE(review): presumably the unaligned store variants, with the last
 * jit_word_t giving the access size -- confirm in the definitions. */
869 #  define vfp_unstr_x(r0, r1, i0)       _vfp_unstr_x(_jit, r0, r1, i0)
870 static void _vfp_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
871 #  define vfp_unsti_x(i0, r0, i1)       _vfp_unsti_x(_jit, i0, r0, i1)
872 static void _vfp_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
/* Out-of-line helper taking two register numbers; the macro supplies the
 * implicit _jit state.
 * NOTE(review): by the name, presumably fetches the next double from a
 * va_list -- confirm in the definition. */
873 #  define vfp_vaarg_d(r0, r1)           _vfp_vaarg_d(_jit, r0, r1)
874 static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
875 #endif
876
877 #if CODE
/* Translate a register number as used by this backend into a VFP register
 * index: subtract 16 and halve.  For example _D8_REGNO (32) yields 8. */
878 #  define vfp_regno(rn)         (((rn) - 16) >> 1)
879
880 static int
881 encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
882 {
883     int         code, mode, imm, mask;
884
885     if (hi != lo) {
886         if (mov && !inv) {
887             /* (I64)
888              *  aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
889              */
890             for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
891                 imm = lo & mask;
892                 if (imm != mask && imm != 0)
893                     goto fail;
894                 imm = hi & mask;
895                 if (imm != mask && imm != 0)
896                     goto fail;
897             }
898             mode = 0xe20;
899             imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
900                    ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >>  3) |
901                    ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
902                    ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >>  7));
903             goto success;
904         }
905         goto fail;
906     }
907     /*  (I32)
908      *  00000000 00000000 00000000 abcdefgh
909      *  00000000 00000000 abcdefgh 00000000
910      *  00000000 abcdefgh 00000000 00000000
911      *  abcdefgh 00000000 00000000 00000000 */
912     for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
913         if ((lo & mask) == lo) {
914             imm = lo >> (mode << 3);
915             mode <<= 9;
916             goto success;
917         }
918     }
919     /*  (I16)
920      *  00000000 abcdefgh 00000000 abcdefgh
921      *  abcdefgh 00000000 abcdefgh 00000000 */
922     for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
923         if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
924             imm = lo >> (mode << 3);
925             mode = 0x800 | (mode << 9);
926             goto success;
927         }
928     }
929     if (mov) {
930         /*  (I32)
931          *  00000000 00000000 abcdefgh 11111111
932          *  00000000 abcdefgh 11111111 11111111 */
933         for (mode = 0, mask = 0xff; mode < 2;
934              mask = (mask << 8) | 0xff, mode++) {
935             if ((lo & mask) == mask &&
936                 !((lo & ~mask) >> 8) &&
937                 (imm = lo >> (8 + (mode << 8)))) {
938                 mode = 0xc00 | (mode << 8);
939                 goto success;
940             }
941         }
942         if (!inv) {
943             /* (F32)
944              *  aBbbbbbc defgh000 00000000 00000000
945              *  from the ARM Architecture Reference Manual:
946              *  In this entry, B = NOT(b). The bit pattern represents the
947              *  floating-point number (-1)^s* 2^exp * mantissa, where
948              *  S = UInt(a),
949              *  exp = UInt(NOT(b):c:d)-3 and
950              *  mantissa = (16+UInt(e:f:g:h))/16. */
951             if ((lo & 0x7ffff) == 0 &&
952                 (((lo & 0x7e000000) == 0x3e000000) ||
953                  ((lo & 0x7e000000) == 0x40000000))) {
954                 mode = 0xf00;
955                 imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
956                 goto success;
957             }
958         }
959     }
960
961 fail:
962     /* need another approach (load from memory, move from arm register, etc) */
963     return (-1);
964
965 success:
966     code = inv ? ARM_VMVNI : ARM_VMOVI;
967     switch ((mode & 0xf00) >> 8) {
968         case 0x0:       case 0x2:       case 0x4:       case 0x6:
969         case 0x8:       case 0xa:
970             if (inv)    mode |= 0x20;
971             if (!mov)   mode |= 0x100;
972             break;
973         case 0x1:       case 0x3:       case 0x5:       case 0x7:
974             /* should actually not reach here */
975             assert(!inv);
976         case 0x9:       case 0xb:
977             assert(!mov);
978             break;
979         case 0xc:       case 0xd:
980             /* should actually not reach here */
981             assert(inv);
982         case 0xe:
983             assert(mode & 0x20);
984             assert(mov && !inv);
985             break;
986         default:
987             assert(!(mode & 0x20));
988             break;
989     }
990     imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
991     code |= mode | imm;
992     if (jit_thumb_p()) {
993         if (code & 0x1000000)
994             code |= 0xff000000;
995         else
996             code |= 0xef000000;
997     }
998     else
999         code |= ARM_CC_NV;
1000     return (code);
1001 }
1002
1003 static void
1004 _vodi(jit_state_t *_jit, int oi, int r0)
1005 {
1006     jit_thumb_t thumb;
1007     assert(!(oi  & 0x0000f000));
1008     assert(!(r0 & 1));  r0 = vfp_regno(r0);
1009     thumb.i = oi|(_u4(r0)<<12);
1010     if (jit_thumb_p())
1011         iss(thumb.s[0], thumb.s[1]);
1012     else
1013         ii(thumb.i);
1014 }
1015
1016 static void
1017 _voqi(jit_state_t *_jit, int oi, int r0)
1018 {
1019     jit_thumb_t thumb;
1020     assert(!(oi  & 0x0000f000));
1021     assert(!(r0 & 3));  r0 = vfp_regno(r0);
1022     thumb.i = oi|(_u4(r0)<<12);
1023     if (jit_thumb_p())
1024         iss(thumb.s[0], thumb.s[1]);
1025     else
1026         ii(thumb.i);
1027 }
1028
1029 static void
1030 _cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
1031 {
1032     jit_thumb_t thumb;
1033     assert(!(cc & 0x0fffffff));
1034     assert(!(o  & 0xf000f00f));
1035     if (r0 & 1) o |= ARM_V_D;   r0 = vfp_regno(r0);
1036     if (r1 & 1) o |= ARM_V_M;   r1 = vfp_regno(r1);
1037     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1038     if (jit_thumb_p())
1039         iss(thumb.s[0], thumb.s[1]);
1040     else
1041         ii(thumb.i);
1042 }
1043
1044 static void
1045 _cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
1046 {
1047     jit_thumb_t thumb;
1048     assert(!(cc & 0x0fffffff));
1049     assert(!(o  & 0xf000f00f));
1050     assert(!(r0 & 1) && !(r1 & 1));
1051     r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1052     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1053     if (jit_thumb_p())
1054         iss(thumb.s[0], thumb.s[1]);
1055     else
1056         ii(thumb.i);
1057 }
1058
1059 static void
1060 _cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
1061 {
1062     jit_thumb_t thumb;
1063     assert(!(cc & 0x0fffffff));
1064     assert(!(o  & 0xf000f00f));
1065     assert(!(r0 & 3) && !(r1 & 1));
1066     r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1067     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1068     if (jit_thumb_p())
1069         iss(thumb.s[0], thumb.s[1]);
1070     else
1071         ii(thumb.i);
1072 }
1073
1074 static void
1075 _cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
1076 {
1077     jit_thumb_t thumb;
1078     assert(!(cc & 0x0fffffff));
1079     assert(!(o  & 0xf000f00f));
1080     assert(!(r0 & 3) && !(r1 & 3));
1081     r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1082     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1083     if (jit_thumb_p())
1084         iss(thumb.s[0], thumb.s[1]);
1085     else
1086         ii(thumb.i);
1087 }
1088
1089 static void
1090 _cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1091 {
1092     jit_thumb_t thumb;
1093     assert(!(cc & 0x0fffffff));
1094     assert(!(o  & 0xf000f00f));
1095     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1096     if (jit_thumb_p())
1097         iss(thumb.s[0], thumb.s[1]);
1098     else
1099         ii(thumb.i);
1100 }
1101
1102 static void
1103 _cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1104 {
1105     jit_thumb_t thumb;
1106     assert(!(cc & 0x0fffffff));
1107     assert(!(o  & 0xf000f00f));
1108     if (r1 & 1) o |= ARM_V_N;   r1 = vfp_regno(r1);
1109     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1110     if (jit_thumb_p())
1111         iss(thumb.s[0], thumb.s[1]);
1112     else
1113         ii(thumb.i);
1114 }
1115
1116 static void
1117 _cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1118 {
1119     jit_thumb_t thumb;
1120     assert(!(cc & 0x0fffffff));
1121     assert(!(o  & 0xf000f00f));
1122     if (r1 & 1) o |= ARM_V_M;   r1 = vfp_regno(r1);
1123     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1124     if (jit_thumb_p())
1125         iss(thumb.s[0], thumb.s[1]);
1126     else
1127         ii(thumb.i);
1128 }
1129
1130 static void
1131 _cc_vo_vv(jit_state_t *_jit, int cc, int o, int r0, int r1)
1132 {
1133     jit_thumb_t thumb;
1134     assert(!(cc & 0x0fffffff));
1135     assert(!(o  & 0xf000f00f));
1136     r0 = vfp_regno(r0);
1137     r1 = vfp_regno(r1);
1138     thumb.i = cc|o|(_u4(r1)<<12)|_u4(r0);
1139     if (jit_thumb_p())
1140         iss(thumb.s[0], thumb.s[1]);
1141     else
1142         ii(thumb.i);
1143 }
1144
1145 static void
1146 _cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1147 {
1148     jit_thumb_t thumb;
1149     assert(!(cc & 0x0fffffff));
1150     assert(!(o  & 0xf000f00f));
1151     /* use same bit pattern, to set opc1... */
1152     if (r1 & 1) o |= ARM_V_I32; r1 = vfp_regno(r1);
1153     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1154     if (jit_thumb_p())
1155         iss(thumb.s[0], thumb.s[1]);
1156     else
1157         ii(thumb.i);
1158 }
1159
1160 static void
1161 _cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1162 {
1163     jit_thumb_t thumb;
1164     assert(!(cc & 0x0fffffff));
1165     assert(!(o  & 0xf00ff00f));
1166     assert(!(r2 & 1));
1167     r2 = vfp_regno(r2);
1168     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1169     if (jit_thumb_p())
1170         iss(thumb.s[0], thumb.s[1]);
1171     else
1172         ii(thumb.i);
1173 }
1174
1175 static void
1176 _cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1177 {
1178     jit_thumb_t thumb;
1179     assert(!(cc & 0x0fffffff));
1180     assert(!(o  & 0xf00ff00f));
1181     if (r0 & 1) o |= ARM_V_D;   r0 = vfp_regno(r0);
1182     if (r1 & 1) o |= ARM_V_N;   r1 = vfp_regno(r1);
1183     if (r2 & 1) o |= ARM_V_M;   r2 = vfp_regno(r2);
1184     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1185     if (jit_thumb_p())
1186         iss(thumb.s[0], thumb.s[1]);
1187     else
1188         ii(thumb.i);
1189 }
1190
1191 static void
1192 _cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1193 {
1194     jit_thumb_t thumb;
1195     assert(!(cc & 0x0fffffff));
1196     assert(!(o  & 0xf00ff00f));
1197     assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
1198     r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
1199     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1200     if (jit_thumb_p())
1201         iss(thumb.s[0], thumb.s[1]);
1202     else
1203         ii(thumb.i);
1204 }
1205
1206 static void
1207 _cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1208 {
1209     jit_thumb_t thumb;
1210     assert(!(cc & 0x0fffffff));
1211     assert(!(o  & 0xf00ff00f));
1212     assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
1213     r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
1214     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1215     if (jit_thumb_p())
1216         iss(thumb.s[0], thumb.s[1]);
1217     else
1218         ii(thumb.i);
1219 }
1220
1221 static void
1222 _cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1223 {
1224     jit_thumb_t thumb;
1225     assert(!(cc & 0x0fffffff));
1226     assert(!(o  & 0xf00ff00f));
1227     assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
1228     r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
1229     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1230     if (jit_thumb_p())
1231         iss(thumb.s[0], thumb.s[1]);
1232     else
1233         ii(thumb.i);
1234 }
1235
1236 static void
1237 _cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1238 {
1239     jit_thumb_t thumb;
1240     assert(!(cc & 0x0fffffff));
1241     assert(!(o  & 0xf00ff00f));
1242     assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
1243     r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
1244     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1245     if (jit_thumb_p())
1246         iss(thumb.s[0], thumb.s[1]);
1247     else
1248         ii(thumb.i);
1249 }
1250
1251 static void
1252 _cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1253 {
1254     jit_thumb_t thumb;
1255     /* i0 << 2 is byte offset */
1256     assert(!(cc & 0x0fffffff));
1257     assert(!(o  & 0xf00ff0ff));
1258     if (r0 & 1) {
1259         assert(!(o & ARM_V_F64));
1260         o |= ARM_V_D;
1261     }
1262     r0 = vfp_regno(r0);
1263     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0);
1264     if (jit_thumb_p())
1265         iss(thumb.s[0], thumb.s[1]);
1266     else
1267         ii(thumb.i);
1268 }
1269
1270 static void
1271 _cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1272 {
1273     jit_thumb_t thumb;
1274     assert(!(cc & 0x0fffffff));
1275     assert(!(o  & 0xf00ff0ff));
1276     /* save i0 double precision registers */
1277     if (o & ARM_V_F64)          i0 <<= 1;
1278     /* if (r1 & 1) cc & ARM_V_F64 must be false */
1279     if (r1 & 1) o |= ARM_V_D;   r1 = vfp_regno(r1);
1280     assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
1281     thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
1282     if (jit_thumb_p())
1283         iss(thumb.s[0], thumb.s[1]);
1284     else
1285         ii(thumb.i);
1286 }
1287
1288 static void
1289 _vfp_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1290 {
1291     jit_int32_t         reg;
1292     reg = jit_get_reg(jit_class_fpr);
1293     VMOV_S_A(rn(reg), r1);
1294     VCNT(rn(reg), rn(reg));
1295     VADD_I8(rn(reg), rn(reg), rn(reg));
1296     VMOV_A_S(r0, rn(reg));
1297     jit_unget_reg(reg);
1298 }
1299
1300 static void
1301 _vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1302 {
1303     assert(jit_fpr_p(r0) && jit_fpr_p(r1));
1304     if (r0 != r1)
1305         VMOV_F32(r0, r1);
1306 }
1307
1308 static void
1309 _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
1310 {
1311     union {
1312         jit_int32_t     i;
1313         jit_float32_t   f;
1314     } u;
1315     jit_int32_t         reg;
1316     jit_int32_t         code;
1317     u.f = i0;
1318     assert(jit_fpr_p(r0));
1319     /* float arguments are packed, for others,
1320      * lightning only address even registers */
1321     if (!(r0 & 1) && (r0 - 32) >= 0 &&
1322         ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
1323          (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
1324         VIMM(code, r0);
1325     else {
1326         reg = jit_get_reg(jit_class_gpr);
1327         movi(rn(reg), u.i);
1328         VMOV_S_A(r0, rn(reg));
1329         jit_unget_reg(reg);
1330     }
1331 }
1332
1333 static void
1334 _vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1335 {
1336     assert(jit_fpr_p(r0) && jit_fpr_p(r1));
1337     if (r0 != r1)
1338         VMOV_F64(r0, r1);
1339 }
1340
1341 static void
1342 _vfp_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1343 {
1344     jit_int32_t                reg;
1345     reg = jit_get_reg(jit_class_gpr);
1346     movi(rn(reg), i0);
1347     vfp_movr_w_f(r0, rn(reg));
1348     jit_unget_reg(reg);
1349 }
1350
1351 static void
1352 _vfp_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
1353 {
1354     jit_int32_t                t0, t1;
1355     t0 = jit_get_reg(jit_class_gpr);
1356     t1 = jit_get_reg(jit_class_gpr);
1357     movi(rn(t0), i0);
1358     movi(rn(t1), i1);
1359     vfp_movr_ww_d(r0, rn(t0), rn(t1));
1360     jit_unget_reg(t1);
1361     jit_unget_reg(t0);
1362 }
1363
1364 static void
1365 _vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
1366 {
1367     union {
1368         jit_int32_t     i[2];
1369         jit_float64_t   d;
1370     } u;
1371     jit_int32_t         code;
1372     jit_int32_t         rg0, rg1;
1373     u.d = i0;
1374 #  if __BYTE_ORDER == __BIG_ENDIAN
1375     code = u.i[0];
1376     u.i[0] = u.i[1];
1377     u.i[1] = code;
1378 #  endif
1379     assert(jit_fpr_p(r0));
1380     if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
1381         (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
1382         VIMM(code, r0);
1383     else {
1384         rg0 = jit_get_reg(jit_class_gpr);
1385         rg1 = jit_get_reg(jit_class_gpr);
1386         movi(rn(rg0), u.i[0]);
1387         movi(rn(rg1), u.i[1]);
1388         VMOV_D_AA(r0, rn(rg0), rn(rg1));
1389         jit_unget_reg(rg1);
1390         jit_unget_reg(rg0);
1391     }
1392 }
1393
1394 static void
1395 _vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1396 {
1397     jit_int32_t         reg;
1398     if (jit_fpr_p(r1)) {
1399         if (jit_fpr_p(r0))
1400             VCVT_F64_F32(r0, r1);
1401         else {
1402             reg = jit_get_reg(jit_class_fpr);
1403             VCVT_F64_F32(rn(reg), r1);
1404             VMOV_A_S(r0, rn(reg));
1405             jit_unget_reg(reg);
1406         }
1407     }
1408     else {
1409         reg = jit_get_reg(jit_class_fpr);
1410         VMOV_S_A(rn(reg), r1);
1411         VCVT_F64_F32(rn(reg), rn(reg));
1412         if (jit_fpr_p(r0))
1413             VMOV_F32(r0, rn(reg));
1414         else
1415             VMOV_A_S(r0, rn(reg));
1416         jit_unget_reg(reg);
1417     }
1418 }
1419
1420 static void
1421 _vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1422 {
1423     jit_int32_t         reg;
1424     if (jit_fpr_p(r1)) {
1425         if (jit_fpr_p(r0))
1426             VCVT_F32_F64(r0, r1);
1427         else {
1428             reg = jit_get_reg(jit_class_fpr);
1429             VCVT_F32_F64(rn(reg), r1);
1430             VMOV_AA_D(r0, r0 + 1, rn(reg));
1431             jit_unget_reg(reg);
1432         }
1433     }
1434     else {
1435         reg = jit_get_reg(jit_class_fpr);
1436         VMOV_D_AA(rn(reg), r1, r1 + 1);
1437         VCVT_F32_F64(rn(reg), rn(reg));
1438         if (jit_fpr_p(r0))
1439             VMOV_F64(r0, rn(reg));
1440         else
1441             VMOV_AA_D(r0, r0 + 1, rn(reg));
1442         jit_unget_reg(reg);
1443     }
1444 }
1445
1446 static void
1447 _vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1448 {
1449     jit_int32_t         reg;
1450     if (jit_fpr_p(r0)) {
1451         VMOV_V_I32(r0, r1);
1452         VCVT_F32_S32(r0, r0);
1453     }
1454     else {
1455         reg = jit_get_reg(jit_class_fpr);
1456         VMOV_V_I32(rn(reg), r1);
1457         VCVT_F32_S32(rn(reg), rn(reg));
1458         VMOV_F32(r0, rn(reg));
1459         jit_unget_reg(reg);
1460     }
1461 }
1462
1463 static void
1464 _vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1465 {
1466     jit_int32_t         reg;
1467     if (jit_fpr_p(r0)) {
1468         VMOV_V_I32(r0, r1);
1469         VCVT_F64_S32(r0, r0);
1470     }
1471     else {
1472         reg = jit_get_reg(jit_class_fpr);
1473         VMOV_V_I32(rn(reg), r1);
1474         VCVT_F64_S32(rn(reg), rn(reg));
1475         VMOV_F64(r0, rn(reg));
1476         jit_unget_reg(reg);
1477     }
1478 }
1479
1480 static void
1481 _vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1482 {
1483     jit_int32_t         reg;
1484     reg = jit_get_reg(jit_class_fpr);
1485     if (jit_fpr_p(r1))
1486         VCVT_S32_F32(rn(reg), r1);
1487     else {
1488         VMOV_V_I32(rn(reg), r1);
1489         VCVT_S32_F32(rn(reg), rn(reg));
1490     }
1491     VMOV_A_S32(r0, rn(reg));
1492     jit_unget_reg(reg);
1493 }
1494
1495 static void
1496 _vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1497 {
1498     jit_int32_t         reg;
1499     reg = jit_get_reg(jit_class_fpr);
1500     if (jit_fpr_p(r1))
1501         VCVT_S32_F64(rn(reg), r1);
1502     else {
1503         VMOV_V_I32(rn(reg), r1);
1504         VCVT_S32_F64(rn(reg), rn(reg));
1505     }
1506     VMOV_A_S32(r0, rn(reg));
1507     jit_unget_reg(reg);
1508 }
1509
1510 static void
1511 _vfp_fmar_f(jit_state_t *_jit,
1512             jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1513 {
1514     jit_int32_t         t0;
1515     /* untested */
1516     if (0 && jit_cpu.vfp >= 4) {
1517         if (r0 != r2 && r0 != r3) {
1518             vfp_movr_f(r0, r1);
1519             VFMA_F32(r0, r2, r3);
1520         }
1521         else {
1522             t0 = jit_get_reg(jit_class_fpr);
1523             vfp_movr_f(rn(t0), r1);
1524             VFMA_F32(rn(t0), r2, r3);
1525             vfp_movr_f(r0, rn(t0));
1526             jit_unget_reg(t0);
1527         }
1528     }
1529     else {
1530         if (r0 != r3) {
1531             vfp_mulr_f(r0, r1, r2);
1532             vfp_addr_f(r0, r0, r3);
1533         }
1534         else {
1535             t0 = jit_get_reg(jit_class_fpr);
1536             vfp_mulr_f(rn(t0), r1, r2);
1537             vfp_addr_f(r0, rn(t0), r3);
1538             jit_unget_reg(t0);
1539         }
1540     }
1541 }
1542
1543 static void
1544 _vfp_fmsr_f(jit_state_t *_jit,
1545             jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1546 {
1547     jit_int32_t         t0;
1548     /* untested */
1549     if (0 && jit_cpu.vfp >= 4) {
1550         if (r0 != r2 && r0 != r3) {
1551             vfp_movr_f(r0, r1);
1552             VFMS_F32(r0, r2, r3);
1553         }
1554         else {
1555             t0 = jit_get_reg(jit_class_fpr);
1556             vfp_movr_f(rn(t0), r1);
1557             VFMS_F32(rn(t0), r2, r3);
1558             vfp_movr_f(r0, rn(t0));
1559             jit_unget_reg(t0);
1560         }
1561         vfp_negr_f(r0, r0);
1562     }
1563     else {
1564         if (r0 != r3) {
1565             vfp_mulr_f(r0, r1, r2);
1566             vfp_subr_f(r0, r0, r3);
1567         }
1568         else {
1569             t0 = jit_get_reg(jit_class_fpr);
1570             vfp_mulr_f(rn(t0), r1, r2);
1571             vfp_subr_f(r0, rn(t0), r3);
1572             jit_unget_reg(t0);
1573         }
1574     }
1575 }
1576
1577 static void
1578 _vfp_fnmar_f(jit_state_t *_jit,
1579              jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1580 {
1581     jit_int32_t         t0;
1582     /* untested */
1583     if (0 && jit_cpu.vfp >= 4) {
1584         if (r0 != r2 && r0 != r3) {
1585             vfp_movr_f(r0, r1);
1586             VFNMA_F32(r0, r2, r3);
1587         }
1588         else {
1589             t0 = jit_get_reg(jit_class_fpr);
1590             vfp_movr_f(rn(t0), r1);
1591             VFNMA_F32(rn(t0), r2, r3);
1592             vfp_movr_f(r0, rn(t0));
1593             jit_unget_reg(t0);
1594         }
1595     }
1596     else {
1597         t0 = jit_get_reg(jit_class_fpr);
1598         vfp_negr_f(rn(t0), r1);
1599         vfp_mulr_f(rn(t0), rn(t0), r2);
1600         vfp_subr_f(r0, rn(t0), r3);
1601         jit_unget_reg(t0);
1602     }
1603 }
1604
1605 static void
1606 _vfp_fnmsr_f(jit_state_t *_jit,
1607              jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1608 {
1609     jit_int32_t         t0;
1610     /* untested */
1611     if (0 && jit_cpu.vfp >= 4) {
1612         if (r0 != r2 && r0 != r3) {
1613             vfp_movr_f(r0, r1);
1614             VFNMS_F32(r0, r2, r3);
1615         }
1616         else {
1617             t0 = jit_get_reg(jit_class_fpr);
1618             vfp_movr_f(rn(t0), r1);
1619             VFNMS_F32(rn(t0), r2, r3);
1620             vfp_movr_f(r0, rn(t0));
1621             jit_unget_reg(t0);
1622         }
1623         vfp_negr_f(r0, r0);
1624     }
1625     else {
1626         t0 = jit_get_reg(jit_class_fpr);
1627         vfp_negr_f(rn(t0), r1);
1628         vfp_mulr_f(rn(t0), rn(t0), r2);
1629         vfp_addr_f(r0, rn(t0), r3);
1630         jit_unget_reg(t0);
1631     }
1632 }
1633
1634 static void
1635 _vfp_fmar_d(jit_state_t *_jit,
1636             jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1637 {
1638     jit_int32_t         t0;
1639     /* untested */
1640     if (0 && jit_cpu.vfp >= 4) {
1641         if (r0 != r2 && r0 != r3) {
1642             vfp_movr_d(r0, r1);
1643             VFMA_F64(r0, r2, r3);
1644         }
1645         else {
1646             t0 = jit_get_reg(jit_class_fpr);
1647             vfp_movr_d(rn(t0), r1);
1648             VFMA_F64(rn(t0), r2, r3);
1649             vfp_movr_d(r0, rn(t0));
1650             jit_unget_reg(t0);
1651         }
1652     }
1653     else {
1654         if (r0 != r3) {
1655             vfp_mulr_d(r0, r1, r2);
1656             vfp_addr_d(r0, r0, r3);
1657         }
1658         else {
1659             t0 = jit_get_reg(jit_class_fpr);
1660             vfp_mulr_d(rn(t0), r1, r2);
1661             vfp_addr_d(r0, rn(t0), r3);
1662             jit_unget_reg(t0);
1663         }
1664     }
1665 }
1666
1667 static void
1668 _vfp_fmsr_d(jit_state_t *_jit,
1669             jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1670 {
1671     jit_int32_t         t0;
1672     /* untested */
1673     if (0 && jit_cpu.vfp >= 4) {
1674         if (r0 != r2 && r0 != r3) {
1675             vfp_movr_d(r0, r1);
1676             VFMS_F64(r0, r2, r3);
1677         }
1678         else {
1679             t0 = jit_get_reg(jit_class_fpr);
1680             vfp_movr_d(rn(t0), r1);
1681             VFMS_F64(rn(t0), r2, r3);
1682             vfp_movr_d(r0, rn(t0));
1683             jit_unget_reg(t0);
1684         }
1685         vfp_negr_d(r0, r0);
1686     }
1687     else {
1688         if (r0 != r3) {
1689             vfp_mulr_d(r0, r1, r2);
1690             vfp_subr_d(r0, r0, r3);
1691         }
1692         else {
1693             t0 = jit_get_reg(jit_class_fpr);
1694             vfp_mulr_d(rn(t0), r1, r2);
1695             vfp_subr_d(r0, rn(t0), r3);
1696             jit_unget_reg(t0);
1697         }
1698     }
1699 }
1700
1701 static void
1702 _vfp_fnmar_d(jit_state_t *_jit,
1703              jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1704 {
1705     jit_int32_t         t0;
1706     /* untested */
1707     if (0 && jit_cpu.vfp >= 4) {
1708         if (r0 != r2 && r0 != r3) {
1709             vfp_movr_d(r0, r1);
1710             VFNMA_F64(r0, r2, r3);
1711         }
1712         else {
1713             t0 = jit_get_reg(jit_class_fpr);
1714             vfp_movr_d(rn(t0), r1);
1715             VFNMA_F64(rn(t0), r2, r3);
1716             vfp_movr_d(r0, rn(t0));
1717             jit_unget_reg(t0);
1718         }
1719     }
1720     else {
1721         t0 = jit_get_reg(jit_class_fpr);
1722         vfp_negr_d(rn(t0), r1);
1723         vfp_mulr_d(rn(t0), rn(t0), r2);
1724         vfp_subr_d(r0, rn(t0), r3);
1725         jit_unget_reg(t0);
1726     }
1727 }
1728
1729 static void
1730 _vfp_fnmsr_d(jit_state_t *_jit,
1731              jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1732 {
1733     jit_int32_t         t0;
1734     /* untested */
1735     if (0 && jit_cpu.vfp >= 4) {
1736         if (r0 != r2 && r0 != r3) {
1737             vfp_movr_d(r0, r1);
1738             VFNMS_F64(r0, r2, r3);
1739         }
1740         else {
1741             t0 = jit_get_reg(jit_class_fpr);
1742             vfp_movr_d(rn(t0), r1);
1743             VFNMS_F64(rn(t0), r2, r3);
1744             vfp_movr_d(r0, rn(t0));
1745             jit_unget_reg(t0);
1746         }
1747         vfp_negr_d(r0, r0);
1748     }
1749     else {
1750         t0 = jit_get_reg(jit_class_fpr);
1751         vfp_negr_d(rn(t0), r1);
1752         vfp_mulr_d(rn(t0), rn(t0), r2);
1753         vfp_addr_d(r0, rn(t0), r3);
1754         jit_unget_reg(t0);
1755     }
1756 }
1757
/* fopi(name): define _vfp_<name>i_f(), the float "register op immediate"
 * form of vfp_<name>r_f().  The immediate is loaded into a scratch FPR. */
#  define fopi(name)                                                    \
static void                                                             \
_vfp_##name##i_f(jit_state_t *_jit,                                     \
                 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)      \
{                                                                       \
    jit_int32_t         tmp;                                            \
    tmp = jit_get_reg(jit_class_fpr);                                   \
    vfp_movi_f(rn(tmp), i0);                                            \
    vfp_##name##r_f(r0, r1, rn(tmp));                                   \
    jit_unget_reg(tmp);                                                 \
}
/* dopi(name): same as fopi() for the double variant _vfp_<name>i_d(). */
#  define dopi(name)                                                    \
static void                                                             \
_vfp_##name##i_d(jit_state_t *_jit,                                     \
                 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)      \
{                                                                       \
    jit_int32_t         tmp;                                            \
    tmp = jit_get_reg(jit_class_fpr);                                   \
    vfp_movi_d(rn(tmp), i0);                                            \
    vfp_##name##r_d(r0, r1, rn(tmp));                                   \
    jit_unget_reg(tmp);                                                 \
}
/* fbopi(name): define _vfp_b<name>i_f(), the float "branch on register vs
 * immediate" form of vfp_b<name>r_f().  The scratch FPR is requested with
 * jit_class_nospill; the value returned by the register form is passed
 * through unchanged. */
#  define fbopi(name)                                                   \
static jit_word_t                                                       \
_vfp_b##name##i_f(jit_state_t *_jit,                                    \
                  jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)     \
{                                                                       \
    jit_word_t          result;                                         \
    jit_int32_t         tmp;                                            \
    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);                 \
    vfp_movi_f(rn(tmp), i0);                                            \
    result = vfp_b##name##r_f(r0, r1, rn(tmp));                         \
    jit_unget_reg(tmp);                                                 \
    return (result);                                                    \
}
/* dbopi(name): same as fbopi() for the double variant _vfp_b<name>i_d(). */
#  define dbopi(name)                                                   \
static jit_word_t                                                       \
_vfp_b##name##i_d(jit_state_t *_jit,                                    \
                  jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)     \
{                                                                       \
    jit_word_t          result;                                         \
    jit_int32_t         tmp;                                            \
    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);                 \
    vfp_movi_d(rn(tmp), i0);                                            \
    result = vfp_b##name##r_d(r0, r1, rn(tmp));                         \
    jit_unget_reg(tmp);                                                 \
    return (result);                                                    \
}
1804
1805 fopi(add)
1806 dopi(add)
1807 fopi(sub)
1808 fopi(rsb)
1809 dopi(rsb)
1810 dopi(sub)
1811 fopi(mul)
1812 dopi(mul)
1813 fopi(div)
1814 dopi(div)
1815
1816 static void
1817 _vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1818 {
1819     jit_int32_t         rg0, rg1;
1820     if (jit_fpr_p(r0)) {
1821         if (jit_fpr_p(r1))
1822             VCMP_F32(r0, r1);
1823         else {
1824             rg1 = jit_get_reg(jit_class_fpr);
1825             VMOV_S_A(rn(rg1), r1);
1826             VCMP_F32(r0, rn(rg1));
1827             jit_unget_reg(rg1);
1828         }
1829     }
1830     else {
1831         rg0 = jit_get_reg(jit_class_fpr);
1832         VMOV_S_A(rn(rg0), r0);
1833         if (jit_fpr_p(r1))
1834             VCMP_F32(rn(rg0), r1);
1835         else {
1836             rg1 = jit_get_reg(jit_class_fpr);
1837             VMOV_S_A(rn(rg1), r1);
1838             VCMP_F32(rn(rg0), rn(rg1));
1839             jit_unget_reg(rg1);
1840         }
1841         jit_unget_reg(rg0);
1842     }
1843 }
1844
1845 static void
1846 _vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1847 {
1848     jit_int32_t         rg0, rg1;
1849     if (jit_fpr_p(r0)) {
1850         if (jit_fpr_p(r1))
1851             VCMP_F64(r0, r1);
1852         else {
1853             rg1 = jit_get_reg(jit_class_fpr);
1854             VMOV_D_AA(rn(rg1), r1, r1 + 1);
1855             VCMP_F64(r0, rn(rg1));
1856             jit_unget_reg(rg1);
1857         }
1858     }
1859     else {
1860         rg0 = jit_get_reg(jit_class_fpr);
1861         VMOV_D_AA(rn(rg0), r0, r0 + 1);
1862         if (jit_fpr_p(r1))
1863             VCMP_F64(rn(rg0), r1);
1864         else {
1865             rg1 = jit_get_reg(jit_class_fpr);
1866             VMOV_D_AA(rn(rg1), r1, r1 + 1);
1867             VCMP_F64(rn(rg0), rn(rg1));
1868             jit_unget_reg(rg1);
1869         }
1870         jit_unget_reg(rg0);
1871     }
1872 }
1873
1874 static void
1875 _vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
1876 {
1877     VMRS(_R15_REGNO);
1878     if (jit_thumb_p()) {
1879         if ((c0 ^ c1) >> 28 == 1) {
1880             ITE(c0);
1881             if (r0 < 8) {
1882                 T1_MOVI(r0, 0);
1883                 T1_MOVI(r0, 1);
1884             }
1885             else {
1886                 T2_MOVI(r0, 0);
1887                 T2_MOVI(r0, 1);
1888             }
1889         }
1890         else {
1891             if (r0 < 8) {
1892                 IT(c0);
1893                 T1_MOVI(r0, 0);
1894                 IT(c1);
1895                 T1_MOVI(r0, 1);
1896             }
1897             else {
1898                 IT(c0);
1899                 T2_MOVI(r0, 0);
1900                 IT(c1);
1901                 T2_MOVI(r0, 1);
1902             }
1903         }
1904     }
1905     else {
1906         CC_MOVI(c0, r0, 0);
1907         CC_MOVI(c1, r0, 1);
1908     }
1909 }
1910
1911 static void
1912 _vcmp01_f(jit_state_t *_jit, int c0, int c1,
1913           jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1914 {
1915     vfp_cmp_f(r1, r2);
1916     vcmp01_x(c0, c1, r0);
1917 }
1918
1919 static void
1920 _vcmp01_d(jit_state_t *_jit, int c0, int c1,
1921           jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1922 {
1923     vfp_cmp_d(r1, r2);
1924     vcmp01_x(c0, c1, r0);
1925 }
1926
1927 static void
1928 _vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1929 {
1930     if (jit_thumb_p()) {
1931         if (r0 < 8) {
1932             T1_MOVI(r0, 1);
1933             VMRS(_R15_REGNO);
1934             IT(cc);
1935             T1_MOVI(r0, 0);
1936         }
1937         else {
1938             T2_MOVI(r0, 1);
1939             VMRS(_R15_REGNO);
1940             IT(cc);
1941             T2_MOVI(r0, 0);
1942         }
1943     }
1944     else {
1945         VMRS(_R15_REGNO);
1946         MOVI(r0, 1);
1947         CC_MOVI(cc, r0, 0);
1948     }
1949 }
1950 static void
1951 _vcmp_10_f(jit_state_t *_jit, int cc,
1952            jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1953 {
1954     vfp_cmp_f(r1, r2);
1955     vcmp10_x(cc, r0);
1956 }
1957
1958 static void
1959 _vcmp_10_d(jit_state_t *_jit, int cc,
1960            jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1961 {
1962     vfp_cmp_d(r1, r2);
1963     vcmp10_x(cc, r0);
1964 }
1965
/* Invocations of the fopi()/dopi() helper macros, one float and one
 * double entry per comparison name.  Both macros are defined outside
 * this chunk and are #undef'd at the end of the section.
 * NOTE(review): presumably each expands to the immediate-operand variant
 * of the named comparison -- confirm against the macro definitions. */
1966 fopi(lt)
1967 dopi(lt)
1968 fopi(le)
1969 dopi(le)
1970 fopi(eq)
1971 dopi(eq)
1972 fopi(ge)
1973 dopi(ge)
1974 fopi(gt)
1975 dopi(gt)
1976 fopi(ne)
1977 dopi(ne)
1978 fopi(unlt)
1979 dopi(unlt)
1980 fopi(unle)
1981 dopi(unle)
1982
1983 static void
1984 _vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
1985 {
1986     VMRS(_R15_REGNO);
1987     if (jit_thumb_p()) {
1988         ITE(ARM_CC_NE);
1989         if (r0 < 8) {
1990             T1_MOVI(r0, 0);
1991             T1_MOVI(r0, 1);
1992             IT(ARM_CC_VS);
1993             T1_MOVI(r0, 1);
1994         }
1995         else {
1996             T2_MOVI(r0, 0);
1997             T2_MOVI(r0, 1);
1998             IT(ARM_CC_VS);
1999             T2_MOVI(r0, 1);
2000         }
2001     }
2002     else {
2003         CC_MOVI(ARM_CC_NE, r0, 0);
2004         CC_MOVI(ARM_CC_EQ, r0, 1);
2005         CC_MOVI(ARM_CC_VS, r0, 1);
2006     }
2007 }
2008
2009 static void
2010 _vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2011 {
2012     vfp_cmp_f(r1, r2);
2013     vfp_uneqr_x(r0);
2014 }
2015
/* NOTE(review): fopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand float variant of uneq -- confirm. */
2016 fopi(uneq)
2017
2018 static void
2019 _vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2020 {
2021     vfp_cmp_d(r1, r2);
2022     vfp_uneqr_x(r0);
2023 }
2024
/* NOTE(review): dopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand double variant of uneq -- confirm. */
2025 dopi(uneq)
2026
2027 static void
2028 _vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
2029 {
2030     if (jit_thumb_p()) {
2031         if (r0 < 8) {
2032             T1_MOVI(r0, 0);
2033             VMRS(_R15_REGNO);
2034             IT(cc);
2035             T1_MOVI(r0, 1);
2036         }
2037         else {
2038             T2_MOVI(r0, 0);
2039             VMRS(_R15_REGNO);
2040             IT(cc);
2041             T2_MOVI(r0, 1);
2042         }
2043     }
2044     else {
2045         MOVI(r0, 0);
2046         VMRS(_R15_REGNO);
2047         CC_MOVI(cc, r0, 1);
2048     }
2049 }
2050
2051 static void
2052 _vcmp_01_f(jit_state_t *_jit, int cc,
2053            jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2054 {
2055     vfp_cmp_f(r1, r2);
2056     vcmp_01_x(cc, r0);
2057 }
2058
2059 static void
2060 _vcmp_01_d(jit_state_t *_jit, int cc,
2061            jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2062 {
2063     vfp_cmp_d(r1, r2);
2064     vcmp_01_x(cc, r0);
2065 }
2066
/* NOTE(review): fopi()/dopi() are defined outside this chunk; presumably
 * these instantiate the immediate-operand float/double variants of unge
 * and ungt -- confirm against the macro definitions. */
2067 fopi(unge)
2068 dopi(unge)
2069 fopi(ungt)
2070 dopi(ungt)
2071
2072 static void
2073 _vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
2074 {
2075     VMRS(_R15_REGNO);
2076     if (jit_thumb_p()) {
2077         ITE(ARM_CC_NE);
2078         if (r0 < 8) {
2079             T1_MOVI(r0, 1);
2080             T1_MOVI(r0, 0);
2081             IT(ARM_CC_VS);
2082             T1_MOVI(r0, 0);
2083         }
2084         else {
2085             T2_MOVI(r0, 1);
2086             T2_MOVI(r0, 0);
2087             IT(ARM_CC_VS);
2088             T2_MOVI(r0, 0);
2089         }
2090     }
2091     else {
2092         CC_MOVI(ARM_CC_NE, r0, 1);
2093         CC_MOVI(ARM_CC_EQ, r0, 0);
2094         CC_MOVI(ARM_CC_VS, r0, 0);
2095     }
2096 }
2097
2098 static void
2099 _vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2100 {
2101     vfp_cmp_f(r1, r2);
2102     vfp_ltgtr_x(r0);
2103 }
2104
/* NOTE(review): fopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand float variant of ltgt -- confirm. */
2105 fopi(ltgt)
2106
2107 static void
2108 _vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2109 {
2110     vfp_cmp_d(r1, r2);
2111     vfp_ltgtr_x(r0);
2112 }
2113
/* NOTE(review): dopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand double variant of ltgt -- confirm. */
2114 dopi(ltgt)
2115
2116 static void
2117 _vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2118 {
2119     vfp_cmp_f(r1, r2);
2120     vcmp10_x(ARM_CC_VS, r0);
2121 }
2122
/* NOTE(review): fopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand float variant of ord -- confirm. */
2123 fopi(ord)
2124
2125 static void
2126 _vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2127 {
2128     vfp_cmp_d(r1, r2);
2129     vcmp10_x(ARM_CC_VS, r0);
2130 }
2131
/* NOTE(review): dopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand double variant of ord -- confirm. */
2132 dopi(ord)
2133
2134 static void
2135 _vfp_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2136 {
2137     vfp_cmp_f(r1, r2);
2138     vcmp_01_x(ARM_CC_VS, r0);
2139 }
2140
/* NOTE(review): fopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand float variant of unord -- confirm. */
2141 fopi(unord)
2142
2143 static void
2144 _vfp_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2145 {
2146     vfp_cmp_d(r1, r2);
2147     vcmp_01_x(ARM_CC_VS, r0);
2148 }
2149
/* NOTE(review): dopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand double variant of unord -- confirm. */
2150 dopi(unord)
2151
2152 static jit_word_t
2153 _vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
2154 {
2155     jit_word_t          d, w;
2156     VMRS(_R15_REGNO);
2157     w = _jit->pc.w;
2158     if (jit_thumb_p()) {
2159         d = ((i0 - w) >> 1) - 2;
2160         assert(_s20P(d));
2161         T2_CC_B(cc, encode_thumb_cc_jump(d));
2162     }
2163     else {
2164         d = ((i0 - w) >> 2) - 2;
2165         assert(_s24P(d));
2166         CC_B(cc, d & 0x00ffffff);
2167     }
2168     return (w);
2169 }
2170
2171
2172 static jit_word_t
2173 _vbcmp_f(jit_state_t *_jit, int cc,
2174          jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2175 {
2176     vfp_cmp_f(r0, r1);
2177     return (vbcmp_x(cc, i0));
2178 }
2179
2180 static jit_word_t
2181 _vbcmp_d(jit_state_t *_jit, int cc,
2182          jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2183 {
2184     vfp_cmp_d(r0, r1);
2185     return (vbcmp_x(cc, i0));
2186 }
2187
2188 static jit_word_t
2189 _vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
2190 {
2191     jit_word_t          d, p, w;
2192     VMRS(_R15_REGNO);
2193     p = _jit->pc.w;
2194     if (jit_thumb_p()) {
2195         T2_CC_B(cc, 0);
2196         w = _jit->pc.w;
2197         d = ((i0 - w) >> 1) - 2;
2198         assert(_s20P(d));
2199         T2_B(encode_thumb_jump(d));
2200     }
2201     else {
2202         CC_B(cc, 0);
2203         w = _jit->pc.w;
2204         d = ((i0 - w) >> 2) - 2;
2205         assert(_s24P(d));
2206         B(d & 0x00ffffff);
2207     }
2208     patch_at(arm_patch_jump, p, _jit->pc.w);
2209     return (w);
2210 }
2211
2212 static jit_word_t
2213 _vbncmp_f(jit_state_t *_jit, int cc,
2214           jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2215 {
2216     vfp_cmp_f(r0, r1);
2217     return (vbncmp_x(cc, i0));
2218 }
2219
2220 static jit_word_t
2221 _vbncmp_d(jit_state_t *_jit, int cc,
2222           jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2223 {
2224     vfp_cmp_d(r0, r1);
2225     return (vbncmp_x(cc, i0));
2226 }
2227
/* Invocations of the fbopi()/dbopi() helper macros, one float and one
 * double entry per branch-on-compare name.  Both macros are defined
 * outside this chunk and are #undef'd at the end of the section.
 * NOTE(review): presumably each expands to the immediate-operand variant
 * of the named compare-and-branch -- confirm against the definitions. */
2228 fbopi(lt)
2229 dbopi(lt)
2230 fbopi(le)
2231 dbopi(le)
2232 fbopi(eq)
2233 dbopi(eq)
2234 fbopi(ge)
2235 dbopi(ge)
2236 fbopi(gt)
2237 dbopi(gt)
2238 fbopi(ne)
2239 dbopi(ne)
2240 fbopi(unlt)
2241 dbopi(unlt)
2242 fbopi(unle)
2243 dbopi(unle)
2244
2245 static jit_word_t
2246 _vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
2247 {
2248     jit_word_t          d, p, q, w;
2249     VMRS(_R15_REGNO);
2250     p = _jit->pc.w;
2251     if (jit_thumb_p()) {
2252         T2_CC_B(ARM_CC_VS, 0);
2253         q = _jit->pc.w;
2254         T2_CC_B(ARM_CC_NE, 0);
2255         patch_at(arm_patch_jump, p, _jit->pc.w);
2256         w = _jit->pc.w;
2257         d = ((i0 - w) >> 1) - 2;
2258         assert(_s20P(d));
2259         T2_B(encode_thumb_jump(d));
2260     }
2261     else {
2262         CC_B(ARM_CC_VS, 0);
2263         q = _jit->pc.w;
2264         CC_B(ARM_CC_NE, 0);
2265         patch_at(arm_patch_jump, p, _jit->pc.w);
2266         w = _jit->pc.w;
2267         d = ((i0 - w) >> 2) - 2;
2268         assert(_s24P(d));
2269         B(d & 0x00ffffff);
2270     }
2271     patch_at(arm_patch_jump, q, _jit->pc.w);
2272     return (w);
2273 }
2274
2275 static jit_word_t
2276 _vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2277 {
2278     vfp_cmp_f(r0, r1);
2279     return (vfp_buneqr_x(i0));
2280 }
2281
/* NOTE(review): fbopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand float variant of buneq -- confirm. */
2282 fbopi(uneq)
2283
2284 static jit_word_t
2285 _vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2286 {
2287     vfp_cmp_d(r0, r1);
2288     return (vfp_buneqr_x(i0));
2289 }
2290
/* NOTE(review): dbopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand double variant of buneq -- confirm. */
2291 dbopi(uneq)
2292
2293 static jit_word_t
2294 _vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
2295 {
2296     jit_word_t          d, p, w;
2297     VMRS(_R15_REGNO);
2298     p = _jit->pc.w;
2299     if (jit_thumb_p()) {
2300         T2_CC_B(ARM_CC_MI, 0);
2301         w = _jit->pc.w;
2302         d = ((i0 - w) >> 1) - 2;
2303         assert(_s20P(d));
2304         T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
2305     }
2306     else {
2307         CC_B(ARM_CC_MI, 0);
2308         w = _jit->pc.w;
2309         d = ((i0 - w) >> 2) - 2;
2310         assert(_s24P(d));
2311         CC_B(ARM_CC_HS, d & 0x00ffffff);
2312     }
2313     patch_at(arm_patch_jump, p, _jit->pc.w);
2314     return (w);
2315 }
2316
2317 static jit_word_t
2318 _vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2319 {
2320     vfp_cmp_f(r0, r1);
2321     return (vfp_bunger_x(i0));
2322 }
2323
/* NOTE(review): fbopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand float variant of bunge -- confirm. */
2324 fbopi(unge)
2325
2326 static jit_word_t
2327 _vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2328 {
2329     vfp_cmp_d(r0, r1);
2330     return (vfp_bunger_x(i0));
2331 }
2332
/* NOTE(review): dbopi() is defined outside this chunk; presumably this
 * instantiates the immediate-operand double variant of bunge -- confirm. */
2333 dbopi(unge)
2334
2335 static jit_word_t
2336 _vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
2337 {
2338     jit_word_t          d, p, q, w;
2339     VMRS(_R15_REGNO);
2340     p = _jit->pc.w;
2341     if (jit_thumb_p()) {
2342         T2_CC_B(ARM_CC_VS, 0);
2343         q = _jit->pc.w;
2344         T2_CC_B(ARM_CC_EQ, 0);
2345         w = _jit->pc.w;
2346         d = ((i0 - w) >> 1) - 2;
2347         assert(_s20P(d));
2348         T2_B(encode_thumb_jump(d));
2349     }
2350     else {
2351         CC_B(ARM_CC_VS, 0);
2352         q = _jit->pc.w;
2353         CC_B(ARM_CC_EQ, 0);
2354         w = _jit->pc.w;
2355         d = ((i0 - w) >> 2) - 2;
2356         assert(_s24P(d));
2357         B(d & 0x00ffffff);
2358     }
2359     patch_at(arm_patch_jump, p, _jit->pc.w);
2360     patch_at(arm_patch_jump, q, _jit->pc.w);
2361     return (w);
2362 }
2363
2364 static jit_word_t
2365 _vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2366 {
2367     vfp_cmp_f(r0, r1);
2368     return (vfp_bltgtr_x(i0));
2369 }
2370
/* NOTE(review): fbopi()/dbopi() are defined outside this chunk;
 * presumably these instantiate the immediate-operand variants of bungt
 * (float and double) and bltgt (float) -- confirm. */
2371 fbopi(ungt)
2372 dbopi(ungt)
2373 fbopi(ltgt)
2374
2375 static jit_word_t
2376 _vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2377 {
2378     vfp_cmp_d(r0, r1);
2379     return (vfp_bltgtr_x(i0));
2380 }
2381
/* NOTE(review): fbopi()/dbopi() are defined outside this chunk;
 * presumably these instantiate the immediate-operand variants of bltgt
 * (double), bord and bunord (float and double) -- confirm. */
2382 dbopi(ltgt)
2383 fbopi(ord)
2384 dbopi(ord)
2385 fbopi(unord)
2386 dbopi(unord)
2387
2388 static void
2389 _vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2390 {
2391     jit_int32_t         gpr;
2392     if (jit_fpr_p(r0)) {
2393         gpr = jit_get_reg(jit_class_gpr);
2394         movi(rn(gpr), i0);
2395         VLDR_F32(r0, rn(gpr), 0);
2396         jit_unget_reg(gpr);
2397     }
2398     else
2399         ldi_i(r0, i0);
2400 }
2401
2402 static void
2403 _vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2404 {
2405     jit_int32_t         reg;
2406     reg = jit_get_reg(jit_class_gpr);
2407     movi(rn(reg), i0);
2408     if (jit_fpr_p(r0))
2409         VLDR_F64(r0, rn(reg), 0);
2410     else {
2411         ldr_i(r0, rn(reg));
2412         ldxi_i(r0 + 1, rn(reg), 4);
2413     }
2414     jit_unget_reg(reg);
2415 }
2416
2417 static void
2418 _vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2419 {
2420     jit_int32_t         reg;
2421     if (jit_fpr_p(r0)) {
2422         reg = jit_get_reg(jit_class_gpr);
2423         addr(rn(reg), r1, r2);
2424         VLDR_F32(r0, rn(reg), 0);
2425         jit_unget_reg(reg);
2426     }
2427     else
2428         ldxr_i(r0, r1, r2);
2429 }
2430
2431 static void
2432 _vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2433 {
2434     jit_int32_t         reg;
2435     reg = jit_get_reg(jit_class_gpr);
2436     addr(rn(reg), r1, r2);
2437     if (jit_fpr_p(r0))
2438         VLDR_F64(r0, rn(reg), 0);
2439     else {
2440         ldr_i(r0, rn(reg));
2441         ldxi_i(r0 + 1, rn(reg), 4);
2442     }
2443     jit_unget_reg(reg);
2444 }
2445
2446 static void
2447 _vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2448 {
2449     jit_int32_t         reg;
2450     if (jit_fpr_p(r0)) {
2451         if (i0 >= 0) {
2452             assert(!(i0 & 3));
2453             if (i0 < 1024)
2454                 VLDR_F32(r0, r1, i0 >> 2);
2455             else {
2456                 reg = jit_get_reg(jit_class_gpr);
2457                 addi(rn(reg), r1, i0);
2458                 VLDR_F32(r0, rn(reg), 0);
2459                 jit_unget_reg(reg);
2460             }
2461         }
2462         else {
2463             i0 = -i0;
2464             assert(!(i0 & 3));
2465             if (i0 < 1024)
2466                 VLDRN_F32(r0, r1, i0 >> 2);
2467             else {
2468                 reg = jit_get_reg(jit_class_gpr);
2469                 subi(rn(reg), r1, i0);
2470                 VLDR_F32(r0, rn(reg), 0);
2471                 jit_unget_reg(reg);
2472             }
2473         }
2474     }
2475     else
2476         ldxi_i(r0, r1, i0);
2477 }
2478
2479 static void
2480 _vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2481 {
2482     jit_int32_t         reg;
2483     if (jit_fpr_p(r0)) {
2484         if (i0 >= 0) {
2485             assert(!(i0 & 3));
2486             if (i0 < 1024)
2487                 VLDR_F64(r0, r1, i0 >> 2);
2488             else {
2489                 reg = jit_get_reg(jit_class_gpr);
2490                 addi(rn(reg), r1, i0);
2491                 VLDR_F64(r0, rn(reg), 0);
2492                 jit_unget_reg(reg);
2493             }
2494         }
2495         else {
2496             i0 = -i0;
2497             assert(!(i0 & 3));
2498             if (i0 < 1024)
2499                 VLDRN_F64(r0, r1, i0 >> 2);
2500             else {
2501                 reg = jit_get_reg(jit_class_gpr);
2502                 subi(rn(reg), r1, i0);
2503                 VLDR_F64(r0, rn(reg), 0);
2504                 jit_unget_reg(reg);
2505             }
2506         }
2507     }
2508     else {
2509         reg = jit_get_reg(jit_class_gpr);
2510         addi(rn(reg), r1, i0);
2511         ldr_i(r0, rn(reg));
2512         ldxi_i(r0 + 1, rn(reg), 4);
2513         jit_unget_reg(reg);
2514     }
2515 }
2516
2517 static void
2518 _vfp_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2519 {
2520     jit_int32_t         t0, r2;
2521     jit_int32_t         t1, r3;
2522     assert(i0 == 4 || i0 == 8);
2523     if (jit_vfp_unaligned_p()) {
2524         t0 = jit_get_reg(jit_class_gpr);
2525         r2 = rn(t0);
2526         if (i0 == 4) {
2527             if (jit_unaligned_p())
2528                 unldr(r2, r1, 4);
2529             else
2530                 ldr(r2, r1);
2531             vfp_movr_w_f(r0, r2);
2532         }
2533         else {
2534             t1 = jit_get_reg(jit_class_gpr);
2535             r3 = rn(t1);
2536 #if __BYTE_ORDER == __LITTLE_ENDIAN
2537             if (jit_unaligned_p()) {
2538                 unldr(r2, r1, 4);
2539                 addi(r3, r1, 4);
2540                 unldr(r3, r3, 4);
2541             }
2542             else {
2543                 ldr(r2, r1);
2544                 ldxi(r3, r1, 4);
2545             }
2546 #else
2547             if (jit_unaligned_p()) {
2548                 unldr(r3, r1, 4);
2549                 addi(r2, r1, 4);
2550                 unldr(r2, r2, 4);
2551             }
2552             else {
2553                 ldr(r3, r1);
2554                 ldxi(r2, r1, 4);
2555             }
2556 #endif
2557             vfp_movr_ww_d(r0, r2, r3);
2558             jit_unget_reg(t1);
2559         }
2560         jit_unget_reg(t0);
2561     }
2562     else {
2563         if (i0 == 4)
2564             vfp_ldr_f(r0, r1);
2565         else
2566             vfp_ldr_d(r0, r1);
2567     }
2568 }
2569
2570 static void
2571 _vfp_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
2572 {
2573    jit_int32_t          t0, r2;
2574     jit_int32_t         t1, r3;
2575     assert(i1 == 4 || i1 == 8);
2576     if (jit_vfp_unaligned_p()) {
2577         t0 = jit_get_reg(jit_class_gpr);
2578         r2 = rn(t0);
2579         if (i1 == 4) {
2580             unldi(r2, i0, 4);
2581             vfp_movr_w_f(r0, r2);
2582         }
2583         else {
2584             t1 = jit_get_reg(jit_class_gpr);
2585             r3 = rn(t1);
2586 #if __BYTE_ORDER == __LITTLE_ENDIAN
2587             if (jit_unaligned_p()) {
2588                 unldi(r2, i0, 4);
2589                 unldi(r3, i0 + 4, 4);
2590             }
2591             else {
2592                 ldi(r2, i0);
2593                 ldi(r3, i0 + 4);
2594             }
2595 #else
2596             if (jit_unaligned_p()) {
2597                 unldi(r3, i0, 4);
2598                 unldi(r2, i0 + 4, 4);
2599             }
2600             else {
2601                 ldi(r3, i0);
2602                 ldi(r2, i0 + 4);
2603             }
2604 #endif
2605             vfp_movr_ww_d(r0, r3, r2);
2606             jit_unget_reg(t1);
2607         }
2608         jit_unget_reg(t0);
2609     }
2610     else {
2611         if (i0 == 4)
2612             vfp_ldi_f(r0, i0);
2613         else
2614             vfp_ldi_d(r0, i0);
2615     }
2616 }
2617
2618 static void
2619 _vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2620 {
2621     jit_int32_t         reg;
2622     if (jit_fpr_p(r0)) {
2623         reg = jit_get_reg(jit_class_gpr);
2624         movi(rn(reg), i0);
2625         VSTR_F32(r0, rn(reg), 0);
2626         jit_unget_reg(reg);
2627     }
2628     else
2629         sti_i(i0, r0);
2630 }
2631
2632 static void
2633 _vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2634 {
2635     jit_int32_t         reg;
2636     reg = jit_get_reg(jit_class_gpr);
2637     movi(rn(reg), i0);
2638     if (jit_fpr_p(r0))
2639         VSTR_F64(r0, rn(reg), 0);
2640     else {
2641         str_i(rn(reg), r0);
2642         stxi_i(4, rn(reg), r0 + 1);
2643     }
2644     jit_unget_reg(reg);
2645 }
2646
2647 static void
2648 _vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2649 {
2650     jit_int32_t         reg;
2651     if (jit_fpr_p(r2)) {
2652         reg = jit_get_reg(jit_class_gpr);
2653         addr(rn(reg), r0, r1);
2654         VSTR_F32(r2, rn(reg), 0);
2655         jit_unget_reg(reg);
2656     }
2657     else
2658         stxr_i(r0, r1, r2);
2659 }
2660
2661 static void
2662 _vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2663 {
2664     jit_int32_t         reg;
2665     reg = jit_get_reg(jit_class_gpr);
2666     addr(rn(reg), r0, r1);
2667     if (jit_fpr_p(r2))
2668         VSTR_F64(r2, rn(reg), 0);
2669     else {
2670         str_i(rn(reg), r2);
2671         stxi_i(4, rn(reg), r2 + 1);
2672     }
2673     jit_unget_reg(reg);
2674 }
2675
2676 static void
2677 _vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2678 {
2679     jit_int32_t         reg;
2680     if (jit_fpr_p(r1)) {
2681         if (i0 >= 0) {
2682             assert(!(i0 & 3));
2683             if (i0 < 1024)
2684                 VSTR_F32(r1, r0, i0 >> 2);
2685             else {
2686                 reg = jit_get_reg(jit_class_gpr);
2687                 addi(rn(reg), r0, i0);
2688                 VSTR_F32(r1, rn(reg), 0);
2689                 jit_unget_reg(reg);
2690             }
2691         }
2692         else {
2693             i0 = -i0;
2694             assert(!(i0 & 3));
2695             if (i0 < 1024)
2696                 VSTRN_F32(r1, r0, i0 >> 2);
2697             else {
2698                 reg = jit_get_reg(jit_class_gpr);
2699                 subi(rn(reg), r0, i0);
2700                 VSTR_F32(r1, rn(reg), 0);
2701                 jit_unget_reg(reg);
2702             }
2703         }
2704     }
2705     else
2706         stxi_i(i0, r0, r1);
2707 }
2708
2709 static void
2710 _vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2711 {
2712     jit_int32_t         reg;
2713     if (jit_fpr_p(r1)) {
2714         if (i0 >= 0) {
2715             assert(!(i0 & 3));
2716             if (i0 < 0124)
2717                 VSTR_F64(r1, r0, i0 >> 2);
2718             else {
2719                 reg = jit_get_reg(jit_class_gpr);
2720                 addi(rn(reg), r0, i0);
2721                 VSTR_F64(r1, rn(reg), 0);
2722                 jit_unget_reg(reg);
2723             }
2724         }
2725         else {
2726             i0 = -i0;
2727             assert(!(i0 & 3));
2728             if (i0 < 1024)
2729                 VSTRN_F64(r1, r0, i0 >> 2);
2730             else {
2731                 reg = jit_get_reg(jit_class_gpr);
2732                 subi(rn(reg), r0, i0);
2733                 VSTR_F64(r1, rn(reg), 0);
2734                 jit_unget_reg(reg);
2735             }
2736         }
2737     }
2738     else {
2739         reg = jit_get_reg(jit_class_gpr);
2740         addi(rn(reg), r0, i0);
2741         str_i(rn(reg), r1);
2742         stxi_i(4, rn(reg), r1 + 1);
2743         jit_unget_reg(reg);
2744     }
2745 }
2746
2747 static void
2748 _vfp_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2749 {
2750     jit_int32_t         t0, r2;
2751     jit_int32_t         t1, r3;
2752     assert(i0 == 4 || i0 == 8);
2753     if (jit_vfp_unaligned_p()) {
2754         t0 = jit_get_reg(jit_class_gpr);
2755         r2 = rn(t0);
2756         if (i0 == 4) {
2757             vfp_movr_f_w(r2, r1);
2758             if (jit_unaligned_p())
2759                 unstr(r0, r2, 4);
2760             else
2761                 str(r0, r2);
2762         }
2763         else {
2764             t1 = jit_get_reg(jit_class_gpr);
2765             r3 = rn(t1);
2766             vfp_movr_d_ww(r2, r3, r1);
2767 #if __BYTE_ORDER == __LITTLE_ENDIAN
2768             if (jit_unaligned_p()) {
2769                 unstr(r0, r2, 4);
2770                 addi(r2, r0, 4);
2771                 unstr(r2, r3, 4);
2772             }
2773             else {
2774                 str(r0, r2);
2775                 stxi(4, r0, r3);
2776             }
2777 #else
2778             if (jit_unaligned_p()) {
2779                 unstr(r0, r3, 4);
2780                 addi(r3, r0, 4);
2781                 unstr(r3, r2, 4);
2782             }
2783             else {
2784                 str(r0, r3);
2785                 stxi(4, r0, r2);
2786             }
2787 #endif
2788             jit_unget_reg(t1);
2789         }
2790         jit_unget_reg(t0);
2791     }
2792     else {
2793         if (i0 == 4)
2794             vfp_str_f(r0, r1);
2795         else
2796             vfp_str_d(r0, r1);
2797     }
2798 }
2799
2800 static void
2801 _vfp_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2802 {
2803     jit_int32_t         t0, r2;
2804     jit_int32_t         t1, r3;
2805     assert(i1 == 4 || i1 == 8);
2806     if (jit_vfp_unaligned_p()) {
2807         t0 = jit_get_reg(jit_class_gpr);
2808         r2 = rn(t0);
2809         if (i1 == 4) {
2810             vfp_movr_f_w(r2, r0);
2811             if (jit_unaligned_p())
2812                 unsti(i0, r2, 4);
2813             else
2814                 sti(i0, r2);
2815         }
2816         else {
2817             t1 = jit_get_reg(jit_class_gpr);
2818             r3 = rn(t1);
2819             vfp_movr_d_ww(r2, r3, r0);
2820 #if __BYTE_ORDER == __LITTLE_ENDIAN
2821             if (jit_unaligned_p()) {
2822                 unsti(i0, r3, 4);
2823                 unsti(i0 + 4, r2, 4);
2824             }
2825             else {
2826                 sti(i0, r3);
2827                 sti(i0 + 4, r2);
2828             }
2829 #else
2830             if (jit_unaligned_p()) {
2831                 unsti(i0, r2, 4);
2832                 unsti(i0 + 4, r3, 4);
2833             }
2834             else {
2835                 sti(i0, r2);
2836                 sti(i0 + 4, r3);
2837             }
2838 #endif
2839             jit_unget_reg(t1);
2840         }
2841         jit_unget_reg(t0);
2842     }
2843     else {
2844         if (i1 == 4)
2845             vfp_sti_f(i0, r0);
2846         else
2847             vfp_sti_d(i0, r0);
2848     }
2849 }
2850
2851 static void
2852 _vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2853 {
2854     jit_int32_t         reg;
2855
2856     assert(_jitc->function->self.call & jit_call_varargs);
2857
2858     /* Adjust pointer. */
2859     reg = jit_get_reg(jit_class_gpr);
2860     andi(rn(reg), r1, 7);
2861     addr(r1, r1, rn(reg));
2862     jit_unget_reg(reg);
2863
2864     /* Load argument. */
2865     vfp_ldr_d(r0, r1);
2866
2867     /* Update stack pointer. */
2868     addi(r1, r1, sizeof(jit_float64_t));
2869 }
2870 #  undef dbopi
2871 #  undef fbopi
2872 #  undef dopi
2873 #  undef fopi
2874 #endif