4b146d255822b9103a1e8afaba318b7f32f92fec
[pcsx_rearmed.git] / deps / lightning / lib / jit_arm-vfp.c
1 /*
2  * Copyright (C) 2012-2022  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #if PROTO
/* Numbering as per the vfp_regno macro; it is required because of the
 * "support" for soft float registers, or the use of integer registers
 * as arguments to float operations. */
#  define _D8_REGNO                     32
/* Flag OR-ed into an opcode by the quad register (*Q) macro variants. */
#  define ARM_V_Q                       0x00000040
/* Bit masks of the FPSCR register. */
/* Condition flags */
#  define FPSCR_N                       0x80000000 /* Negative flag */
#  define FPSCR_Z                       0x40000000 /* Zero flag */
#  define FPSCR_C                       0x20000000 /* Carry flag */
#  define FPSCR_V                       0x10000000 /* Overflow flag */
#  define FPSCR_QC                      0x08000000 /* Cumulative saturation */
/* Mode control */
#  define FPSCR_AHP                     0x04000000 /* Alt. half-precision */
#  define FPSCR_DN                      0x02000000 /* Default NaN mode */
#  define FPSCR_FZ                      0x01000000 /* Flush to zero */
/* Rounding mode field and the four values it can hold */
#  define FPSCR_RMASK                   0x00c00000
#    define FPSCR_RN                    0x00000000 /* Round to Nearest */
#    define FPSCR_RP                    0x00400000 /* Round to Plus Infinity */
#    define FPSCR_RM                    0x00800000 /* Round to Minus Infinity */
#    define FPSCR_RZ                    0x00c00000 /* Round towards Zero */
#  define FPSCR_STRIDE                  0x00300000 /* Stride field */
#  define FPSCR_RES1                    0x00080000 /* Reserved, UNK/SBZP */
#  define FPSCR_LEN                     0x00070000 /* Len field */
/* Trap enable bits */
#  define FPSCR_IDE                     0x00008000 /* Input Denormal trap */
#  define FPSCR_IXE                     0x00001000 /* Inexact trap */
#  define FPSCR_UFE                     0x00000800 /* Underflow trap */
#  define FPSCR_OFE                     0x00000400 /* Overflow trap */
#  define FPSCR_DZE                     0x00000200 /* Division by zero trap */
#  define FPSCR_IOE                     0x00000100 /* Invalid Operation trap */
/* Cumulative exception flags */
#  define FPSCR_IDC                     0x00000080 /* Input Denormal flag */
#  define FPSCR_RES0                    0x00000060 /* Reserved, UNK/SBZP */
#  define FPSCR_IXC                     0x00000010 /* Inexact flag */
#  define FPSCR_UFC                     0x00000008 /* Underflow flag */
#  define FPSCR_OFC                     0x00000004 /* Overflow flag */
#  define FPSCR_DZC                     0x00000002 /* Division by zero flag */
#  define FPSCR_IOC                     0x00000001 /* Invalid Operation flag */
/* Modifier bits OR-ed into the VFP opcodes below */
#  define ARM_V_E                       0x00000080 /* ARM_VCMP: exception if NaN (VCMPE) */
#  define ARM_V_Z                       0x00010000 /* ARM_VCMP with zero */
#  define ARM_V_F64                     0x00000100 /* selects the F64 variants */
/* Floating-point data processing */
#  define ARM_VADD_F                    0x0e300a00
#  define ARM_VSUB_F                    0x0e300a40
#  define ARM_VMUL_F                    0x0e200a00
#  define ARM_VDIV_F                    0x0e800a00
#  define ARM_VABS_F                    0x0eb00ac0
#  define ARM_VNEG_F                    0x0eb10a40
#  define ARM_VSQRT_F                   0x0eb10ac0
#  define ARM_VMOV_F                    0x0eb00a40
/* Transfers between core and floating-point registers */
#  define ARM_VMOV_A_S                  0x0e100a10 /* vmov rn, sn */
#  define ARM_VMOV_S_A                  0x0e000a10 /* vmov sn, rn */
#  define ARM_VMOV_AA_D                 0x0c500b10 /* vmov rn,rn, dn */
#  define ARM_VMOV_D_AA                 0x0c400b10 /* vmov dn, rn,rn */
/* Compare and status register access */
#  define ARM_VCMP                      0x0eb40a40
#  define ARM_VMRS                      0x0ef10a10
#  define ARM_VMSR                      0x0ee10a10
/* VCVT opcode base and the modifier bits combined with it. */
#  define ARM_VCVT_2I                   0x00040000 /* to integer */
#  define ARM_VCVT_2S                   0x00010000 /* to signed */
#  define ARM_VCVT_RS                   0x00000080 /* round to zero or signed */
#  define ARM_VCVT                      0x0eb80a40
/* Composite opcodes are parenthesized so that they behave as a single
 * value wherever they are expanded (e.g. "OPC & mask"). */
#  define ARM_VCVT_S32_F32              (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
#  define ARM_VCVT_U32_F32              (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS)
#  define ARM_VCVT_S32_F64              (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_U32_F64              (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_F32_S32              (ARM_VCVT|ARM_VCVT_RS)
#  define ARM_VCVT_F32_U32              (ARM_VCVT)
#  define ARM_VCVT_F64_S32              (ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
#  define ARM_VCVT_F64_U32              (ARM_VCVT|ARM_V_F64)
/* Conversion between the two floating-point formats */
#  define ARM_VCVT_F                    0x0eb70ac0
#  define ARM_VCVT_F32_F64              (ARM_VCVT_F)
#  define ARM_VCVT_F64_F32              (ARM_VCVT_F|ARM_V_F64)
/* VCVTR: same as the to-integer forms above but without ARM_VCVT_RS */
#  define ARM_VCVTR_S32_F32             (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S)
#  define ARM_VCVTR_U32_F32             (ARM_VCVT|ARM_VCVT_2I)
#  define ARM_VCVTR_S32_F64             (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64)
#  define ARM_VCVTR_U32_F64             (ARM_VCVT|ARM_VCVT_2I|ARM_V_F64)
/* Individual bits OR-ed into instruction words. */
#  define ARM_V_D                       0x00400000
#  define ARM_V_N                       0x00000080
#  define ARM_V_Q                       0x00000040 /* set by the quad (*Q) macros */
#  define ARM_V_M                       0x00000020
#  define ARM_V_U                       0x01000000 /* set by the unsigned (_U*) macros */
/* Element size selectors used by the three-register integer macros */
#  define ARM_V_I16                     0x00100000
#  define ARM_V_I32                     0x00200000
#  define ARM_V_I64                     0x00300000
/* Element size selectors used by the VABS/VQABS/VNEG/VQNEG macros */
#  define ARM_V_S16                     0x00040000
#  define ARM_V_S32                     0x00080000
/* Integer add */
#  define ARM_VADD_I                    0x02000800
#  define ARM_VQADD_I                   0x02000010 /* set flag on over/carry */
#  define ARM_VADDL_I                   0x02800000 /* q=d+d */
#  define ARM_VADDW_I                   0x02800100 /* q=q+d */
/* Integer subtract */
#  define ARM_VSUB_I                    0x03000800
#  define ARM_VQSUB_I                   0x02000210 /* set flag on over/carry */
#  define ARM_VSUBL_I                   0x02800200
#  define ARM_VSUBW_I                   0x02800300
/* Integer multiply */
#  define ARM_VMUL_I                    0x02000910
#  define ARM_VMULL_I                   0x02800c00
/* Integer absolute value and negate */
#  define ARM_VABS_I                    0x03b10300
#  define ARM_VQABS_I                   0x03b00700 /* sets flag on overflow */
#  define ARM_VNEG_I                    0x03b10380
#  define ARM_VQNEG_I                   0x03b00780 /* sets flag on overflow */
/* Bitwise logical */
#  define ARM_VAND                      0x02000110
#  define ARM_VBIC                      0x02100110
#  define ARM_VORR                      0x02200110
#  define ARM_VORN                      0x02300110
#  define ARM_VEOR                      0x03000110
/* VMOVL: opcode base plus source element size selector */
#  define ARM_VMOVL_S8                  0x00080000
#  define ARM_VMOVL_S16                 0x00100000
#  define ARM_VMOVL_S32                 0x00200000
#  define ARM_VMOVL_I                   0x02800a10
/* Immediate moves */
#  define ARM_VMOVI                     0x02800010
#  define ARM_VMVNI                     0x02800030
/* Load/store: single register and multiple registers (ARM_VM) */
#  define ARM_VLDR                      0x0d100a00
#  define ARM_VSTR                      0x0d000a00
#  define ARM_VM                        0x0c000a00
/* Moves between a core register and a vector element */
#  define ARM_VMOV_ADV_U                0x00800000 /* zero extend */
#  define ARM_VMOV_ADV_8                0x00400000
#  define ARM_VMOV_ADV_16               0x00000020
#  define ARM_VMOV_A_D                  0x0e100b10
#  define ARM_VMOV_D_A                  0x0e000b10
134
135 #  define vodi(oi,r0)                   _vodi(_jit,oi,r0)
136 static void _vodi(jit_state_t*,int,int) maybe_unused;
137 #  define voqi(oi,r0)                   _voqi(_jit,oi,r0)
138 static void _voqi(jit_state_t*,int,int) maybe_unused;
139 #  define vo_ss(o,r0,r1)                _cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
140 #  define cc_vo_ss(cc,o,r0,r1)          _cc_vo_ss(_jit,cc,o,r0,r1)
141 static void _cc_vo_ss(jit_state_t*,int,int,int,int);
142 #  define vo_dd(o,r0,r1)                _cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
143 #  define cc_vo_dd(cc,o,r0,r1)          _cc_vo_dd(_jit,cc,o,r0,r1)
144 static void _cc_vo_dd(jit_state_t*,int,int,int,int);
145 #  define vo_qd(o,r0,r1)                _cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
146 #  define cc_vo_qd(cc,o,r0,r1)          _cc_vo_qd(_jit,cc,o,r0,r1)
147 static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
148 #  define vo_qq(o,r0,r1)                _cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
149 #  define cc_vo_qq(cc,o,r0,r1)          _cc_vo_qq(_jit,cc,o,r0,r1)
150 static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
151 #  define vorr_(o,r0,r1)                _cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
152 #  define cc_vorr_(cc,o,r0,r1)          _cc_vorr_(_jit,cc,o,r0,r1)
153 static void _cc_vorr_(jit_state_t*,int,int,int,int);
154 #  define vors_(o,r0,r1)                _cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
155 #  define cc_vors_(cc,o,r0,r1)          _cc_vors_(_jit,cc,o,r0,r1)
156 static void _cc_vors_(jit_state_t*,int,int,int,int);
157 #  define vorv_(o,r0,r1)                _cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
158 #  define cc_vorv_(cc,o,r0,r1)          _cc_vorv_(_jit,cc,o,r0,r1)
159 static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
160 #  define vori_(o,r0,r1)                _cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
161 #  define cc_vori_(cc,o,r0,r1)          _cc_vori_(_jit,cc,o,r0,r1)
162 static void _cc_vori_(jit_state_t*,int,int,int,int);
163 #  define vorrd(o,r0,r1,r2)             _cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
164 #  define cc_vorrd(cc,o,r0,r1,r2)       _cc_vorrd(_jit,cc,o,r0,r1,r2)
165 static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
166 #  define vosss(o,r0,r1,r2)             _cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
167 #  define cc_vosss(cc,o,r0,r1,r2)       _cc_vosss(_jit,cc,o,r0,r1,r2)
168 static void _cc_vosss(jit_state_t*,int,int,int,int,int);
169 #  define voddd(o,r0,r1,r2)             _cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
170 #  define cc_voddd(cc,o,r0,r1,r2)       _cc_voddd(_jit,cc,o,r0,r1,r2)
171 static void _cc_voddd(jit_state_t*,int,int,int,int,int);
172 #  define voqdd(o,r0,r1,r2)             _cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
173 #  define cc_voqdd(cc,o,r0,r1,r2)       _cc_voqdd(_jit,cc,o,r0,r1,r2)
174 static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
175 #  define voqqd(o,r0,r1,r2)             _cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
176 #  define cc_voqqd(cc,o,r0,r1,r2)       _cc_voqqd(_jit,cc,o,r0,r1,r2)
177 static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
178 #  define voqqq(o,r0,r1,r2)             _cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
179 #  define cc_voqqq(cc,o,r0,r1,r2)       _cc_voqqq(_jit,cc,o,r0,r1,r2)
180 static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
181 #  define cc_vldst(cc,o,r0,r1,i0)       _cc_vldst(_jit,cc,o,r0,r1,i0)
182 static void _cc_vldst(jit_state_t*,int,int,int,int,int);
183 #  define cc_vorsl(cc,o,r0,r1,i0)       _cc_vorsl(_jit,cc,o,r0,r1,i0)
184 static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
/* Floating-point arithmetic and register moves.
 * CC_NAME() takes the condition code as first argument; NAME() is the
 * same instruction with ARM_CC_AL.  F32 forms go through cc_vosss or
 * cc_vo_ss, F64 forms through cc_voddd or cc_vo_dd with ARM_V_F64
 * OR-ed into the opcode. */
#  define CC_VADD_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VADD_F,r0,r1,r2)
#  define VADD_F32(r0,r1,r2)            CC_VADD_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VADD_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VADD_F|ARM_V_F64,r0,r1,r2)
#  define VADD_F64(r0,r1,r2)            CC_VADD_F64(ARM_CC_AL,r0,r1,r2)

#  define CC_VSUB_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VSUB_F,r0,r1,r2)
#  define VSUB_F32(r0,r1,r2)            CC_VSUB_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VSUB_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VSUB_F|ARM_V_F64,r0,r1,r2)
#  define VSUB_F64(r0,r1,r2)            CC_VSUB_F64(ARM_CC_AL,r0,r1,r2)

#  define CC_VMUL_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VMUL_F,r0,r1,r2)
#  define VMUL_F32(r0,r1,r2)            CC_VMUL_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VMUL_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VMUL_F|ARM_V_F64,r0,r1,r2)
#  define VMUL_F64(r0,r1,r2)            CC_VMUL_F64(ARM_CC_AL,r0,r1,r2)

#  define CC_VDIV_F32(cc,r0,r1,r2)      cc_vosss(cc,ARM_VDIV_F,r0,r1,r2)
#  define VDIV_F32(r0,r1,r2)            CC_VDIV_F32(ARM_CC_AL,r0,r1,r2)
#  define CC_VDIV_F64(cc,r0,r1,r2)      cc_voddd(cc,ARM_VDIV_F|ARM_V_F64,r0,r1,r2)
#  define VDIV_F64(r0,r1,r2)            CC_VDIV_F64(ARM_CC_AL,r0,r1,r2)

#  define CC_VABS_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VABS_F,r0,r1)
#  define VABS_F32(r0,r1)               CC_VABS_F32(ARM_CC_AL,r0,r1)
#  define CC_VABS_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VABS_F|ARM_V_F64,r0,r1)
#  define VABS_F64(r0,r1)               CC_VABS_F64(ARM_CC_AL,r0,r1)

#  define CC_VNEG_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VNEG_F,r0,r1)
#  define VNEG_F32(r0,r1)               CC_VNEG_F32(ARM_CC_AL,r0,r1)
#  define CC_VNEG_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VNEG_F|ARM_V_F64,r0,r1)
#  define VNEG_F64(r0,r1)               CC_VNEG_F64(ARM_CC_AL,r0,r1)

#  define CC_VSQRT_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VSQRT_F,r0,r1)
#  define VSQRT_F32(r0,r1)              CC_VSQRT_F32(ARM_CC_AL,r0,r1)
#  define CC_VSQRT_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VSQRT_F|ARM_V_F64,r0,r1)
#  define VSQRT_F64(r0,r1)              CC_VSQRT_F64(ARM_CC_AL,r0,r1)

#  define CC_VMOV_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
#  define VMOV_F32(r0,r1)               CC_VMOV_F32(ARM_CC_AL,r0,r1)
#  define CC_VMOV_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
#  define VMOV_F64(r0,r1)               CC_VMOV_F64(ARM_CC_AL,r0,r1)
/* Moves between core registers and floating-point registers.
 * For the *_AA_D and *_A_S forms the macro arguments are forwarded in
 * order; for *_D_AA and *_S_A the destination (r0) is moved to the
 * last position, so the emitter receives the core register(s) first
 * in both directions. */
#  define CC_VMOV_AA_D(cc,r0,r1,r2)     cc_vorrd(cc,ARM_VMOV_AA_D,r0,r1,r2)
#  define VMOV_AA_D(r0,r1,r2)           CC_VMOV_AA_D(ARM_CC_AL,r0,r1,r2)
#  define CC_VMOV_D_AA(cc,r0,r1,r2)     cc_vorrd(cc,ARM_VMOV_D_AA,r1,r2,r0)
#  define VMOV_D_AA(r0,r1,r2)           CC_VMOV_D_AA(ARM_CC_AL,r0,r1,r2)

#  define CC_VMOV_A_S(cc,r0,r1)         cc_vors_(cc,ARM_VMOV_A_S,r0,r1)
#  define VMOV_A_S(r0,r1)               CC_VMOV_A_S(ARM_CC_AL,r0,r1)
#  define CC_VMOV_S_A(cc,r0,r1)         cc_vors_(cc,ARM_VMOV_S_A,r1,r0)
#  define VMOV_S_A(r0,r1)               CC_VMOV_S_A(ARM_CC_AL,r0,r1)
/* Compares.  The E forms add ARM_V_E, the Z forms add ARM_V_Z and
 * pass 0 as second operand, the F64 forms add ARM_V_F64.  NAME() is
 * CC_NAME() with ARM_CC_AL. */
#  define CC_VCMP_F32(cc,r0,r1)         cc_vo_ss(cc,ARM_VCMP,r0,r1)
#  define VCMP_F32(r0,r1)               CC_VCMP_F32(ARM_CC_AL,r0,r1)
#  define CC_VCMP_F64(cc,r0,r1)         cc_vo_dd(cc,ARM_VCMP|ARM_V_F64,r0,r1)
#  define VCMP_F64(r0,r1)               CC_VCMP_F64(ARM_CC_AL,r0,r1)

#  define CC_VCMPE_F32(cc,r0,r1)        cc_vo_ss(cc,ARM_VCMP|ARM_V_E,r0,r1)
#  define VCMPE_F32(r0,r1)              CC_VCMPE_F32(ARM_CC_AL,r0,r1)
#  define CC_VCMPE_F64(cc,r0,r1)        cc_vo_dd(cc,ARM_VCMP|ARM_V_E|ARM_V_F64,r0,r1)
#  define VCMPE_F64(r0,r1)              CC_VCMPE_F64(ARM_CC_AL,r0,r1)

#  define CC_VCMPZ_F32(cc,r0)           cc_vo_ss(cc,ARM_VCMP|ARM_V_Z,r0,0)
#  define VCMPZ_F32(r0)                 CC_VCMPZ_F32(ARM_CC_AL,r0)
#  define CC_VCMPZ_F64(cc,r0)           cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_F64,r0,0)
#  define VCMPZ_F64(r0)                 CC_VCMPZ_F64(ARM_CC_AL,r0)

#  define CC_VCMPEZ_F32(cc,r0)          cc_vo_ss(cc,ARM_VCMP|ARM_V_Z|ARM_V_E,r0,0)
#  define VCMPEZ_F32(r0)                CC_VCMPEZ_F32(ARM_CC_AL,r0)
#  define CC_VCMPEZ_F64(cc,r0)          cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_E|ARM_V_F64,r0,0)
#  define VCMPEZ_F64(r0)                CC_VCMPEZ_F64(ARM_CC_AL,r0)

/* Status register transfers; the second emitter operand is always 0. */
#  define CC_VMRS(cc,r0)                cc_vorr_(cc,ARM_VMRS,r0,0)
#  define VMRS(r0)                      CC_VMRS(ARM_CC_AL,r0)
#  define CC_VMSR(cc,r0)                cc_vorr_(cc,ARM_VMSR,r0,0)
#  define VMSR(r0)                      CC_VMSR(ARM_CC_AL,r0)
/* Conversions.  Every form is emitted through cc_vo_ss with the
 * ARM_VCVT* opcode of the same name; NAME() is CC_NAME() with
 * ARM_CC_AL. */
/* Floating-point to integer */
#  define CC_VCVT_S32_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_S32_F32,r0,r1)
#  define VCVT_S32_F32(r0,r1)           CC_VCVT_S32_F32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_U32_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_U32_F32,r0,r1)
#  define VCVT_U32_F32(r0,r1)           CC_VCVT_U32_F32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_S32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_S32_F64,r0,r1)
#  define VCVT_S32_F64(r0,r1)           CC_VCVT_S32_F64(ARM_CC_AL,r0,r1)
#  define CC_VCVT_U32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_U32_F64,r0,r1)
#  define VCVT_U32_F64(r0,r1)           CC_VCVT_U32_F64(ARM_CC_AL,r0,r1)
/* Integer to floating-point */
#  define CC_VCVT_F32_S32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_S32,r0,r1)
#  define VCVT_F32_S32(r0,r1)           CC_VCVT_F32_S32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_F32_U32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_U32,r0,r1)
#  define VCVT_F32_U32(r0,r1)           CC_VCVT_F32_U32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_F64_S32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_S32,r0,r1)
#  define VCVT_F64_S32(r0,r1)           CC_VCVT_F64_S32(ARM_CC_AL,r0,r1)
#  define CC_VCVT_F64_U32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_U32,r0,r1)
#  define VCVT_F64_U32(r0,r1)           CC_VCVT_F64_U32(ARM_CC_AL,r0,r1)
/* Between floating-point formats */
#  define CC_VCVT_F32_F64(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F32_F64,r0,r1)
#  define VCVT_F32_F64(r0,r1)           CC_VCVT_F32_F64(ARM_CC_AL,r0,r1)
#  define CC_VCVT_F64_F32(cc,r0,r1)     cc_vo_ss(cc,ARM_VCVT_F64_F32,r0,r1)
#  define VCVT_F64_F32(r0,r1)           CC_VCVT_F64_F32(ARM_CC_AL,r0,r1)
/* VCVTR floating-point to integer */
#  define CC_VCVTR_S32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_S32_F32,r0,r1)
#  define VCVTR_S32_F32(r0,r1)          CC_VCVTR_S32_F32(ARM_CC_AL,r0,r1)
#  define CC_VCVTR_U32_F32(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_U32_F32,r0,r1)
#  define VCVTR_U32_F32(r0,r1)          CC_VCVTR_U32_F32(ARM_CC_AL,r0,r1)
#  define CC_VCVTR_S32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_S32_F64,r0,r1)
#  define VCVTR_S32_F64(r0,r1)          CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
#  define CC_VCVTR_U32_F64(cc,r0,r1)    cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
#  define VCVTR_U32_F64(r0,r1)          CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
/* Load/store multiple.  All forms go through cc_vorsl with ARM_VM as
 * base opcode; loads add ARM_M_L, the IA forms add ARM_M_I, the DB
 * forms add ARM_M_B, the _U forms add ARM_M_U and the F64 forms add
 * ARM_V_F64.  r0, r1 and i0 are forwarded unchanged; NAME() is
 * CC_NAME() with ARM_CC_AL. */
#  define CC_VLDMIA_F32(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
#  define VLDMIA_F32(r0,r1,i0)          CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDMIA_F64(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
#  define VLDMIA_F64(r0,r1,i0)          CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMIA_F32(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
#  define VSTMIA_F32(r0,r1,i0)          CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMIA_F64(cc,r0,r1,i0)    cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
#  define VSTMIA_F64(r0,r1,i0)          CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)

#  define CC_VLDMIA_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
#  define VLDMIA_U_F32(r0,r1,i0)        CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDMIA_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
#  define VLDMIA_U_F64(r0,r1,i0)        CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMIA_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
#  define VSTMIA_U_F32(r0,r1,i0)        CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMIA_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
#  define VSTMIA_U_F64(r0,r1,i0)        CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)

#  define CC_VLDMDB_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
#  define VLDMDB_U_F32(r0,r1,i0)        CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VLDMDB_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
#  define VLDMDB_U_F64(r0,r1,i0)        CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMDB_U_F32(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
#  define VSTMDB_U_F32(r0,r1,i0)        CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
#  define CC_VSTMDB_U_F64(cc,r0,r1,i0)  cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
#  define VSTMDB_U_F64(r0,r1,i0)        CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)

/* Push is VSTMDB_U and pop is VLDMIA_U, both with _SP_REGNO as the
 * first register argument. */
#  define CC_VPUSH_F32(cc,r0,i0)        CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
#  define VPUSH_F32(r0,i0)              CC_VPUSH_F32(ARM_CC_AL,r0,i0)
#  define CC_VPUSH_F64(cc,r0,i0)        CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
#  define VPUSH_F64(r0,i0)              CC_VPUSH_F64(ARM_CC_AL,r0,i0)
#  define CC_VPOP_F32(cc,r0,i0)         CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
#  define VPOP_F32(r0,i0)               CC_VPOP_F32(ARM_CC_AL,r0,i0)
#  define CC_VPOP_F64(cc,r0,i0)         CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
#  define VPOP_F64(r0,i0)               CC_VPOP_F64(ARM_CC_AL,r0,i0)
/* Moves between a core register and a vector element.
 * The *_A_* forms use ARM_VMOV_A_D and forward (r0,r1) in order; the
 * *_V_* forms use ARM_VMOV_D_A and swap the operands to (r1,r0).
 * 8 and 16 bit forms go through cc_vorv_, 32 bit forms through
 * cc_vori_; the U forms add ARM_VMOV_ADV_U.  NAME() is CC_NAME() with
 * ARM_CC_AL. */
#  define CC_VMOV_A_S8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
#  define VMOV_A_S8(r0,r1)              CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_U8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
#  define VMOV_A_U8(r0,r1)              CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_S16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16,r0,r1)
#  define VMOV_A_S16(r0,r1)             CC_VMOV_A_S16(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_U16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16|ARM_VMOV_ADV_U,r0,r1)
#  define VMOV_A_U16(r0,r1)             CC_VMOV_A_U16(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_S32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_A_D,r0,r1)
#  define VMOV_A_S32(r0,r1)             CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
#  define CC_VMOV_A_U32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
#  define VMOV_A_U32(r0,r1)             CC_VMOV_A_U32(ARM_CC_AL,r0,r1)

#  define CC_VMOV_V_I8(cc,r0,r1)        cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
#  define VMOV_V_I8(r0,r1)              CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
#  define CC_VMOV_V_I16(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
#  define VMOV_V_I16(r0,r1)             CC_VMOV_V_I16(ARM_CC_AL,r0,r1)
#  define CC_VMOV_V_I32(cc,r0,r1)       cc_vori_(cc,ARM_VMOV_D_A,r1,r0)
#  define VMOV_V_I32(r0,r1)             CC_VMOV_V_I32(ARM_CC_AL,r0,r1)
/* Integer add.  Plain forms go through voddd, the Q forms through
 * voqqq with ARM_V_Q added.  ARM_V_I16/I32/I64 select the element
 * size (none for 8 bit) and ARM_V_U is added for unsigned forms. */
#  define VADD_I8(r0,r1,r2)             voddd(ARM_VADD_I,r0,r1,r2)
#  define VADDQ_I8(r0,r1,r2)            voqqq(ARM_VADD_I|ARM_V_Q,r0,r1,r2)
#  define VADD_I16(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I16,r0,r1,r2)
#  define VADDQ_I16(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
#  define VADD_I32(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I32,r0,r1,r2)
#  define VADDQ_I32(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
#  define VADD_I64(r0,r1,r2)            voddd(ARM_VADD_I|ARM_V_I64,r0,r1,r2)
#  define VADDQ_I64(r0,r1,r2)           voqqq(ARM_VADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)

/* VQADD (ARM_VQADD_I: set flag on over/carry) */
#  define VQADD_S8(r0,r1,r2)            voddd(ARM_VQADD_I,r0,r1,r2)
#  define VQADDQ_S8(r0,r1,r2)           voqqq(ARM_VQADD_I|ARM_V_Q,r0,r1,r2)
#  define VQADD_U8(r0,r1,r2)            voddd(ARM_VQADD_I|ARM_V_U,r0,r1,r2)
#  define VQADDQ_U8(r0,r1,r2)           voqqq(ARM_VQADD_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQADD_S16(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I16,r0,r1,r2)
#  define VQADDQ_S16(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
#  define VQADD_U16(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VQADDQ_U16(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQADD_S32(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I32,r0,r1,r2)
#  define VQADDQ_S32(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
#  define VQADD_U32(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
#  define VQADDQ_U32(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQADD_S64(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I64,r0,r1,r2)
#  define VQADDQ_S64(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
#  define VQADD_U64(r0,r1,r2)           voddd(ARM_VQADD_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
#  define VQADDQ_U64(r0,r1,r2)          voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)

/* VADDL (q=d+d), emitted through voqdd */
#  define VADDL_S8(r0,r1,r2)            voqdd(ARM_VADDL_I,r0,r1,r2)
#  define VADDL_U8(r0,r1,r2)            voqdd(ARM_VADDL_I|ARM_V_U,r0,r1,r2)
#  define VADDL_S16(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I16,r0,r1,r2)
#  define VADDL_U16(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VADDL_S32(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I32,r0,r1,r2)
#  define VADDL_U32(r0,r1,r2)           voqdd(ARM_VADDL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)

/* VADDW (q=q+d), emitted through voqqd */
#  define VADDW_S8(r0,r1,r2)            voqqd(ARM_VADDW_I,r0,r1,r2)
#  define VADDW_U8(r0,r1,r2)            voqqd(ARM_VADDW_I|ARM_V_U,r0,r1,r2)
#  define VADDW_S16(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I16,r0,r1,r2)
#  define VADDW_U16(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VADDW_S32(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I32,r0,r1,r2)
#  define VADDW_U32(r0,r1,r2)           voqqd(ARM_VADDW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/* Integer subtract.  Plain forms go through voddd, the Q forms
 * through voqqq with ARM_V_Q added.  ARM_V_I16/I32/I64 select the
 * element size (none for 8 bit) and ARM_V_U is added for unsigned
 * forms. */
#  define VSUB_I8(r0,r1,r2)             voddd(ARM_VSUB_I,r0,r1,r2)
#  define VSUBQ_I8(r0,r1,r2)            voqqq(ARM_VSUB_I|ARM_V_Q,r0,r1,r2)
#  define VSUB_I16(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I16,r0,r1,r2)
#  define VSUBQ_I16(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
#  define VSUB_I32(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I32,r0,r1,r2)
#  define VSUBQ_I32(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
#  define VSUB_I64(r0,r1,r2)            voddd(ARM_VSUB_I|ARM_V_I64,r0,r1,r2)
#  define VSUBQ_I64(r0,r1,r2)           voqqq(ARM_VSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)

/* VQSUB (ARM_VQSUB_I: set flag on over/carry) */
#  define VQSUB_S8(r0,r1,r2)            voddd(ARM_VQSUB_I,r0,r1,r2)
#  define VQSUBQ_S8(r0,r1,r2)           voqqq(ARM_VQSUB_I|ARM_V_Q,r0,r1,r2)
#  define VQSUB_U8(r0,r1,r2)            voddd(ARM_VQSUB_I|ARM_V_U,r0,r1,r2)
#  define VQSUBQ_U8(r0,r1,r2)           voqqq(ARM_VQSUB_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQSUB_S16(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I16,r0,r1,r2)
#  define VQSUBQ_S16(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
#  define VQSUB_U16(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VQSUBQ_U16(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQSUB_S32(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I32,r0,r1,r2)
#  define VQSUBQ_S32(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
#  define VQSUB_U32(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
#  define VQSUBQ_U32(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
#  define VQSUB_S64(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I64,r0,r1,r2)
#  define VQSUBQ_S64(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
#  define VQSUB_U64(r0,r1,r2)           voddd(ARM_VQSUB_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
#  define VQSUBQ_U64(r0,r1,r2)          voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)

/* VSUBL, emitted through voqdd */
#  define VSUBL_S8(r0,r1,r2)            voqdd(ARM_VSUBL_I,r0,r1,r2)
#  define VSUBL_U8(r0,r1,r2)            voqdd(ARM_VSUBL_I|ARM_V_U,r0,r1,r2)
#  define VSUBL_S16(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I16,r0,r1,r2)
#  define VSUBL_U16(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VSUBL_S32(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I32,r0,r1,r2)
#  define VSUBL_U32(r0,r1,r2)           voqdd(ARM_VSUBL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)

/* VSUBW, emitted through voqqd */
#  define VSUBW_S8(r0,r1,r2)            voqqd(ARM_VSUBW_I,r0,r1,r2)
#  define VSUBW_U8(r0,r1,r2)            voqqd(ARM_VSUBW_I|ARM_V_U,r0,r1,r2)
#  define VSUBW_S16(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I16,r0,r1,r2)
#  define VSUBW_U16(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
#  define VSUBW_S32(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I32,r0,r1,r2)
#  define VSUBW_U32(r0,r1,r2)           voqqd(ARM_VSUBW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
/* Integer multiply.  Plain forms go through voddd, the Q forms
 * through voqqq with ARM_V_Q added; ARM_V_I16/I32 select the element
 * size (none for 8 bit). */
#  define VMUL_I8(r0,r1,r2)             voddd(ARM_VMUL_I,r0,r1,r2)
#  define VMULQ_I8(r0,r1,r2)            voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
#  define VMUL_I16(r0,r1,r2)            voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
#  define VMULQ_I16(r0,r1,r2)           voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
#  define VMUL_I32(r0,r1,r2)            voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
#  define VMULQ_I32(r0,r1,r2)           voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)

/* VMULL is a long operation (q=d*d), so signed and unsigned forms are
 * both emitted through voqdd, the same emitter the VADDL/VSUBL macros
 * use; ARM_V_U is added for the unsigned forms. */
#  define VMULL_S8(r0,r1,r2)            voqdd(ARM_VMULL_I,r0,r1,r2)
#  define VMULL_U8(r0,r1,r2)            voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2)
#  define VMULL_S16(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
#  define VMULL_U16(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
#  define VMULL_S32(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
#  define VMULL_U32(r0,r1,r2)           voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
/* Integer absolute value and negate.  Plain forms go through vo_dd,
 * the Q forms through vo_qq with ARM_V_Q added; ARM_V_S16/S32 select
 * the element size (none for 8 bit). */
#  define VABS_S8(r0,r1)                vo_dd(ARM_VABS_I,r0,r1)
#  define VABSQ_S8(r0,r1)               vo_qq(ARM_VABS_I|ARM_V_Q,r0,r1)
#  define VABS_S16(r0,r1)               vo_dd(ARM_VABS_I|ARM_V_S16,r0,r1)
#  define VABSQ_S16(r0,r1)              vo_qq(ARM_VABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
#  define VABS_S32(r0,r1)               vo_dd(ARM_VABS_I|ARM_V_S32,r0,r1)
#  define VABSQ_S32(r0,r1)              vo_qq(ARM_VABS_I|ARM_V_S32|ARM_V_Q,r0,r1)

/* VQABS (ARM_VQABS_I: sets flag on overflow) */
#  define VQABS_S8(r0,r1)               vo_dd(ARM_VQABS_I,r0,r1)
#  define VQABSQ_S8(r0,r1)              vo_qq(ARM_VQABS_I|ARM_V_Q,r0,r1)
#  define VQABS_S16(r0,r1)              vo_dd(ARM_VQABS_I|ARM_V_S16,r0,r1)
#  define VQABSQ_S16(r0,r1)             vo_qq(ARM_VQABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
#  define VQABS_S32(r0,r1)              vo_dd(ARM_VQABS_I|ARM_V_S32,r0,r1)
#  define VQABSQ_S32(r0,r1)             vo_qq(ARM_VQABS_I|ARM_V_S32|ARM_V_Q,r0,r1)

#  define VNEG_S8(r0,r1)                vo_dd(ARM_VNEG_I,r0,r1)
#  define VNEGQ_S8(r0,r1)               vo_qq(ARM_VNEG_I|ARM_V_Q,r0,r1)
#  define VNEG_S16(r0,r1)               vo_dd(ARM_VNEG_I|ARM_V_S16,r0,r1)
#  define VNEGQ_S16(r0,r1)              vo_qq(ARM_VNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
#  define VNEG_S32(r0,r1)               vo_dd(ARM_VNEG_I|ARM_V_S32,r0,r1)
#  define VNEGQ_S32(r0,r1)              vo_qq(ARM_VNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)

/* VQNEG (ARM_VQNEG_I: sets flag on overflow) */
#  define VQNEG_S8(r0,r1)               vo_dd(ARM_VQNEG_I,r0,r1)
#  define VQNEGQ_S8(r0,r1)              vo_qq(ARM_VQNEG_I|ARM_V_Q,r0,r1)
#  define VQNEG_S16(r0,r1)              vo_dd(ARM_VQNEG_I|ARM_V_S16,r0,r1)
#  define VQNEGQ_S16(r0,r1)             vo_qq(ARM_VQNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
#  define VQNEG_S32(r0,r1)              vo_dd(ARM_VQNEG_I|ARM_V_S32,r0,r1)
#  define VQNEGQ_S32(r0,r1)             vo_qq(ARM_VQNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
/* Bitwise logical operations.  Plain forms go through voddd, the Q
 * forms through voqqq with ARM_V_Q added. */
#  define VAND(r0,r1,r2)                voddd(ARM_VAND,r0,r1,r2)
#  define VANDQ(r0,r1,r2)               voqqq(ARM_VAND|ARM_V_Q,r0,r1,r2)
#  define VBIC(r0,r1,r2)                voddd(ARM_VBIC,r0,r1,r2)
#  define VBICQ(r0,r1,r2)               voqqq(ARM_VBIC|ARM_V_Q,r0,r1,r2)
#  define VORR(r0,r1,r2)                voddd(ARM_VORR,r0,r1,r2)
#  define VORRQ(r0,r1,r2)               voqqq(ARM_VORR|ARM_V_Q,r0,r1,r2)
#  define VORN(r0,r1,r2)                voddd(ARM_VORN,r0,r1,r2)
#  define VORNQ(r0,r1,r2)               voqqq(ARM_VORN|ARM_V_Q,r0,r1,r2)
#  define VEOR(r0,r1,r2)                voddd(ARM_VEOR,r0,r1,r2)
#  define VEORQ(r0,r1,r2)               voqqq(ARM_VEOR|ARM_V_Q,r0,r1,r2)

/* Register move, implemented as VORR of the source with itself */
#  define VMOV(r0,r1)                   VORR(r0,r1,r1)
#  define VMOVQ(r0,r1)                  VORRQ(r0,r1,r1)

/* VMOVL, emitted through vo_qd; ARM_VMOVL_S8/S16/S32 select the
 * element size and ARM_V_U is added for the unsigned forms. */
#  define VMOVL_S8(r0,r1)               vo_qd(ARM_VMOVL_I|ARM_VMOVL_S8,r0,r1)
#  define VMOVL_U8(r0,r1)               vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S8,r0,r1)
#  define VMOVL_S16(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_VMOVL_S16,r0,r1)
#  define VMOVL_U16(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S16,r0,r1)
#  define VMOVL_S32(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_VMOVL_S32,r0,r1)
#  define VMOVL_U32(r0,r1)              vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S32,r0,r1)
449 /* "oi" should be the result of encode_vfp_double */
450 #  define VIMM(oi,r0)                   vodi(oi,r0)
451 #  define VIMMQ(oi,r0)                  voqi(oi|ARM_V_Q,r0)
452 /* index is multiplied by four */
453 #  define CC_VLDRN_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR,r0,r1,i0)
454 #  define VLDRN_F32(r0,r1,i0)           CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
455 #  define CC_VLDR_F32(cc,r0,r1,i0)      cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
456 #  define VLDR_F32(r0,r1,i0)            CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
457 #  define CC_VLDRN_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
458 #  define VLDRN_F64(r0,r1,i0)           CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
459 #  define CC_VLDR_F64(cc,r0,r1,i0)      cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
460 #  define VLDR_F64(r0,r1,i0)            CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)
461 #  define CC_VSTRN_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR,r0,r1,i0)
462 #  define VSTRN_F32(r0,r1,i0)           CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
463 #  define CC_VSTR_F32(cc,r0,r1,i0)      cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
464 #  define VSTR_F32(r0,r1,i0)            CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
465 #  define CC_VSTRN_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
466 #  define VSTRN_F64(r0,r1,i0)           CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
467 #  define CC_VSTR_F64(cc,r0,r1,i0)      cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
468 #  define VSTR_F64(r0,r1,i0)            CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
469 #  define vfp_movr_f(r0,r1)             _vfp_movr_f(_jit,r0,r1)
470 static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
471 #  define vfp_movr_d(r0,r1)             _vfp_movr_d(_jit,r0,r1)
472 static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
473 #  define vfp_movi_f(r0,i0)             _vfp_movi_f(_jit,r0,i0)
474 static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
475 #  define vfp_movi_d(r0,i0)             _vfp_movi_d(_jit,r0,i0)
476 static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
477 #  define vfp_extr_f(r0,r1)             _vfp_extr_f(_jit,r0,r1)
478 static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
479 #  define vfp_extr_d(r0,r1)             _vfp_extr_d(_jit,r0,r1)
480 static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
481 #  define vfp_extr_d_f(r0,r1)           _vfp_extr_d_f(_jit,r0,r1)
482 static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
483 #  define vfp_extr_f_d(r0,r1)           _vfp_extr_f_d(_jit,r0,r1)
484 static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
485 #  define vfp_truncr_f_i(r0,r1)         _vfp_truncr_f_i(_jit,r0,r1)
486 static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
487 #  define vfp_truncr_d_i(r0,r1)         _vfp_truncr_d_i(_jit,r0,r1)
488 static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
489 #  define vfp_absr_f(r0,r1)             VABS_F32(r0,r1)
490 #  define vfp_absr_d(r0,r1)             VABS_F64(r0,r1)
491 #  define vfp_negr_f(r0,r1)             VNEG_F32(r0,r1)
492 #  define vfp_negr_d(r0,r1)             VNEG_F64(r0,r1)
493 #  define vfp_sqrtr_f(r0,r1)            VSQRT_F32(r0,r1)
494 #  define vfp_sqrtr_d(r0,r1)            VSQRT_F64(r0,r1)
495 #  define vfp_addr_f(r0,r1,r2)          VADD_F32(r0,r1,r2)
496 #  define vfp_addi_f(r0,r1,i0)          _vfp_addi_f(_jit,r0,r1,i0)
497 static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
498 #  define vfp_addr_d(r0,r1,r2)          VADD_F64(r0,r1,r2)
499 #  define vfp_addi_d(r0,r1,i0)          _vfp_addi_d(_jit,r0,r1,i0)
500 static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
501 #  define vfp_subr_f(r0,r1,r2)          VSUB_F32(r0,r1,r2)
502 #  define vfp_subi_f(r0,r1,i0)          _vfp_subi_f(_jit,r0,r1,i0)
503 static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
504 #  define vfp_subr_d(r0,r1,r2)          VSUB_F64(r0,r1,r2)
505 #  define vfp_subi_d(r0,r1,i0)          _vfp_subi_d(_jit,r0,r1,i0)
506 static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
507 #  define vfp_rsbr_f(r0,r1,r2)          vfp_subr_f(r0,r2,r1)
508 #  define vfp_rsbi_f(r0,r1,i0)          _vfp_rsbi_f(_jit,r0,r1,i0)
509 static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
510 #  define vfp_rsbr_d(r0,r1,r2)          vfp_subr_d(r0,r2,r1)
511 #  define vfp_rsbi_d(r0,r1,i0)          _vfp_rsbi_d(_jit,r0,r1,i0)
512 static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
513 #  define vfp_mulr_f(r0,r1,r2)          VMUL_F32(r0,r1,r2)
514 #  define vfp_muli_f(r0,r1,i0)          _vfp_muli_f(_jit,r0,r1,i0)
515 static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
516 #  define vfp_mulr_d(r0,r1,r2)          VMUL_F64(r0,r1,r2)
517 #  define vfp_muli_d(r0,r1,i0)          _vfp_muli_d(_jit,r0,r1,i0)
518 static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
519 #  define vfp_divr_f(r0,r1,r2)          VDIV_F32(r0,r1,r2)
520 #  define vfp_divi_f(r0,r1,i0)          _vfp_divi_f(_jit,r0,r1,i0)
521 static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
522 #  define vfp_divr_d(r0,r1,r2)          VDIV_F64(r0,r1,r2)
523 #  define vfp_divi_d(r0,r1,i0)          _vfp_divi_d(_jit,r0,r1,i0)
524 static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
525 #  define vfp_cmp_f(r0,r1)              _vfp_cmp_f(_jit,r0,r1)
526 static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
527 #  define vfp_cmp_d(r0,r1)              _vfp_cmp_d(_jit,r0,r1)
528 static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
529 #  define vcmp01_x(c0,c1,r0)            _vcmp01_x(_jit,c0,c1,r0)
530 static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
531 #  define vcmp01_f(c0,c1,r0,r1,r2)      _vcmp01_f(_jit,c0,c1,r0,r1,r2)
532 static void _vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
533 #  define vcmp01_d(c0,c1,r0,r1,r2)      _vcmp01_d(_jit,c0,c1,r0,r1,r2)
534 static void _vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
535 #  define vfp_ltr_f(r0,r1,r2)           vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
536 #  define vfp_lti_f(r0,r1,i0)           _vfp_lti_f(_jit,r0,r1,i0)
537 static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
538 #  define vfp_ltr_d(r0,r1,r2)           vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
539 #  define vfp_lti_d(r0,r1,i0)           _vfp_lti_d(_jit,r0,r1,i0)
540 static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
541 #  define vfp_ler_f(r0,r1,r2)           vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
542 #  define vfp_lei_f(r0,r1,i0)           _vfp_lei_f(_jit,r0,r1,i0)
543 static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
544 #  define vfp_ler_d(r0,r1,r2)           vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
545 #  define vfp_lei_d(r0,r1,i0)           _vfp_lei_d(_jit,r0,r1,i0)
546 static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
547 #  define vfp_eqr_f(r0,r1,r2)           vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
548 #  define vfp_eqi_f(r0,r1,i0)           _vfp_eqi_f(_jit,r0,r1,i0)
549 static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
550 #  define vfp_eqr_d(r0,r1,r2)           vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
551 #  define vfp_eqi_d(r0,r1,i0)           _vfp_eqi_d(_jit,r0,r1,i0)
552 static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
553 #  define vfp_ger_f(r0,r1,r2)           vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
554 #  define vfp_gei_f(r0,r1,i0)           _vfp_gei_f(_jit,r0,r1,i0)
555 static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
556 #  define vfp_ger_d(r0,r1,r2)           vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
557 #  define vfp_gei_d(r0,r1,i0)           _vfp_gei_d(_jit,r0,r1,i0)
558 static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
559 #  define vfp_gtr_f(r0,r1,r2)           vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
560 #  define vfp_gti_f(r0,r1,i0)           _vfp_gti_f(_jit,r0,r1,i0)
561 static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
562 #  define vfp_gtr_d(r0,r1,r2)           vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
563 #  define vfp_gti_d(r0,r1,i0)           _vfp_gti_d(_jit,r0,r1,i0)
564 static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
565 #  define vfp_ner_f(r0,r1,r2)           vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
566 #  define vfp_nei_f(r0,r1,i0)           _vfp_nei_f(_jit,r0,r1,i0)
567 static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
568 #  define vfp_ner_d(r0,r1,r2)           vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
569 #  define vfp_nei_d(r0,r1,i0)           _vfp_nei_d(_jit,r0,r1,i0)
570 static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
571 #  define vcmp10_x(c0,r0)               _vcmp10_x(_jit,c0,r0)
572 static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
573 #  define vcmp_10_f(c0,r0,r1,r2)        _vcmp_10_f(_jit,c0,r0,r1,r2)
574 static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
575 #  define vcmp_10_d(c0,r0,r1,r2)        _vcmp_10_d(_jit,c0,r0,r1,r2)
576 static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
577 #  define vfp_unltr_f(r0,r1,r2)         vcmp_10_f(ARM_CC_GE,r0,r1,r2)
578 #  define vfp_unlti_f(r0,r1,i0)         _vfp_unlti_f(_jit,r0,r1,i0)
579 static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
580 #  define vfp_unltr_d(r0,r1,r2)         vcmp_10_d(ARM_CC_GE,r0,r1,r2)
581 #  define vfp_unlti_d(r0,r1,i0)         _vfp_unlti_d(_jit,r0,r1,i0)
582 static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
583 #  define vfp_unler_f(r0,r1,r2)         vcmp_10_f(ARM_CC_GT,r0,r1,r2)
584 #  define vfp_unlei_f(r0,r1,i0)         _vfp_unlei_f(_jit,r0,r1,i0)
585 static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
586 #  define vfp_unler_d(r0,r1,r2)         vcmp_10_d(ARM_CC_GT,r0,r1,r2)
587 #  define vfp_unlei_d(r0,r1,i0)         _vfp_unlei_d(_jit,r0,r1,i0)
588 static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
589 #  define vfp_uneqr_x(r0)               _vfp_uneqr_x(_jit,r0)
590 static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
591 #  define vfp_uneqr_f(r0,r1,r2)         _vfp_uneqr_f(_jit,r0,r1,r2)
592 static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
593 #  define vfp_uneqi_f(r0,r1,i0)         _vfp_uneqi_f(_jit,r0,r1,i0)
594 static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
595 #  define vfp_uneqr_d(r0,r1,r2)         _vfp_uneqr_d(_jit,r0,r1,r2)
596 static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
597 #  define vfp_uneqi_d(r0,r1,i0)         _vfp_uneqi_d(_jit,r0,r1,i0)
598 static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
599 #  define vcmp_01_x(c0,r0)              _vcmp_01_x(_jit,c0,r0)
600 static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
601 #  define vcmp_01_f(c0,r0,r1,r2)        _vcmp_01_f(_jit,c0,r0,r1,r2)
602 static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
603 #  define vcmp_01_d(c0,r0,r1,r2)        _vcmp_01_d(_jit,c0,r0,r1,r2)
604 static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
605 #  define vfp_unger_f(r0,r1,r2)         vcmp_01_f(ARM_CC_CS,r0,r1,r2)
606 #  define vfp_ungei_f(r0,r1,i0)         _vfp_ungei_f(_jit,r0,r1,i0)
607 static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
608 #  define vfp_unger_d(r0,r1,r2)         vcmp_01_d(ARM_CC_CS,r0,r1,r2)
609 #  define vfp_ungei_d(r0,r1,i0)         _vfp_ungei_d(_jit,r0,r1,i0)
610 static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
611 #  define vfp_ungtr_f(r0,r1,r2)         vcmp_01_f(ARM_CC_HI,r0,r1,r2)
612 #  define vfp_ungti_f(r0,r1,i0)         _vfp_ungti_f(_jit,r0,r1,i0)
613 static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
614 #  define vfp_ungtr_d(r0,r1,r2)         vcmp_01_d(ARM_CC_HI,r0,r1,r2)
615 #  define vfp_ungti_d(r0,r1,i0)         _vfp_ungti_d(_jit,r0,r1,i0)
616 static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
617 #  define vfp_ltgtr_x(r0)               _vfp_ltgtr_x(_jit,r0)
618 static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
619 #  define vfp_ltgtr_f(r0,r1,r2)         _vfp_ltgtr_f(_jit,r0,r1,r2)
620 static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
621 #  define vfp_ltgti_f(r0,r1,i0)         _vfp_ltgti_f(_jit,r0,r1,i0)
622 static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
623 #  define vfp_ltgtr_d(r0,r1,r2)         _vfp_ltgtr_d(_jit,r0,r1,r2)
624 static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
625 #  define vfp_ltgti_d(r0,r1,i0)         _vfp_ltgti_d(_jit,r0,r1,i0)
626 static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
627 #  define vfp_ordr_f(r0,r1,r2)          _vfp_ordr_f(_jit,r0,r1,r2)
628 static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
629 #  define vfp_ordi_f(r0,r1,i0)          _vfp_ordi_f(_jit,r0,r1,i0)
630 static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
631 #  define vfp_ordr_d(r0,r1,r2)          _vfp_ordr_d(_jit,r0,r1,r2)
632 static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
633 #  define vfp_ordi_d(r0,r1,i0)          _vfp_ordi_d(_jit,r0,r1,i0)
634 static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
635 #  define vfp_unordr_f(r0,r1,r2)        _vfp_unordr_f(_jit,r0,r1,r2)
636 static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
637 #  define vfp_unordi_f(r0,r1,i0)        _vfp_unordi_f(_jit,r0,r1,i0)
638 static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
639 #  define vfp_unordr_d(r0,r1,r2)        _vfp_unordr_d(_jit,r0,r1,r2)
640 static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
641 #  define vfp_unordi_d(r0,r1,i0)        _vfp_unordi_d(_jit,r0,r1,i0)
642 static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
643 #  define vbcmp_x(cc,i0)                _vbcmp_x(_jit,cc,i0)
644 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
645 #  define vbcmp_f(cc,i0,r0,r1)          _vbcmp_f(_jit,cc,i0,r0,r1)
646 static jit_word_t
647 _vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
648 #  define vbcmp_x(cc,i0)                _vbcmp_x(_jit,cc,i0)
649 static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
650 #  define vbcmp_d(cc,i0,r0,r1)          _vbcmp_d(_jit,cc,i0,r0,r1)
651 static jit_word_t
652 _vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
653 #  define vfp_bltr_f(i0,r0,r1)          vbcmp_f(ARM_CC_MI,i0,r0,r1)
654 #  define vfp_blti_f(i0,r0,i1)          _vfp_blti_f(_jit,i0,r0,i1)
655 static jit_word_t _vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
656 #  define vfp_bltr_d(i0,r0,r1)          vbcmp_d(ARM_CC_MI,i0,r0,r1)
657 static jit_word_t _vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
658 #  define vfp_blti_d(i0,r0,i1)          _vfp_blti_d(_jit,i0,r0,i1)
659 #  define vfp_bler_f(i0,r0,r1)          vbcmp_f(ARM_CC_LS,i0,r0,r1)
660 #  define vfp_blei_f(i0,r0,i1)          _vfp_blei_f(_jit,i0,r0,i1)
661 static jit_word_t _vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
662 #  define vfp_bler_d(i0,r0,r1)          vbcmp_d(ARM_CC_LS,i0,r0,r1)
663 #  define vfp_blei_d(i0,r0,i1)          _vfp_blei_d(_jit,i0,r0,i1)
664 static jit_word_t _vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
665 #  define vfp_beqr_f(i0,r0,r1)          vbcmp_f(ARM_CC_EQ,i0,r0,r1)
666 #  define vfp_beqi_f(i0,r0,i1)          _vfp_beqi_f(_jit,i0,r0,i1)
667 static jit_word_t _vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
668 #  define vfp_beqr_d(i0,r0,r1)          vbcmp_d(ARM_CC_EQ,i0,r0,r1)
669 #  define vfp_beqi_d(i0,r0,i1)          _vfp_beqi_d(_jit,i0,r0,i1)
670 static jit_word_t _vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
671 #  define vfp_bger_f(i0,r0,r1)          vbcmp_f(ARM_CC_GE,i0,r0,r1)
672 #  define vfp_bgei_f(i0,r0,i1)          _vfp_bgei_f(_jit,i0,r0,i1)
673 static jit_word_t _vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
674 #  define vfp_bger_d(i0,r0,r1)          vbcmp_d(ARM_CC_GE,i0,r0,r1)
675 #  define vfp_bgei_d(i0,r0,i1)          _vfp_bgei_d(_jit,i0,r0,i1)
676 static jit_word_t _vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
677 #  define vfp_bgtr_f(i0,r0,r1)          vbcmp_f(ARM_CC_GT,i0,r0,r1)
678 #  define vfp_bgti_f(i0,r0,i1)          _vfp_bgti_f(_jit,i0,r0,i1)
679 static jit_word_t _vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
680 #  define vfp_bgtr_d(i0,r0,r1)          vbcmp_d(ARM_CC_GT,i0,r0,r1)
681 #  define vfp_bgti_d(i0,r0,i1)          _vfp_bgti_d(_jit,i0,r0,i1)
682 static jit_word_t _vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
683 #  define vfp_bner_f(i0,r0,r1)          vbcmp_f(ARM_CC_NE,i0,r0,r1)
684 #  define vfp_bnei_f(i0,r0,i1)          _vfp_bnei_f(_jit,i0,r0,i1)
685 static jit_word_t _vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
686 #  define vfp_bner_d(i0,r0,r1)          vbcmp_d(ARM_CC_NE,i0,r0,r1)
687 #  define vfp_bnei_d(i0,r0,i1)          _vfp_bnei_d(_jit,i0,r0,i1)
688 static jit_word_t _vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
689 #  define vbncmp_x(cc,i0)               _vbncmp_x(_jit,cc,i0)
690 static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
691 #  define vbncmp_f(cc,i0,r0,r1)         _vbncmp_f(_jit,cc,i0,r0,r1)
692 static jit_word_t
693 _vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
694 #  define vbncmp_d(cc,i0,r0,r1)         _vbncmp_d(_jit,cc,i0,r0,r1)
695 static jit_word_t
696 _vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
697 #  define vfp_bunltr_f(i0,r0,r1)        vbncmp_f(ARM_CC_GE,i0,r0,r1)
698 #  define vfp_bunlti_f(i0,r0,i1)        _vfp_bunlti_f(_jit,i0,r0,i1)
699 static jit_word_t
700 _vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
701 #  define vfp_bunltr_d(i0,r0,r1)        vbncmp_d(ARM_CC_GE,i0,r0,r1)
702 #  define vfp_bunlti_d(i0,r0,i1)        _vfp_bunlti_d(_jit,i0,r0,i1)
703 static jit_word_t
704 _vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
705 #  define vfp_bunler_f(i0,r0,r1)        vbncmp_f(ARM_CC_GT,i0,r0,r1)
706 #  define vfp_bunlei_f(i0,r0,i1)        _vfp_bunlei_f(_jit,i0,r0,i1)
707 static jit_word_t
708 _vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
709 #  define vfp_bunler_d(i0,r0,r1)        vbncmp_d(ARM_CC_GT,i0,r0,r1)
710 #  define vfp_bunlei_d(i0,r0,i1)        _vfp_bunlei_d(_jit,i0,r0,i1)
711 static jit_word_t
712 _vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
713 #  define vfp_buneqr_x(i0)              _vfp_buneqr_x(_jit,i0)
714 static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
715 #  define vfp_buneqr_f(i0,r0,r1)        _vfp_buneqr_f(_jit,i0,r0,r1)
716 static jit_word_t
717 _vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
718 #  define vfp_buneqi_f(i0,r0,i1)        _vfp_buneqi_f(_jit,i0,r0,i1)
719 static jit_word_t
720 _vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
721 #  define vfp_buneqr_d(i0,r0,r1)        _vfp_buneqr_d(_jit,i0,r0,r1)
722 static jit_word_t
723 _vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
724 #  define vfp_buneqi_d(i0,r0,i1)        _vfp_buneqi_d(_jit,i0,r0,i1)
725 static jit_word_t
726 _vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
727 #  define vfp_bunger_x(i0)              _vfp_bunger_x(_jit,i0)
728 static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
729 #  define vfp_bunger_f(i0,r0,r1)        _vfp_bunger_f(_jit,i0,r0,r1)
730 static jit_word_t
731 _vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
732 #  define vfp_bungei_f(i0,r0,i1)        _vfp_bungei_f(_jit,i0,r0,i1)
733 static jit_word_t
734 _vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
735 #  define vfp_bunger_d(i0,r0,r1)        _vfp_bunger_d(_jit,i0,r0,r1)
736 static jit_word_t
737 _vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
738 #  define vfp_bungei_d(i0,r0,i1)        _vfp_bungei_d(_jit,i0,r0,i1)
739 static jit_word_t
740 _vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
741 #  define vfp_bungtr_f(i0,r0,r1)        vbcmp_f(ARM_CC_HI,i0,r0,r1)
742 #  define vfp_bungti_f(i0,r0,i1)        _vfp_bungti_f(_jit,i0,r0,i1)
743 static jit_word_t
744 _vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
745 #  define vfp_bungtr_d(i0,r0,r1)        vbcmp_d(ARM_CC_HI,i0,r0,r1)
746 #  define vfp_bungti_d(i0,r0,i1)        _vfp_bungti_d(_jit,i0,r0,i1)
747 static jit_word_t
748 _vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
749 #  define vfp_bltgtr_x(i0)              _vfp_bltgtr_x(_jit,i0)
750 static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
751 #  define vfp_bltgtr_f(i0,r0,r1)        _vfp_bltgtr_f(_jit,i0,r0,r1)
752 static jit_word_t
753 _vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
754 #  define vfp_bltgti_f(i0,r0,i1)        _vfp_bltgti_f(_jit,i0,r0,i1)
755 static jit_word_t
756 _vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
757 #  define vfp_bltgtr_d(i0,r0,r1)        _vfp_bltgtr_d(_jit,i0,r0,r1)
758 static jit_word_t
759 _vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
760 #  define vfp_bltgti_d(i0,r0,i1)        _vfp_bltgti_d(_jit,i0,r0,i1)
761 static jit_word_t
762 _vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
763 #  define vfp_bordr_f(i0,r0,r1)         vbcmp_f(ARM_CC_VC,i0,r0,r1)
764 #  define vfp_bordi_f(i0,r0,i1)         _vfp_bordi_f(_jit,i0,r0,i1)
765 static jit_word_t
766 _vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
767 #  define vfp_bordr_d(i0,r0,r1)         vbcmp_d(ARM_CC_VC,i0,r0,r1)
768 #  define vfp_bordi_d(i0,r0,i1)         _vfp_bordi_d(_jit,i0,r0,i1)
769 static jit_word_t
770 _vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
771 #  define vfp_bunordr_f(i0,r0,r1)       vbcmp_f(ARM_CC_VS,i0,r0,r1)
772 #  define vfp_bunordi_f(i0,r0,i1)       _vfp_bunordi_f(_jit,i0,r0,i1)
773 static jit_word_t
774 _vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
775 #  define vfp_bunordr_d(i0,r0,r1)       vbcmp_d(ARM_CC_VS,i0,r0,r1)
776 #  define vfp_bunordi_d(i0,r0,i1)       _vfp_bunordi_d(_jit,i0,r0,i1)
777 static jit_word_t
778 _vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
779 #  define vfp_ldr_f(r0,r1)              VLDR_F32(r0,r1,0)
780 #  define vfp_ldr_d(r0,r1)              VLDR_F64(r0,r1,0)
781 #  define vfp_ldi_f(r0,i0)              _vfp_ldi_f(_jit,r0,i0)
782 static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
783 #  define vfp_ldi_d(r0,i0)              _vfp_ldi_d(_jit,r0,i0)
784 static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
785 #  define vfp_ldxr_f(r0,r1,r2)          _vfp_ldxr_f(_jit,r0,r1,r2)
786 static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
787 #  define vfp_ldxr_d(r0,r1,r2)          _vfp_ldxr_d(_jit,r0,r1,r2)
788 static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
789 #  define vfp_ldxi_f(r0,r1,i0)          _vfp_ldxi_f(_jit,r0,r1,i0)
790 static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
791 #  define vfp_ldxi_d(r0,r1,i0)          _vfp_ldxi_d(_jit,r0,r1,i0)
792 static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
793 #  define vfp_str_f(r0,r1)              VSTR_F32(r1,r0,0)
794 #  define vfp_str_d(r0,r1)              VSTR_F64(r1,r0,0)
795 #  define vfp_sti_f(i0,r0)              _vfp_sti_f(_jit,i0,r0)
796 static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
797 #  define vfp_sti_d(i0,r0)              _vfp_sti_d(_jit,i0,r0)
798 static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
799 #  define vfp_stxr_f(r0,r1,r2)          _vfp_stxr_f(_jit,r0,r1,r2)
800 static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
801 #  define vfp_stxr_d(r0,r1,r2)          _vfp_stxr_d(_jit,r0,r1,r2)
802 static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
803 #  define vfp_stxi_f(i0,r0,r1)          _vfp_stxi_f(_jit,i0,r0,r1)
804 static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
805 #  define vfp_stxi_d(i0,r0,r1)          _vfp_stxi_d(_jit,i0,r0,r1)
806 static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
807 #  define vfp_vaarg_d(r0, r1)           _vfp_vaarg_d(_jit, r0, r1)
808 static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
809 #endif
810
811 #if CODE
/* Reduce a register number to the value placed in an instruction field:
 * subtract the base of 16, then halve (so 16 and 17 both give 0, and
 * _D8_REGNO, 32, gives 8). */
#  define vfp_regno(regno)      (((regno) - 16) >> 1)
813
814 static int
815 encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
816 {
817     int         code, mode, imm, mask;
818
819     if (hi != lo) {
820         if (mov && !inv) {
821             /* (I64)
822              *  aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
823              */
824             for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
825                 imm = lo & mask;
826                 if (imm != mask && imm != 0)
827                     goto fail;
828                 imm = hi & mask;
829                 if (imm != mask && imm != 0)
830                     goto fail;
831             }
832             mode = 0xe20;
833             imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
834                    ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >>  3) |
835                    ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
836                    ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >>  7));
837             goto success;
838         }
839         goto fail;
840     }
841     /*  (I32)
842      *  00000000 00000000 00000000 abcdefgh
843      *  00000000 00000000 abcdefgh 00000000
844      *  00000000 abcdefgh 00000000 00000000
845      *  abcdefgh 00000000 00000000 00000000 */
846     for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
847         if ((lo & mask) == lo) {
848             imm = lo >> (mode << 3);
849             mode <<= 9;
850             goto success;
851         }
852     }
853     /*  (I16)
854      *  00000000 abcdefgh 00000000 abcdefgh
855      *  abcdefgh 00000000 abcdefgh 00000000 */
856     for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
857         if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
858             imm = lo >> (mode << 3);
859             mode = 0x800 | (mode << 9);
860             goto success;
861         }
862     }
863     if (mov) {
864         /*  (I32)
865          *  00000000 00000000 abcdefgh 11111111
866          *  00000000 abcdefgh 11111111 11111111 */
867         for (mode = 0, mask = 0xff; mode < 2;
868              mask = (mask << 8) | 0xff, mode++) {
869             if ((lo & mask) == mask &&
870                 !((lo & ~mask) >> 8) &&
871                 (imm = lo >> (8 + (mode << 8)))) {
872                 mode = 0xc00 | (mode << 8);
873                 goto success;
874             }
875         }
876         if (!inv) {
877             /* (F32)
878              *  aBbbbbbc defgh000 00000000 00000000
879              *  from the ARM Architecture Reference Manual:
880              *  In this entry, B = NOT(b). The bit pattern represents the
881              *  floating-point number (-1)^s* 2^exp * mantissa, where
882              *  S = UInt(a),
883              *  exp = UInt(NOT(b):c:d)-3 and
884              *  mantissa = (16+UInt(e:f:g:h))/16. */
885             if ((lo & 0x7ffff) == 0 &&
886                 (((lo & 0x7e000000) == 0x3e000000) ||
887                  ((lo & 0x7e000000) == 0x40000000))) {
888                 mode = 0xf00;
889                 imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
890                 goto success;
891             }
892         }
893     }
894
895 fail:
896     /* need another approach (load from memory, move from arm register, etc) */
897     return (-1);
898
899 success:
900     code = inv ? ARM_VMVNI : ARM_VMOVI;
901     switch ((mode & 0xf00) >> 8) {
902         case 0x0:       case 0x2:       case 0x4:       case 0x6:
903         case 0x8:       case 0xa:
904             if (inv)    mode |= 0x20;
905             if (!mov)   mode |= 0x100;
906             break;
907         case 0x1:       case 0x3:       case 0x5:       case 0x7:
908             /* should actually not reach here */
909             assert(!inv);
910         case 0x9:       case 0xb:
911             assert(!mov);
912             break;
913         case 0xc:       case 0xd:
914             /* should actually not reach here */
915             assert(inv);
916         case 0xe:
917             assert(mode & 0x20);
918             assert(mov && !inv);
919             break;
920         default:
921             assert(!(mode & 0x20));
922             break;
923     }
924     imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
925     code |= mode | imm;
926     if (jit_thumb_p()) {
927         if (code & 0x1000000)
928             code |= 0xff000000;
929         else
930             code |= 0xef000000;
931     }
932     else
933         code |= ARM_CC_NV;
934     return (code);
935 }
936
937 static void
938 _vodi(jit_state_t *_jit, int oi, int r0)
939 {
940     jit_thumb_t thumb;
941     assert(!(oi  & 0x0000f000));
942     assert(!(r0 & 1));  r0 = vfp_regno(r0);
943     thumb.i = oi|(_u4(r0)<<12);
944     if (jit_thumb_p())
945         iss(thumb.s[0], thumb.s[1]);
946     else
947         ii(thumb.i);
948 }
949
950 static void
951 _voqi(jit_state_t *_jit, int oi, int r0)
952 {
953     jit_thumb_t thumb;
954     assert(!(oi  & 0x0000f000));
955     assert(!(r0 & 3));  r0 = vfp_regno(r0);
956     thumb.i = oi|(_u4(r0)<<12);
957     if (jit_thumb_p())
958         iss(thumb.s[0], thumb.s[1]);
959     else
960         ii(thumb.i);
961 }
962
963 static void
964 _cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
965 {
966     jit_thumb_t thumb;
967     assert(!(cc & 0x0fffffff));
968     assert(!(o  & 0xf000f00f));
969     if (r0 & 1) o |= ARM_V_D;   r0 = vfp_regno(r0);
970     if (r1 & 1) o |= ARM_V_M;   r1 = vfp_regno(r1);
971     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
972     if (jit_thumb_p())
973         iss(thumb.s[0], thumb.s[1]);
974     else
975         ii(thumb.i);
976 }
977
978 static void
979 _cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
980 {
981     jit_thumb_t thumb;
982     assert(!(cc & 0x0fffffff));
983     assert(!(o  & 0xf000f00f));
984     assert(!(r0 & 1) && !(r1 & 1));
985     r0 = vfp_regno(r0); r1 = vfp_regno(r1);
986     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
987     if (jit_thumb_p())
988         iss(thumb.s[0], thumb.s[1]);
989     else
990         ii(thumb.i);
991 }
992
993 static void
994 _cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
995 {
996     jit_thumb_t thumb;
997     assert(!(cc & 0x0fffffff));
998     assert(!(o  & 0xf000f00f));
999     assert(!(r0 & 3) && !(r1 & 1));
1000     r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1001     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1002     if (jit_thumb_p())
1003         iss(thumb.s[0], thumb.s[1]);
1004     else
1005         ii(thumb.i);
1006 }
1007
1008 static void
1009 _cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
1010 {
1011     jit_thumb_t thumb;
1012     assert(!(cc & 0x0fffffff));
1013     assert(!(o  & 0xf000f00f));
1014     assert(!(r0 & 3) && !(r1 & 3));
1015     r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1016     thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1017     if (jit_thumb_p())
1018         iss(thumb.s[0], thumb.s[1]);
1019     else
1020         ii(thumb.i);
1021 }
1022
1023 static void
1024 _cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1025 {
1026     jit_thumb_t thumb;
1027     assert(!(cc & 0x0fffffff));
1028     assert(!(o  & 0xf000f00f));
1029     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1030     if (jit_thumb_p())
1031         iss(thumb.s[0], thumb.s[1]);
1032     else
1033         ii(thumb.i);
1034 }
1035
1036 static void
1037 _cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1038 {
1039     jit_thumb_t thumb;
1040     assert(!(cc & 0x0fffffff));
1041     assert(!(o  & 0xf000f00f));
1042     if (r1 & 1) o |= ARM_V_N;   r1 = vfp_regno(r1);
1043     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1044     if (jit_thumb_p())
1045         iss(thumb.s[0], thumb.s[1]);
1046     else
1047         ii(thumb.i);
1048 }
1049
1050 static void
1051 _cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1052 {
1053     jit_thumb_t thumb;
1054     assert(!(cc & 0x0fffffff));
1055     assert(!(o  & 0xf000f00f));
1056     if (r1 & 1) o |= ARM_V_M;   r1 = vfp_regno(r1);
1057     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1058     if (jit_thumb_p())
1059         iss(thumb.s[0], thumb.s[1]);
1060     else
1061         ii(thumb.i);
1062 }
1063
1064 static void
1065 _cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1066 {
1067     jit_thumb_t thumb;
1068     assert(!(cc & 0x0fffffff));
1069     assert(!(o  & 0xf000f00f));
1070     /* use same bit pattern, to set opc1... */
1071     if (r1 & 1) o |= ARM_V_I32; r1 = vfp_regno(r1);
1072     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1073     if (jit_thumb_p())
1074         iss(thumb.s[0], thumb.s[1]);
1075     else
1076         ii(thumb.i);
1077 }
1078
1079 static void
1080 _cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1081 {
1082     jit_thumb_t thumb;
1083     assert(!(cc & 0x0fffffff));
1084     assert(!(o  & 0xf00ff00f));
1085     assert(!(r2 & 1));
1086     r2 = vfp_regno(r2);
1087     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1088     if (jit_thumb_p())
1089         iss(thumb.s[0], thumb.s[1]);
1090     else
1091         ii(thumb.i);
1092 }
1093
1094 static void
1095 _cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1096 {
1097     jit_thumb_t thumb;
1098     assert(!(cc & 0x0fffffff));
1099     assert(!(o  & 0xf00ff00f));
1100     if (r0 & 1) o |= ARM_V_D;   r0 = vfp_regno(r0);
1101     if (r1 & 1) o |= ARM_V_N;   r1 = vfp_regno(r1);
1102     if (r2 & 1) o |= ARM_V_M;   r2 = vfp_regno(r2);
1103     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1104     if (jit_thumb_p())
1105         iss(thumb.s[0], thumb.s[1]);
1106     else
1107         ii(thumb.i);
1108 }
1109
1110 static void
1111 _cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1112 {
1113     jit_thumb_t thumb;
1114     assert(!(cc & 0x0fffffff));
1115     assert(!(o  & 0xf00ff00f));
1116     assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
1117     r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
1118     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1119     if (jit_thumb_p())
1120         iss(thumb.s[0], thumb.s[1]);
1121     else
1122         ii(thumb.i);
1123 }
1124
1125 static void
1126 _cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1127 {
1128     jit_thumb_t thumb;
1129     assert(!(cc & 0x0fffffff));
1130     assert(!(o  & 0xf00ff00f));
1131     assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
1132     r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
1133     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1134     if (jit_thumb_p())
1135         iss(thumb.s[0], thumb.s[1]);
1136     else
1137         ii(thumb.i);
1138 }
1139
1140 static void
1141 _cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1142 {
1143     jit_thumb_t thumb;
1144     assert(!(cc & 0x0fffffff));
1145     assert(!(o  & 0xf00ff00f));
1146     assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
1147     r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
1148     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1149     if (jit_thumb_p())
1150         iss(thumb.s[0], thumb.s[1]);
1151     else
1152         ii(thumb.i);
1153 }
1154
1155 static void
1156 _cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1157 {
1158     jit_thumb_t thumb;
1159     assert(!(cc & 0x0fffffff));
1160     assert(!(o  & 0xf00ff00f));
1161     assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
1162     r0 = vfp_regno(r0); r1 = vfp_regno(r1);     r2 = vfp_regno(r2);
1163     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1164     if (jit_thumb_p())
1165         iss(thumb.s[0], thumb.s[1]);
1166     else
1167         ii(thumb.i);
1168 }
1169
1170 static void
1171 _cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1172 {
1173     jit_thumb_t thumb;
1174     /* i0 << 2 is byte offset */
1175     assert(!(cc & 0x0fffffff));
1176     assert(!(o  & 0xf00ff0ff));
1177     if (r0 & 1) {
1178         assert(!(o & ARM_V_F64));
1179         o |= ARM_V_D;
1180     }
1181     r0 = vfp_regno(r0);
1182     thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0);
1183     if (jit_thumb_p())
1184         iss(thumb.s[0], thumb.s[1]);
1185     else
1186         ii(thumb.i);
1187 }
1188
1189 static void
1190 _cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1191 {
1192     jit_thumb_t thumb;
1193     assert(!(cc & 0x0fffffff));
1194     assert(!(o  & 0xf00ff0ff));
1195     /* save i0 double precision registers */
1196     if (o & ARM_V_F64)          i0 <<= 1;
1197     /* if (r1 & 1) cc & ARM_V_F64 must be false */
1198     if (r1 & 1) o |= ARM_V_D;   r1 = vfp_regno(r1);
1199     assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
1200     thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
1201     if (jit_thumb_p())
1202         iss(thumb.s[0], thumb.s[1]);
1203     else
1204         ii(thumb.i);
1205 }
1206
1207 static void
1208 _vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1209 {
1210     if (r0 != r1) {
1211         if (jit_fpr_p(r1)) {
1212             if (jit_fpr_p(r0))
1213                 VMOV_F32(r0, r1);
1214             else
1215                 VMOV_A_S(r0, r1);
1216         }
1217         else if (jit_fpr_p(r0))
1218             VMOV_S_A(r0, r1);
1219         else
1220             movr(r0, r1);
1221     }
1222 }
1223
1224 static void
1225 _vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1226 {
1227     if (r0 != r1) {
1228         if (jit_fpr_p(r1)) {
1229             if (jit_fpr_p(r0))
1230                 VMOV_F64(r0, r1);
1231             else
1232                 VMOV_AA_D(r0, r0 + 1, r1);
1233         }
1234         else if (jit_fpr_p(r0))
1235             VMOV_D_AA(r0, r1, r1 + 1);
1236         else {
1237             /* minor consistency check */
1238             assert(r0 + 1 != r1 && r0 -1 != r1);
1239             movr(r0, r1);
1240             movr(r0 + 1, r1 + 1);
1241         }
1242     }
1243 }
1244
1245 static void
1246 _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
1247 {
1248     union {
1249         jit_int32_t     i;
1250         jit_float32_t   f;
1251     } u;
1252     jit_int32_t         reg;
1253     jit_int32_t         code;
1254     u.f = i0;
1255     if (jit_fpr_p(r0)) {
1256         /* float arguments are packed, for others,
1257          * lightning only address even registers */
1258         if (!(r0 & 1) && (r0 - 16) >= 0 &&
1259             ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
1260              (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
1261             VIMM(code, r0);
1262         else {
1263             reg = jit_get_reg(jit_class_gpr);
1264             movi(rn(reg), u.i);
1265             VMOV_S_A(r0, rn(reg));
1266             jit_unget_reg(reg);
1267         }
1268     }
1269     else
1270         movi(r0, u.i);
1271 }
1272
1273 static void
1274 _vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
1275 {
1276     union {
1277         jit_int32_t     i[2];
1278         jit_float64_t   d;
1279     } u;
1280     jit_int32_t         code;
1281     jit_int32_t         rg0, rg1;
1282     u.d = i0;
1283     if (jit_fpr_p(r0)) {
1284         if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
1285             (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
1286             VIMM(code, r0);
1287         else {
1288             rg0 = jit_get_reg(jit_class_gpr);
1289             rg1 = jit_get_reg(jit_class_gpr);
1290             movi(rn(rg0), u.i[0]);
1291             movi(rn(rg1), u.i[1]);
1292             VMOV_D_AA(r0, rn(rg0), rn(rg1));
1293             jit_unget_reg(rg1);
1294             jit_unget_reg(rg0);
1295         }
1296     }
1297     else {
1298         movi(r0, u.i[0]);
1299         movi(r0 + 1, u.i[1]);
1300     }
1301 }
1302
1303 static void
1304 _vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1305 {
1306     jit_int32_t         reg;
1307     if (jit_fpr_p(r1)) {
1308         if (jit_fpr_p(r0))
1309             VCVT_F64_F32(r0, r1);
1310         else {
1311             reg = jit_get_reg(jit_class_fpr);
1312             VCVT_F64_F32(rn(reg), r1);
1313             VMOV_A_S(r0, rn(reg));
1314             jit_unget_reg(reg);
1315         }
1316     }
1317     else {
1318         reg = jit_get_reg(jit_class_fpr);
1319         VMOV_S_A(rn(reg), r1);
1320         VCVT_F64_F32(rn(reg), rn(reg));
1321         if (jit_fpr_p(r0))
1322             VMOV_F32(r0, rn(reg));
1323         else
1324             VMOV_A_S(r0, rn(reg));
1325         jit_unget_reg(reg);
1326     }
1327 }
1328
1329 static void
1330 _vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1331 {
1332     jit_int32_t         reg;
1333     if (jit_fpr_p(r1)) {
1334         if (jit_fpr_p(r0))
1335             VCVT_F32_F64(r0, r1);
1336         else {
1337             reg = jit_get_reg(jit_class_fpr);
1338             VCVT_F32_F64(rn(reg), r1);
1339             VMOV_AA_D(r0, r0 + 1, rn(reg));
1340             jit_unget_reg(reg);
1341         }
1342     }
1343     else {
1344         reg = jit_get_reg(jit_class_fpr);
1345         VMOV_D_AA(rn(reg), r1, r1 + 1);
1346         VCVT_F32_F64(rn(reg), rn(reg));
1347         if (jit_fpr_p(r0))
1348             VMOV_F64(r0, rn(reg));
1349         else
1350             VMOV_AA_D(r0, r0 + 1, rn(reg));
1351         jit_unget_reg(reg);
1352     }
1353 }
1354
1355 static void
1356 _vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1357 {
1358     jit_int32_t         reg;
1359     if (jit_fpr_p(r0)) {
1360         VMOV_V_I32(r0, r1);
1361         VCVT_F32_S32(r0, r0);
1362     }
1363     else {
1364         reg = jit_get_reg(jit_class_fpr);
1365         VMOV_V_I32(rn(reg), r1);
1366         VCVT_F32_S32(rn(reg), rn(reg));
1367         VMOV_F32(r0, rn(reg));
1368         jit_unget_reg(reg);
1369     }
1370 }
1371
1372 static void
1373 _vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1374 {
1375     jit_int32_t         reg;
1376     if (jit_fpr_p(r0)) {
1377         VMOV_V_I32(r0, r1);
1378         VCVT_F64_S32(r0, r0);
1379     }
1380     else {
1381         reg = jit_get_reg(jit_class_fpr);
1382         VMOV_V_I32(rn(reg), r1);
1383         VCVT_F64_S32(rn(reg), rn(reg));
1384         VMOV_F64(r0, rn(reg));
1385         jit_unget_reg(reg);
1386     }
1387 }
1388
1389 static void
1390 _vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1391 {
1392     jit_int32_t         reg;
1393     reg = jit_get_reg(jit_class_fpr);
1394     if (jit_fpr_p(r1))
1395         VCVT_S32_F32(rn(reg), r1);
1396     else {
1397         VMOV_V_I32(rn(reg), r1);
1398         VCVT_S32_F32(rn(reg), rn(reg));
1399     }
1400     VMOV_A_S32(r0, rn(reg));
1401     jit_unget_reg(reg);
1402 }
1403
1404 static void
1405 _vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1406 {
1407     jit_int32_t         reg;
1408     reg = jit_get_reg(jit_class_fpr);
1409     if (jit_fpr_p(r1))
1410         VCVT_S32_F64(rn(reg), r1);
1411     else {
1412         VMOV_V_I32(rn(reg), r1);
1413         VCVT_S32_F64(rn(reg), rn(reg));
1414     }
1415     VMOV_A_S32(r0, rn(reg));
1416     jit_unget_reg(reg);
1417 }
1418
/* fopi(name) defines _vfp_<name>i_f(), the single precision
 * register/immediate form of vfp_<name>r_f(): the constant i0 is
 * loaded into a scratch fpr that is passed as the third operand. */
#  define fopi(name)                                                    \
static void                                                             \
_vfp_##name##i_f(jit_state_t *_jit,                                     \
                 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)      \
{                                                                       \
    jit_int32_t         scratch;                                        \
    scratch = jit_get_reg(jit_class_fpr);                               \
    vfp_movi_f(rn(scratch), i0);                                        \
    vfp_##name##r_f(r0, r1, rn(scratch));                               \
    jit_unget_reg(scratch);                                             \
}
/* dopi(name) defines _vfp_<name>i_d(), the double precision
 * register/immediate form of vfp_<name>r_d(): the constant i0 is
 * loaded into a scratch fpr that is passed as the third operand. */
#  define dopi(name)                                                    \
static void                                                             \
_vfp_##name##i_d(jit_state_t *_jit,                                     \
                 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)      \
{                                                                       \
    jit_int32_t         scratch;                                        \
    scratch = jit_get_reg(jit_class_fpr);                               \
    vfp_movi_d(rn(scratch), i0);                                        \
    vfp_##name##r_d(r0, r1, rn(scratch));                               \
    jit_unget_reg(scratch);                                             \
}
/* fbopi(name) defines _vfp_b<name>i_f(), the single precision
 * compare-with-immediate branch built on vfp_b<name>r_f().  The
 * constant is loaded into a scratch fpr requested with
 * jit_class_nospill, and the value returned by the register form is
 * passed back to the caller. */
#  define fbopi(name)                                                   \
static jit_word_t                                                       \
_vfp_b##name##i_f(jit_state_t *_jit,                                    \
                  jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)     \
{                                                                       \
    jit_word_t          at;                                             \
    jit_int32_t         scratch;                                        \
    scratch = jit_get_reg(jit_class_fpr|jit_class_nospill);             \
    vfp_movi_f(rn(scratch), i0);                                        \
    at = vfp_b##name##r_f(r0, r1, rn(scratch));                         \
    jit_unget_reg(scratch);                                             \
    return (at);                                                        \
}
/* dbopi(name) defines _vfp_b<name>i_d(), the double precision
 * compare-with-immediate branch built on vfp_b<name>r_d().  The
 * constant is loaded into a scratch fpr requested with
 * jit_class_nospill, and the value returned by the register form is
 * passed back to the caller. */
#  define dbopi(name)                                                   \
static jit_word_t                                                       \
_vfp_b##name##i_d(jit_state_t *_jit,                                    \
                  jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)     \
{                                                                       \
    jit_word_t          at;                                             \
    jit_int32_t         scratch;                                        \
    scratch = jit_get_reg(jit_class_fpr|jit_class_nospill);             \
    vfp_movi_d(rn(scratch), i0);                                        \
    at = vfp_b##name##r_d(r0, r1, rn(scratch));                         \
    jit_unget_reg(scratch);                                             \
    return (at);                                                        \
}
1465
1466 fopi(add)
1467 dopi(add)
1468 fopi(sub)
1469 fopi(rsb)
1470 dopi(rsb)
1471 dopi(sub)
1472 fopi(mul)
1473 dopi(mul)
1474 fopi(div)
1475 dopi(div)
1476
1477 static void
1478 _vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1479 {
1480     jit_int32_t         rg0, rg1;
1481     if (jit_fpr_p(r0)) {
1482         if (jit_fpr_p(r1))
1483             VCMP_F32(r0, r1);
1484         else {
1485             rg1 = jit_get_reg(jit_class_fpr);
1486             VMOV_S_A(rn(rg1), r1);
1487             VCMP_F32(r0, rn(rg1));
1488             jit_unget_reg(rg1);
1489         }
1490     }
1491     else {
1492         rg0 = jit_get_reg(jit_class_fpr);
1493         VMOV_S_A(rn(rg0), r0);
1494         if (jit_fpr_p(r1))
1495             VCMP_F32(rn(rg0), r1);
1496         else {
1497             rg1 = jit_get_reg(jit_class_fpr);
1498             VMOV_S_A(rn(rg1), r1);
1499             VCMP_F32(rn(rg0), rn(rg1));
1500             jit_unget_reg(rg1);
1501         }
1502         jit_unget_reg(rg0);
1503     }
1504 }
1505
1506 static void
1507 _vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1508 {
1509     jit_int32_t         rg0, rg1;
1510     if (jit_fpr_p(r0)) {
1511         if (jit_fpr_p(r1))
1512             VCMP_F64(r0, r1);
1513         else {
1514             rg1 = jit_get_reg(jit_class_fpr);
1515             VMOV_D_AA(rn(rg1), r1, r1 + 1);
1516             VCMP_F64(r0, rn(rg1));
1517             jit_unget_reg(rg1);
1518         }
1519     }
1520     else {
1521         rg0 = jit_get_reg(jit_class_fpr);
1522         VMOV_D_AA(rn(rg0), r0, r0 + 1);
1523         if (jit_fpr_p(r1))
1524             VCMP_F64(rn(rg0), r1);
1525         else {
1526             rg1 = jit_get_reg(jit_class_fpr);
1527             VMOV_D_AA(rn(rg1), r1, r1 + 1);
1528             VCMP_F64(rn(rg0), rn(rg1));
1529             jit_unget_reg(rg1);
1530         }
1531         jit_unget_reg(rg0);
1532     }
1533 }
1534
1535 static void
1536 _vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
1537 {
1538     VMRS(_R15_REGNO);
1539     if (jit_thumb_p()) {
1540         if ((c0 ^ c1) >> 28 == 1) {
1541             ITE(c0);
1542             if (r0 < 8) {
1543                 T1_MOVI(r0, 0);
1544                 T1_MOVI(r0, 1);
1545             }
1546             else {
1547                 T2_MOVI(r0, 0);
1548                 T2_MOVI(r0, 1);
1549             }
1550         }
1551         else {
1552             if (r0 < 8) {
1553                 IT(c0);
1554                 T1_MOVI(r0, 0);
1555                 IT(c1);
1556                 T1_MOVI(r0, 1);
1557             }
1558             else {
1559                 IT(c0);
1560                 T2_MOVI(r0, 0);
1561                 IT(c1);
1562                 T2_MOVI(r0, 1);
1563             }
1564         }
1565     }
1566     else {
1567         CC_MOVI(c0, r0, 0);
1568         CC_MOVI(c1, r0, 1);
1569     }
1570 }
1571
1572 static void
1573 _vcmp01_f(jit_state_t *_jit, int c0, int c1,
1574           jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1575 {
1576     vfp_cmp_f(r1, r2);
1577     vcmp01_x(c0, c1, r0);
1578 }
1579
1580 static void
1581 _vcmp01_d(jit_state_t *_jit, int c0, int c1,
1582           jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1583 {
1584     vfp_cmp_d(r1, r2);
1585     vcmp01_x(c0, c1, r0);
1586 }
1587
1588 static void
1589 _vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1590 {
1591     if (jit_thumb_p()) {
1592         if (r0 < 8) {
1593             T1_MOVI(r0, 1);
1594             VMRS(_R15_REGNO);
1595             IT(cc);
1596             T1_MOVI(r0, 0);
1597         }
1598         else {
1599             T2_MOVI(r0, 1);
1600             VMRS(_R15_REGNO);
1601             IT(cc);
1602             T2_MOVI(r0, 0);
1603         }
1604     }
1605     else {
1606         VMRS(_R15_REGNO);
1607         MOVI(r0, 1);
1608         CC_MOVI(cc, r0, 0);
1609     }
1610 }
1611 static void
1612 _vcmp_10_f(jit_state_t *_jit, int cc,
1613            jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1614 {
1615     vfp_cmp_f(r1, r2);
1616     vcmp10_x(cc, r0);
1617 }
1618
1619 static void
1620 _vcmp_10_d(jit_state_t *_jit, int cc,
1621            jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1622 {
1623     vfp_cmp_d(r1, r2);
1624     vcmp10_x(cc, r0);
1625 }
1626
1627 fopi(lt)
1628 dopi(lt)
1629 fopi(le)
1630 dopi(le)
1631 fopi(eq)
1632 dopi(eq)
1633 fopi(ge)
1634 dopi(ge)
1635 fopi(gt)
1636 dopi(gt)
1637 fopi(ne)
1638 dopi(ne)
1639 fopi(unlt)
1640 dopi(unlt)
1641 fopi(unle)
1642 dopi(unle)
1643
1644 static void
1645 _vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
1646 {
1647     VMRS(_R15_REGNO);
1648     if (jit_thumb_p()) {
1649         ITE(ARM_CC_NE);
1650         if (r0 < 8) {
1651             T1_MOVI(r0, 0);
1652             T1_MOVI(r0, 1);
1653             IT(ARM_CC_VS);
1654             T1_MOVI(r0, 1);
1655         }
1656         else {
1657             T2_MOVI(r0, 0);
1658             T2_MOVI(r0, 1);
1659             IT(ARM_CC_VS);
1660             T2_MOVI(r0, 1);
1661         }
1662     }
1663     else {
1664         CC_MOVI(ARM_CC_NE, r0, 0);
1665         CC_MOVI(ARM_CC_EQ, r0, 1);
1666         CC_MOVI(ARM_CC_VS, r0, 1);
1667     }
1668 }
1669
1670 static void
1671 _vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1672 {
1673     vfp_cmp_f(r1, r2);
1674     vfp_uneqr_x(r0);
1675 }
1676
1677 fopi(uneq)
1678
1679 static void
1680 _vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1681 {
1682     vfp_cmp_d(r1, r2);
1683     vfp_uneqr_x(r0);
1684 }
1685
1686 dopi(uneq)
1687
1688 static void
1689 _vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1690 {
1691     if (jit_thumb_p()) {
1692         if (r0 < 8) {
1693             T1_MOVI(r0, 0);
1694             VMRS(_R15_REGNO);
1695             IT(cc);
1696             T1_MOVI(r0, 1);
1697         }
1698         else {
1699             T2_MOVI(r0, 0);
1700             VMRS(_R15_REGNO);
1701             IT(cc);
1702             T2_MOVI(r0, 1);
1703         }
1704     }
1705     else {
1706         MOVI(r0, 0);
1707         VMRS(_R15_REGNO);
1708         CC_MOVI(cc, r0, 1);
1709     }
1710 }
1711
1712 static void
1713 _vcmp_01_f(jit_state_t *_jit, int cc,
1714            jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1715 {
1716     vfp_cmp_f(r1, r2);
1717     vcmp_01_x(cc, r0);
1718 }
1719
1720 static void
1721 _vcmp_01_d(jit_state_t *_jit, int cc,
1722            jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1723 {
1724     vfp_cmp_d(r1, r2);
1725     vcmp_01_x(cc, r0);
1726 }
1727
1728 fopi(unge)
1729 dopi(unge)
1730 fopi(ungt)
1731 dopi(ungt)
1732
1733 static void
1734 _vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
1735 {
1736     VMRS(_R15_REGNO);
1737     if (jit_thumb_p()) {
1738         ITE(ARM_CC_NE);
1739         if (r0 < 8) {
1740             T1_MOVI(r0, 1);
1741             T1_MOVI(r0, 0);
1742             IT(ARM_CC_VS);
1743             T1_MOVI(r0, 0);
1744         }
1745         else {
1746             T2_MOVI(r0, 1);
1747             T2_MOVI(r0, 0);
1748             IT(ARM_CC_VS);
1749             T2_MOVI(r0, 0);
1750         }
1751     }
1752     else {
1753         CC_MOVI(ARM_CC_NE, r0, 1);
1754         CC_MOVI(ARM_CC_EQ, r0, 0);
1755         CC_MOVI(ARM_CC_VS, r0, 0);
1756     }
1757 }
1758
1759 static void
1760 _vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1761 {
1762     vfp_cmp_f(r1, r2);
1763     vfp_ltgtr_x(r0);
1764 }
1765
1766 fopi(ltgt)
1767
1768 static void
1769 _vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1770 {
1771     vfp_cmp_d(r1, r2);
1772     vfp_ltgtr_x(r0);
1773 }
1774
1775 dopi(ltgt)
1776
1777 static void
1778 _vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1779 {
1780     vfp_cmp_f(r1, r2);
1781     vcmp10_x(ARM_CC_VS, r0);
1782 }
1783
1784 fopi(ord)
1785
1786 static void
1787 _vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1788 {
1789     vfp_cmp_d(r1, r2);
1790     vcmp10_x(ARM_CC_VS, r0);
1791 }
1792
1793 dopi(ord)
1794
1795 static void
1796 _vfp_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1797 {
1798     vfp_cmp_f(r1, r2);
1799     vcmp_01_x(ARM_CC_VS, r0);
1800 }
1801
1802 fopi(unord)
1803
1804 static void
1805 _vfp_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1806 {
1807     vfp_cmp_d(r1, r2);
1808     vcmp_01_x(ARM_CC_VS, r0);
1809 }
1810
1811 dopi(unord)
1812
1813 static jit_word_t
1814 _vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
1815 {
1816     jit_word_t          d, w;
1817     VMRS(_R15_REGNO);
1818     w = _jit->pc.w;
1819     if (jit_thumb_p()) {
1820         d = ((i0 - w) >> 1) - 2;
1821         assert(_s20P(d));
1822         T2_CC_B(cc, encode_thumb_cc_jump(d));
1823     }
1824     else {
1825         d = ((i0 - w) >> 2) - 2;
1826         assert(_s24P(d));
1827         CC_B(cc, d & 0x00ffffff);
1828     }
1829     return (w);
1830 }
1831
1832
1833 static jit_word_t
1834 _vbcmp_f(jit_state_t *_jit, int cc,
1835          jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1836 {
1837     vfp_cmp_f(r0, r1);
1838     return (vbcmp_x(cc, i0));
1839 }
1840
1841 static jit_word_t
1842 _vbcmp_d(jit_state_t *_jit, int cc,
1843          jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1844 {
1845     vfp_cmp_d(r0, r1);
1846     return (vbcmp_x(cc, i0));
1847 }
1848
1849 static jit_word_t
1850 _vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
1851 {
1852     jit_word_t          d, p, w;
1853     VMRS(_R15_REGNO);
1854     p = _jit->pc.w;
1855     if (jit_thumb_p()) {
1856         T2_CC_B(cc, 0);
1857         w = _jit->pc.w;
1858         d = ((i0 - w) >> 1) - 2;
1859         assert(_s20P(d));
1860         T2_B(encode_thumb_jump(d));
1861     }
1862     else {
1863         CC_B(cc, 0);
1864         w = _jit->pc.w;
1865         d = ((i0 - w) >> 2) - 2;
1866         assert(_s24P(d));
1867         B(d & 0x00ffffff);
1868     }
1869     patch_at(arm_patch_jump, p, _jit->pc.w);
1870     return (w);
1871 }
1872
1873 static jit_word_t
1874 _vbncmp_f(jit_state_t *_jit, int cc,
1875           jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1876 {
1877     vfp_cmp_f(r0, r1);
1878     return (vbncmp_x(cc, i0));
1879 }
1880
1881 static jit_word_t
1882 _vbncmp_d(jit_state_t *_jit, int cc,
1883           jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1884 {
1885     vfp_cmp_d(r0, r1);
1886     return (vbncmp_x(cc, i0));
1887 }
1888
1889 fbopi(lt)
1890 dbopi(lt)
1891 fbopi(le)
1892 dbopi(le)
1893 fbopi(eq)
1894 dbopi(eq)
1895 fbopi(ge)
1896 dbopi(ge)
1897 fbopi(gt)
1898 dbopi(gt)
1899 fbopi(ne)
1900 dbopi(ne)
1901 fbopi(unlt)
1902 dbopi(unlt)
1903 fbopi(unle)
1904 dbopi(unle)
1905
1906 static jit_word_t
1907 _vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
1908 {
1909     jit_word_t          d, p, q, w;
1910     VMRS(_R15_REGNO);
1911     p = _jit->pc.w;
1912     if (jit_thumb_p()) {
1913         T2_CC_B(ARM_CC_VS, 0);
1914         q = _jit->pc.w;
1915         T2_CC_B(ARM_CC_NE, 0);
1916         patch_at(arm_patch_jump, p, _jit->pc.w);
1917         w = _jit->pc.w;
1918         d = ((i0 - w) >> 1) - 2;
1919         assert(_s20P(d));
1920         T2_B(encode_thumb_jump(d));
1921     }
1922     else {
1923         CC_B(ARM_CC_VS, 0);
1924         q = _jit->pc.w;
1925         CC_B(ARM_CC_NE, 0);
1926         patch_at(arm_patch_jump, p, _jit->pc.w);
1927         w = _jit->pc.w;
1928         d = ((i0 - w) >> 2) - 2;
1929         assert(_s24P(d));
1930         B(d & 0x00ffffff);
1931     }
1932     patch_at(arm_patch_jump, q, _jit->pc.w);
1933     return (w);
1934 }
1935
1936 static jit_word_t
1937 _vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1938 {
1939     vfp_cmp_f(r0, r1);
1940     return (vfp_buneqr_x(i0));
1941 }
1942
1943 fbopi(uneq)
1944
1945 static jit_word_t
1946 _vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1947 {
1948     vfp_cmp_d(r0, r1);
1949     return (vfp_buneqr_x(i0));
1950 }
1951
1952 dbopi(uneq)
1953
1954 static jit_word_t
1955 _vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
1956 {
1957     jit_word_t          d, p, w;
1958     VMRS(_R15_REGNO);
1959     p = _jit->pc.w;
1960     if (jit_thumb_p()) {
1961         T2_CC_B(ARM_CC_MI, 0);
1962         w = _jit->pc.w;
1963         d = ((i0 - w) >> 1) - 2;
1964         assert(_s20P(d));
1965         T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
1966     }
1967     else {
1968         CC_B(ARM_CC_MI, 0);
1969         w = _jit->pc.w;
1970         d = ((i0 - w) >> 2) - 2;
1971         assert(_s24P(d));
1972         CC_B(ARM_CC_HS, d & 0x00ffffff);
1973     }
1974     patch_at(arm_patch_jump, p, _jit->pc.w);
1975     return (w);
1976 }
1977
1978 static jit_word_t
1979 _vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1980 {
1981     vfp_cmp_f(r0, r1);
1982     return (vfp_bunger_x(i0));
1983 }
1984
1985 fbopi(unge)
1986
1987 static jit_word_t
1988 _vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1989 {
1990     vfp_cmp_d(r0, r1);
1991     return (vfp_bunger_x(i0));
1992 }
1993
1994 dbopi(unge)
1995
1996 static jit_word_t
1997 _vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
1998 {
1999     jit_word_t          d, p, q, w;
2000     VMRS(_R15_REGNO);
2001     p = _jit->pc.w;
2002     if (jit_thumb_p()) {
2003         T2_CC_B(ARM_CC_VS, 0);
2004         q = _jit->pc.w;
2005         T2_CC_B(ARM_CC_EQ, 0);
2006         w = _jit->pc.w;
2007         d = ((i0 - w) >> 1) - 2;
2008         assert(_s20P(d));
2009         T2_B(encode_thumb_jump(d));
2010     }
2011     else {
2012         CC_B(ARM_CC_VS, 0);
2013         q = _jit->pc.w;
2014         CC_B(ARM_CC_EQ, 0);
2015         w = _jit->pc.w;
2016         d = ((i0 - w) >> 2) - 2;
2017         assert(_s24P(d));
2018         B(d & 0x00ffffff);
2019     }
2020     patch_at(arm_patch_jump, p, _jit->pc.w);
2021     patch_at(arm_patch_jump, q, _jit->pc.w);
2022     return (w);
2023 }
2024
2025 static jit_word_t
2026 _vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2027 {
2028     vfp_cmp_f(r0, r1);
2029     return (vfp_bltgtr_x(i0));
2030 }
2031
2032 fbopi(ungt)
2033 dbopi(ungt)
2034 fbopi(ltgt)
2035
2036 static jit_word_t
2037 _vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2038 {
2039     vfp_cmp_d(r0, r1);
2040     return (vfp_bltgtr_x(i0));
2041 }
2042
2043 dbopi(ltgt)
2044 fbopi(ord)
2045 dbopi(ord)
2046 fbopi(unord)
2047 dbopi(unord)
2048
2049 static void
2050 _vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2051 {
2052     jit_int32_t         gpr;
2053     if (jit_fpr_p(r0)) {
2054         gpr = jit_get_reg(jit_class_gpr);
2055         movi(rn(gpr), i0);
2056         VLDR_F32(r0, rn(gpr), 0);
2057         jit_unget_reg(gpr);
2058     }
2059     else
2060         ldi_i(r0, i0);
2061 }
2062
2063 static void
2064 _vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2065 {
2066     jit_int32_t         reg;
2067     reg = jit_get_reg(jit_class_gpr);
2068     movi(rn(reg), i0);
2069     if (jit_fpr_p(r0))
2070         VLDR_F64(r0, rn(reg), 0);
2071     else {
2072         ldr_i(r0, rn(reg));
2073         ldxi_i(r0 + 1, rn(reg), 4);
2074     }
2075     jit_unget_reg(reg);
2076 }
2077
2078 static void
2079 _vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2080 {
2081     jit_int32_t         reg;
2082     if (jit_fpr_p(r0)) {
2083         reg = jit_get_reg(jit_class_gpr);
2084         addr(rn(reg), r1, r2);
2085         VLDR_F32(r0, rn(reg), 0);
2086         jit_unget_reg(reg);
2087     }
2088     else
2089         ldxr_i(r0, r1, r2);
2090 }
2091
2092 static void
2093 _vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2094 {
2095     jit_int32_t         reg;
2096     reg = jit_get_reg(jit_class_gpr);
2097     addr(rn(reg), r1, r2);
2098     if (jit_fpr_p(r0))
2099         VLDR_F64(r0, rn(reg), 0);
2100     else {
2101         ldr_i(r0, rn(reg));
2102         ldxi_i(r0 + 1, rn(reg), 4);
2103     }
2104     jit_unget_reg(reg);
2105 }
2106
2107 static void
2108 _vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2109 {
2110     jit_int32_t         reg;
2111     if (jit_fpr_p(r0)) {
2112         if (i0 >= 0) {
2113             assert(!(i0 & 3));
2114             if (i0 < 1024)
2115                 VLDR_F32(r0, r1, i0 >> 2);
2116             else {
2117                 reg = jit_get_reg(jit_class_gpr);
2118                 addi(rn(reg), r1, i0);
2119                 VLDR_F32(r0, rn(reg), 0);
2120                 jit_unget_reg(reg);
2121             }
2122         }
2123         else {
2124             i0 = -i0;
2125             assert(!(i0 & 3));
2126             if (i0 < 1024)
2127                 VLDRN_F32(r0, r1, i0 >> 2);
2128             else {
2129                 reg = jit_get_reg(jit_class_gpr);
2130                 subi(rn(reg), r1, i0);
2131                 VLDR_F32(r0, rn(reg), 0);
2132                 jit_unget_reg(reg);
2133             }
2134         }
2135     }
2136     else
2137         ldxi_i(r0, r1, i0);
2138 }
2139
2140 static void
2141 _vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2142 {
2143     jit_int32_t         reg;
2144     if (jit_fpr_p(r0)) {
2145         if (i0 >= 0) {
2146             assert(!(i0 & 3));
2147             if (i0 < 1024)
2148                 VLDR_F64(r0, r1, i0 >> 2);
2149             else {
2150                 reg = jit_get_reg(jit_class_gpr);
2151                 addi(rn(reg), r1, i0);
2152                 VLDR_F64(r0, rn(reg), 0);
2153                 jit_unget_reg(reg);
2154             }
2155         }
2156         else {
2157             i0 = -i0;
2158             assert(!(i0 & 3));
2159             if (i0 < 1024)
2160                 VLDRN_F64(r0, r1, i0 >> 2);
2161             else {
2162                 reg = jit_get_reg(jit_class_gpr);
2163                 subi(rn(reg), r1, i0);
2164                 VLDR_F64(r0, rn(reg), 0);
2165                 jit_unget_reg(reg);
2166             }
2167         }
2168     }
2169     else {
2170         reg = jit_get_reg(jit_class_gpr);
2171         addi(rn(reg), r1, i0);
2172         ldr_i(r0, rn(reg));
2173         ldxi_i(r0 + 1, rn(reg), 4);
2174         jit_unget_reg(reg);
2175     }
2176 }
2177
2178 static void
2179 _vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2180 {
2181     jit_int32_t         reg;
2182     if (jit_fpr_p(r0)) {
2183         reg = jit_get_reg(jit_class_gpr);
2184         movi(rn(reg), i0);
2185         VSTR_F32(r0, rn(reg), 0);
2186         jit_unget_reg(reg);
2187     }
2188     else
2189         sti_i(i0, r0);
2190 }
2191
2192 static void
2193 _vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2194 {
2195     jit_int32_t         reg;
2196     reg = jit_get_reg(jit_class_gpr);
2197     movi(rn(reg), i0);
2198     if (jit_fpr_p(r0))
2199         VSTR_F64(r0, rn(reg), 0);
2200     else {
2201         str_i(rn(reg), r0);
2202         stxi_i(4, rn(reg), r0 + 1);
2203     }
2204     jit_unget_reg(reg);
2205 }
2206
2207 static void
2208 _vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2209 {
2210     jit_int32_t         reg;
2211     if (jit_fpr_p(r2)) {
2212         reg = jit_get_reg(jit_class_gpr);
2213         addr(rn(reg), r0, r1);
2214         VSTR_F32(r2, rn(reg), 0);
2215         jit_unget_reg(reg);
2216     }
2217     else
2218         stxr_i(r0, r1, r2);
2219 }
2220
2221 static void
2222 _vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2223 {
2224     jit_int32_t         reg;
2225     reg = jit_get_reg(jit_class_gpr);
2226     addr(rn(reg), r0, r1);
2227     if (jit_fpr_p(r2))
2228         VSTR_F64(r2, rn(reg), 0);
2229     else {
2230         str_i(rn(reg), r2);
2231         stxi_i(4, rn(reg), r2 + 1);
2232     }
2233     jit_unget_reg(reg);
2234 }
2235
2236 static void
2237 _vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2238 {
2239     jit_int32_t         reg;
2240     if (jit_fpr_p(r1)) {
2241         if (i0 >= 0) {
2242             assert(!(i0 & 3));
2243             if (i0 < 1024)
2244                 VSTR_F32(r1, r0, i0 >> 2);
2245             else {
2246                 reg = jit_get_reg(jit_class_gpr);
2247                 addi(rn(reg), r0, i0);
2248                 VSTR_F32(r1, rn(reg), 0);
2249                 jit_unget_reg(reg);
2250             }
2251         }
2252         else {
2253             i0 = -i0;
2254             assert(!(i0 & 3));
2255             if (i0 < 1024)
2256                 VSTRN_F32(r1, r0, i0 >> 2);
2257             else {
2258                 reg = jit_get_reg(jit_class_gpr);
2259                 subi(rn(reg), r0, i0);
2260                 VSTR_F32(r1, rn(reg), 0);
2261                 jit_unget_reg(reg);
2262             }
2263         }
2264     }
2265     else
2266         stxi_i(i0, r0, r1);
2267 }
2268
2269 static void
2270 _vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2271 {
2272     jit_int32_t         reg;
2273     if (jit_fpr_p(r1)) {
2274         if (i0 >= 0) {
2275             assert(!(i0 & 3));
2276             if (i0 < 0124)
2277                 VSTR_F64(r1, r0, i0 >> 2);
2278             else {
2279                 reg = jit_get_reg(jit_class_gpr);
2280                 addi(rn(reg), r0, i0);
2281                 VSTR_F64(r1, rn(reg), 0);
2282                 jit_unget_reg(reg);
2283             }
2284         }
2285         else {
2286             i0 = -i0;
2287             assert(!(i0 & 3));
2288             if (i0 < 1024)
2289                 VSTRN_F64(r1, r0, i0 >> 2);
2290             else {
2291                 reg = jit_get_reg(jit_class_gpr);
2292                 subi(rn(reg), r0, i0);
2293                 VSTR_F64(r1, rn(reg), 0);
2294                 jit_unget_reg(reg);
2295             }
2296         }
2297     }
2298     else {
2299         reg = jit_get_reg(jit_class_gpr);
2300         addi(rn(reg), r0, i0);
2301         str_i(rn(reg), r1);
2302         stxi_i(4, rn(reg), r1 + 1);
2303         jit_unget_reg(reg);
2304     }
2305 }
2306
2307 static void
2308 _vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2309 {
2310     jit_int32_t         reg;
2311
2312     assert(_jitc->function->self.call & jit_call_varargs);
2313
2314     /* Adjust pointer. */
2315     reg = jit_get_reg(jit_class_gpr);
2316     andi(rn(reg), r1, 7);
2317     addr(r1, r1, rn(reg));
2318     jit_unget_reg(reg);
2319
2320     /* Load argument. */
2321     vfp_ldr_d(r0, r1);
2322
2323     /* Update stack pointer. */
2324     addi(r1, r1, sizeof(jit_float64_t));
2325 }
2326 #  undef dbopi
2327 #  undef fbopi
2328 #  undef dopi
2329 #  undef fopi
2330 #endif