git subrepo pull (merge) --force deps/lightning
[pcsx_rearmed.git] / deps / lightning / lib / jit_arm-vfp.c
CommitLineData
4a71579b 1/*
79bfeef6 2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4a71579b
PC
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20#if PROTO
21/* As per the vfp_regno macro; required because of the "support" for soft
22 * float registers and for using integer registers as arguments to float operations. */
23# define _D8_REGNO 32
/* NOTE(review): ARM_V_Q is defined a second time, with the same value,
 * among the other ARM_V_* flags further down in this file. */
24# define ARM_V_Q 0x00000040
/* Bit masks for the FPSCR: condition flags and mode-control bits. */
25# define FPSCR_N 0x80000000 /* Negative flag */
26# define FPSCR_Z 0x40000000 /* Zero flag */
27# define FPSCR_C 0x20000000 /* Carry flag */
28# define FPSCR_V 0x10000000 /* Overflow flag */
29# define FPSCR_QC 0x08000000 /* Cumulative saturation */
30# define FPSCR_AHP 0x04000000 /* Alt. half-precision */
31# define FPSCR_DN 0x02000000 /* Default NaN mode */
32# define FPSCR_FZ 0x01000000 /* Flush to zero */
/* Rounding-mode field: FPSCR_RMASK covers both bits of the field and the
 * four FPSCR_R? values below are its encodings (FPSCR_RZ equals the mask). */
33# define FPSCR_RMASK 0x00c00000
34# define FPSCR_RN 0x00000000 /* Round to Nearest */
35# define FPSCR_RP 0x00400000 /* Round to Plus Infinity */
36# define FPSCR_RM 0x00800000 /* Round to Minus Infinity */
37# define FPSCR_RZ 0x00c00000 /* Round towards Zero */
/* presumably the vector stride field, and FPSCR_LEN below the vector
 * length field -- TODO confirm against the architecture manual */
38# define FPSCR_STRIDE 0x00300000
39# define FPSCR_RES1 0x00080000 /* Reserved, UNK/SBZP */
40# define FPSCR_LEN 0x00070000
/* Trap bits (the *E names). */
41# define FPSCR_IDE 0x00008000 /* Input Denormal trap */
42# define FPSCR_IXE 0x00001000 /* Inexact trap */
43# define FPSCR_UFE 0x00000800 /* Underflow trap */
44# define FPSCR_OFE 0x00000400 /* Overflow trap */
45# define FPSCR_DZE 0x00000200 /* Division by zero trap */
46# define FPSCR_IOE 0x00000100 /* Invalid Operation trap */
/* Flag bits (the *C names). */
47# define FPSCR_IDC 0x00000080 /* Input Denormal flag */
48# define FPSCR_RES0 0x00000060 /* Reserved, UNK/SBZP */
49# define FPSCR_IXC 0x00000010 /* Inexact flag */
50# define FPSCR_UFC 0x00000008 /* Underflow flag */
51# define FPSCR_OFC 0x00000004 /* Overflow flag */
52# define FPSCR_DZC 0x00000002 /* Division by zero flag */
53# define FPSCR_IOC 0x00000001 /* Invalid Operation flag */
54# define ARM_V_E 0x00000080 /* ARM_VCMP except if NaN */
55# define ARM_V_Z 0x00010000 /* ARM_VCMP with zero */
56# define ARM_V_F64 0x00000100
57# define ARM_VADD_F 0x0e300a00
58# define ARM_VSUB_F 0x0e300a40
59# define ARM_VMUL_F 0x0e200a00
60# define ARM_VDIV_F 0x0e800a00
61# define ARM_VABS_F 0x0eb00ac0
62# define ARM_VNEG_F 0x0eb10a40
63# define ARM_VSQRT_F 0x0eb10ac0
64# define ARM_VMOV_F 0x0eb00a40
65# define ARM_VMOV_A_S 0x0e100a10 /* vmov rn, sn */
66# define ARM_VMOV_S_A 0x0e000a10 /* vmov sn, rn */
67# define ARM_VMOV_AA_D 0x0c500b10 /* vmov rn,rn, dn */
68# define ARM_VMOV_D_AA 0x0c400b10 /* vmov dn, rn,rn */
69# define ARM_VCMP 0x0eb40a40
70# define ARM_VMRS 0x0ef10a10
71# define ARM_VMSR 0x0ee10a10
/*
 * VCVT opcode construction.
 *
 * ARM_VCVT is the base opcode for conversions between floating point and
 * 32 bit integers; the ARM_VCVT_2I / ARM_VCVT_2S / ARM_VCVT_RS modifier
 * bits and ARM_V_F64 are OR'ed into it to select a variant.  ARM_VCVT_F is
 * the base opcode for conversions between the two floating point widths.
 *
 * Composite names read ARM_VCVT_<destination>_<source>.  The ARM_VCVTR_*
 * forms are the to-integer opcodes built without ARM_VCVT_RS.
 *
 * Every composite replacement list is parenthesized so that the macro
 * expands to a single operand; without the parentheses an expression such
 * as (ARM_VCVT_S32_F32 & mask) would apply the mask to the last OR'ed term
 * only, because '&' binds tighter than '|'.
 */
# define ARM_VCVT_2I 0x00040000 /* to integer */
# define ARM_VCVT_2S 0x00010000 /* to signed */
# define ARM_VCVT_RS 0x00000080 /* round to zero or signed */
# define ARM_VCVT 0x0eb80a40
# define ARM_VCVT_S32_F32 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS)
# define ARM_VCVT_U32_F32 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS)
# define ARM_VCVT_S32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_U32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_F32_S32 (ARM_VCVT|ARM_VCVT_RS)
# define ARM_VCVT_F32_U32 (ARM_VCVT)
# define ARM_VCVT_F64_S32 (ARM_VCVT|ARM_VCVT_RS|ARM_V_F64)
# define ARM_VCVT_F64_U32 (ARM_VCVT|ARM_V_F64)
# define ARM_VCVT_F 0x0eb70ac0
# define ARM_VCVT_F32_F64 (ARM_VCVT_F)
# define ARM_VCVT_F64_F32 (ARM_VCVT_F|ARM_V_F64)
# define ARM_VCVTR_S32_F32 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S)
# define ARM_VCVTR_U32_F32 (ARM_VCVT|ARM_VCVT_2I)
# define ARM_VCVTR_S32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_VCVT_2S|ARM_V_F64)
# define ARM_VCVTR_U32_F64 (ARM_VCVT|ARM_VCVT_2I|ARM_V_F64)
ba86ff93
PC
91# define ARM_VFMA 0x0ea00a00
92# define ARM_VFMS 0x0ea00a40
93# define ARM_VFNMA 0x0e900a00
94# define ARM_VFNMS 0x0e900a40
4a71579b
PC
95# define ARM_V_D 0x00400000
96# define ARM_V_N 0x00000080
97# define ARM_V_Q 0x00000040
98# define ARM_V_M 0x00000020
99# define ARM_V_U 0x01000000
100# define ARM_V_I16 0x00100000
101# define ARM_V_I32 0x00200000
102# define ARM_V_I64 0x00300000
103# define ARM_V_S16 0x00040000
104# define ARM_V_S32 0x00080000
105# define ARM_VADD_I 0x02000800
106# define ARM_VQADD_I 0x02000010 /* set flag on over/carry */
107# define ARM_VADDL_I 0x02800000 /* q=d+d */
108# define ARM_VADDW_I 0x02800100 /* q=q+d */
109# define ARM_VSUB_I 0x03000800
110# define ARM_VQSUB_I 0x02000210 /* set flag on over/carry */
111# define ARM_VSUBL_I 0x02800200
112# define ARM_VSUBW_I 0x02800300
113# define ARM_VMUL_I 0x02000910
114# define ARM_VMULL_I 0x02800c00
115# define ARM_VABS_I 0x03b10300
116# define ARM_VQABS_I 0x03b00700 /* sets flag on overflow */
117# define ARM_VNEG_I 0x03b10380
118# define ARM_VQNEG_I 0x03b00780 /* sets flag on overflow */
119# define ARM_VAND 0x02000110
120# define ARM_VBIC 0x02100110
121# define ARM_VORR 0x02200110
122# define ARM_VORN 0x02300110
123# define ARM_VEOR 0x03000110
124# define ARM_VMOVL_S8 0x00080000
125# define ARM_VMOVL_S16 0x00100000
126# define ARM_VMOVL_S32 0x00200000
127# define ARM_VMOVL_I 0x02800a10
128# define ARM_VMOVI 0x02800010
129# define ARM_VMVNI 0x02800030
130# define ARM_VLDR 0x0d100a00
131# define ARM_VSTR 0x0d000a00
132# define ARM_VM 0x0c000a00
133# define ARM_VMOV_ADV_U 0x00800000 /* zero extend */
134# define ARM_VMOV_ADV_8 0x00400000
135# define ARM_VMOV_ADV_16 0x00000020
136# define ARM_VMOV_A_D 0x0e100b10
137# define ARM_VMOV_D_A 0x0e000b10
ba86ff93 138# define ARM_VCNT 0x03b00500
4a71579b
PC
139# define vodi(oi,r0) _vodi(_jit,oi,r0)
140static void _vodi(jit_state_t*,int,int) maybe_unused;
141# define voqi(oi,r0) _voqi(_jit,oi,r0)
142static void _voqi(jit_state_t*,int,int) maybe_unused;
143# define vo_ss(o,r0,r1) _cc_vo_ss(_jit,ARM_CC_NV,o,r0,r1)
144# define cc_vo_ss(cc,o,r0,r1) _cc_vo_ss(_jit,cc,o,r0,r1)
145static void _cc_vo_ss(jit_state_t*,int,int,int,int);
146# define vo_dd(o,r0,r1) _cc_vo_dd(_jit,ARM_CC_NV,o,r0,r1)
147# define cc_vo_dd(cc,o,r0,r1) _cc_vo_dd(_jit,cc,o,r0,r1)
148static void _cc_vo_dd(jit_state_t*,int,int,int,int);
149# define vo_qd(o,r0,r1) _cc_vo_qd(_jit,ARM_CC_NV,o,r0,r1)
150# define cc_vo_qd(cc,o,r0,r1) _cc_vo_qd(_jit,cc,o,r0,r1)
151static void _cc_vo_qd(jit_state_t*,int,int,int,int) maybe_unused;
152# define vo_qq(o,r0,r1) _cc_vo_qq(_jit,ARM_CC_NV,o,r0,r1)
153# define cc_vo_qq(cc,o,r0,r1) _cc_vo_qq(_jit,cc,o,r0,r1)
154static void _cc_vo_qq(jit_state_t*,int,int,int,int) maybe_unused;
155# define vorr_(o,r0,r1) _cc_vorr_(_jit,ARM_CC_NV,o,r0,r1)
156# define cc_vorr_(cc,o,r0,r1) _cc_vorr_(_jit,cc,o,r0,r1)
157static void _cc_vorr_(jit_state_t*,int,int,int,int);
158# define vors_(o,r0,r1) _cc_vors_(_jit,ARM_CC_NV,o,r0,r1)
159# define cc_vors_(cc,o,r0,r1) _cc_vors_(_jit,cc,o,r0,r1)
160static void _cc_vors_(jit_state_t*,int,int,int,int);
161# define vorv_(o,r0,r1) _cc_vorv_(_jit,ARM_CC_NV,o,r0,r1)
162# define cc_vorv_(cc,o,r0,r1) _cc_vorv_(_jit,cc,o,r0,r1)
163static void _cc_vorv_(jit_state_t*,int,int,int,int) maybe_unused;
ba86ff93
PC
164# define vo_vv(o,r0,r1) _cc_vo_vv(_jit,ARM_CC_NV,o,r0,r1)
165static void _cc_vo_vv(jit_state_t*,int,int,int,int) maybe_unused;
4a71579b
PC
166# define vori_(o,r0,r1) _cc_vori_(_jit,ARM_CC_NV,o,r0,r1)
167# define cc_vori_(cc,o,r0,r1) _cc_vori_(_jit,cc,o,r0,r1)
168static void _cc_vori_(jit_state_t*,int,int,int,int);
169# define vorrd(o,r0,r1,r2) _cc_vorrd(_jit,ARM_CC_NV,o,r0,r1,r2)
170# define cc_vorrd(cc,o,r0,r1,r2) _cc_vorrd(_jit,cc,o,r0,r1,r2)
171static void _cc_vorrd(jit_state_t*,int,int,int,int,int);
172# define vosss(o,r0,r1,r2) _cc_vosss(_jit,ARM_CC_NV,o,r0,r1,r2)
173# define cc_vosss(cc,o,r0,r1,r2) _cc_vosss(_jit,cc,o,r0,r1,r2)
174static void _cc_vosss(jit_state_t*,int,int,int,int,int);
175# define voddd(o,r0,r1,r2) _cc_voddd(_jit,ARM_CC_NV,o,r0,r1,r2)
176# define cc_voddd(cc,o,r0,r1,r2) _cc_voddd(_jit,cc,o,r0,r1,r2)
177static void _cc_voddd(jit_state_t*,int,int,int,int,int);
178# define voqdd(o,r0,r1,r2) _cc_voqdd(_jit,ARM_CC_NV,o,r0,r1,r2)
179# define cc_voqdd(cc,o,r0,r1,r2) _cc_voqdd(_jit,cc,o,r0,r1,r2)
180static void _cc_voqdd(jit_state_t*,int,int,int,int,int) maybe_unused;
181# define voqqd(o,r0,r1,r2) _cc_voqqd(_jit,ARM_CC_NV,o,r0,r1,r2)
182# define cc_voqqd(cc,o,r0,r1,r2) _cc_voqqd(_jit,cc,o,r0,r1,r2)
183static void _cc_voqqd(jit_state_t*,int,int,int,int,int) maybe_unused;
184# define voqqq(o,r0,r1,r2) _cc_voqqq(_jit,ARM_CC_NV,o,r0,r1,r2)
185# define cc_voqqq(cc,o,r0,r1,r2) _cc_voqqq(_jit,cc,o,r0,r1,r2)
186static void _cc_voqqq(jit_state_t*,int,int,int,int,int) maybe_unused;
187# define cc_vldst(cc,o,r0,r1,i0) _cc_vldst(_jit,cc,o,r0,r1,i0)
188static void _cc_vldst(jit_state_t*,int,int,int,int,int);
189# define cc_vorsl(cc,o,r0,r1,i0) _cc_vorsl(_jit,cc,o,r0,r1,i0)
190static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
191# define CC_VADD_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VADD_F,r0,r1,r2)
192# define VADD_F32(r0,r1,r2) CC_VADD_F32(ARM_CC_AL,r0,r1,r2)
193# define CC_VADD_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VADD_F|ARM_V_F64,r0,r1,r2)
194# define VADD_F64(r0,r1,r2) CC_VADD_F64(ARM_CC_AL,r0,r1,r2)
195# define CC_VSUB_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VSUB_F,r0,r1,r2)
196# define VSUB_F32(r0,r1,r2) CC_VSUB_F32(ARM_CC_AL,r0,r1,r2)
197# define CC_VSUB_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VSUB_F|ARM_V_F64,r0,r1,r2)
198# define VSUB_F64(r0,r1,r2) CC_VSUB_F64(ARM_CC_AL,r0,r1,r2)
199# define CC_VMUL_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VMUL_F,r0,r1,r2)
200# define VMUL_F32(r0,r1,r2) CC_VMUL_F32(ARM_CC_AL,r0,r1,r2)
201# define CC_VMUL_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VMUL_F|ARM_V_F64,r0,r1,r2)
202# define VMUL_F64(r0,r1,r2) CC_VMUL_F64(ARM_CC_AL,r0,r1,r2)
203# define CC_VDIV_F32(cc,r0,r1,r2) cc_vosss(cc,ARM_VDIV_F,r0,r1,r2)
204# define VDIV_F32(r0,r1,r2) CC_VDIV_F32(ARM_CC_AL,r0,r1,r2)
205# define CC_VDIV_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VDIV_F|ARM_V_F64,r0,r1,r2)
206# define VDIV_F64(r0,r1,r2) CC_VDIV_F64(ARM_CC_AL,r0,r1,r2)
207# define CC_VABS_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VABS_F,r0,r1)
208# define VABS_F32(r0,r1) CC_VABS_F32(ARM_CC_AL,r0,r1)
209# define CC_VABS_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VABS_F|ARM_V_F64,r0,r1)
210# define VABS_F64(r0,r1) CC_VABS_F64(ARM_CC_AL,r0,r1)
211# define CC_VNEG_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VNEG_F,r0,r1)
212# define VNEG_F32(r0,r1) CC_VNEG_F32(ARM_CC_AL,r0,r1)
213# define CC_VNEG_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VNEG_F|ARM_V_F64,r0,r1)
214# define VNEG_F64(r0,r1) CC_VNEG_F64(ARM_CC_AL,r0,r1)
215# define CC_VSQRT_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VSQRT_F,r0,r1)
216# define VSQRT_F32(r0,r1) CC_VSQRT_F32(ARM_CC_AL,r0,r1)
217# define CC_VSQRT_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VSQRT_F|ARM_V_F64,r0,r1)
218# define VSQRT_F64(r0,r1) CC_VSQRT_F64(ARM_CC_AL,r0,r1)
ba86ff93
PC
/*
 * Wrappers for the ARM_VFMA, ARM_VFMS, ARM_VFNMA and ARM_VFNMS opcodes.
 * Each CC_* macro takes the condition code explicitly; the name without
 * the CC_ prefix passes ARM_CC_AL.  The _F64 forms OR ARM_V_F64 into the
 * opcode.
 *
 * NOTE(review): the _F32 forms are emitted through cc_voddd, whereas the
 * other _F32 arithmetic wrappers in this file (e.g. CC_VADD_F32) go
 * through cc_vosss -- confirm this is intentional.
 */
219# define CC_VFMA_F32(cc,r0,r1,r2) cc_voddd(cc,ARM_VFMA,r0,r1,r2)
220# define VFMA_F32(r0,r1,r2) CC_VFMA_F32(ARM_CC_AL,r0,r1,r2)
221# define CC_VFMA_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFMA|ARM_V_F64,r0,r1,r2)
222# define VFMA_F64(r0,r1,r2) CC_VFMA_F64(ARM_CC_AL,r0,r1,r2)
223# define CC_VFMS_F32(cc,r0,r1,r2) cc_voddd(cc,ARM_VFMS,r0,r1,r2)
224# define VFMS_F32(r0,r1,r2) CC_VFMS_F32(ARM_CC_AL,r0,r1,r2)
225# define CC_VFMS_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFMS|ARM_V_F64,r0,r1,r2)
226# define VFMS_F64(r0,r1,r2) CC_VFMS_F64(ARM_CC_AL,r0,r1,r2)
227# define CC_VFNMA_F32(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMA,r0,r1,r2)
228# define VFNMA_F32(r0,r1,r2) CC_VFNMA_F32(ARM_CC_AL,r0,r1,r2)
229# define CC_VFNMA_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMA|ARM_V_F64,r0,r1,r2)
230# define VFNMA_F64(r0,r1,r2) CC_VFNMA_F64(ARM_CC_AL,r0,r1,r2)
231# define CC_VFNMS_F32(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMS,r0,r1,r2)
232# define VFNMS_F32(r0,r1,r2) CC_VFNMS_F32(ARM_CC_AL,r0,r1,r2)
233# define CC_VFNMS_F64(cc,r0,r1,r2) cc_voddd(cc,ARM_VFNMS|ARM_V_F64,r0,r1,r2)
234# define VFNMS_F64(r0,r1,r2) CC_VFNMS_F64(ARM_CC_AL,r0,r1,r2)
4a71579b
PC
235# define CC_VMOV_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VMOV_F,r0,r1)
236# define VMOV_F32(r0,r1) CC_VMOV_F32(ARM_CC_AL,r0,r1)
237# define CC_VMOV_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VMOV_F|ARM_V_F64,r0,r1)
238# define VMOV_F64(r0,r1) CC_VMOV_F64(ARM_CC_AL,r0,r1)
239# define CC_VMOV_AA_D(cc,r0,r1,r2) cc_vorrd(cc,ARM_VMOV_AA_D,r0,r1,r2)
240# define VMOV_AA_D(r0,r1,r2) CC_VMOV_AA_D(ARM_CC_AL,r0,r1,r2)
241# define CC_VMOV_D_AA(cc,r0,r1,r2) cc_vorrd(cc,ARM_VMOV_D_AA,r1,r2,r0)
242# define VMOV_D_AA(r0,r1,r2) CC_VMOV_D_AA(ARM_CC_AL,r0,r1,r2)
243# define CC_VMOV_A_S(cc,r0,r1) cc_vors_(cc,ARM_VMOV_A_S,r0,r1)
244# define VMOV_A_S(r0,r1) CC_VMOV_A_S(ARM_CC_AL,r0,r1)
245# define CC_VMOV_S_A(cc,r0,r1) cc_vors_(cc,ARM_VMOV_S_A,r1,r0)
246# define VMOV_S_A(r0,r1) CC_VMOV_S_A(ARM_CC_AL,r0,r1)
247# define CC_VCMP_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCMP,r0,r1)
248# define VCMP_F32(r0,r1) CC_VCMP_F32(ARM_CC_AL,r0,r1)
249# define CC_VCMP_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VCMP|ARM_V_F64,r0,r1)
250# define VCMP_F64(r0,r1) CC_VCMP_F64(ARM_CC_AL,r0,r1)
251# define CC_VCMPE_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCMP|ARM_V_E,r0,r1)
252# define VCMPE_F32(r0,r1) CC_VCMPE_F32(ARM_CC_AL,r0,r1)
253# define CC_VCMPE_F64(cc,r0,r1) cc_vo_dd(cc,ARM_VCMP|ARM_V_E|ARM_V_F64,r0,r1)
254# define VCMPE_F64(r0,r1) CC_VCMPE_F64(ARM_CC_AL,r0,r1)
255# define CC_VCMPZ_F32(cc,r0) cc_vo_ss(cc,ARM_VCMP|ARM_V_Z,r0,0)
256# define VCMPZ_F32(r0) CC_VCMPZ_F32(ARM_CC_AL,r0)
257# define CC_VCMPZ_F64(cc,r0) cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_F64,r0,0)
258# define VCMPZ_F64(r0) CC_VCMPZ_F64(ARM_CC_AL,r0)
259# define CC_VCMPEZ_F32(cc,r0) cc_vo_ss(cc,ARM_VCMP|ARM_V_Z|ARM_V_E,r0,0)
260# define VCMPEZ_F32(r0) CC_VCMPEZ_F32(ARM_CC_AL,r0)
261# define CC_VCMPEZ_F64(cc,r0) cc_vo_dd(cc,ARM_VCMP|ARM_V_Z|ARM_V_E|ARM_V_F64,r0,0)
262# define VCMPEZ_F64(r0) CC_VCMPEZ_F64(ARM_CC_AL,r0)
263# define CC_VMRS(cc,r0) cc_vorr_(cc,ARM_VMRS,r0,0)
264# define VMRS(r0) CC_VMRS(ARM_CC_AL,r0)
265# define CC_VMSR(cc,r0) cc_vorr_(cc,ARM_VMSR,r0,0)
266# define VMSR(r0) CC_VMSR(ARM_CC_AL,r0)
267# define CC_VCVT_S32_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_S32_F32,r0,r1)
268# define VCVT_S32_F32(r0,r1) CC_VCVT_S32_F32(ARM_CC_AL,r0,r1)
269# define CC_VCVT_U32_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_U32_F32,r0,r1)
270# define VCVT_U32_F32(r0,r1) CC_VCVT_U32_F32(ARM_CC_AL,r0,r1)
271# define CC_VCVT_S32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_S32_F64,r0,r1)
272# define VCVT_S32_F64(r0,r1) CC_VCVT_S32_F64(ARM_CC_AL,r0,r1)
273# define CC_VCVT_U32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_U32_F64,r0,r1)
274# define VCVT_U32_F64(r0,r1) CC_VCVT_U32_F64(ARM_CC_AL,r0,r1)
275# define CC_VCVT_F32_S32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F32_S32,r0,r1)
276# define VCVT_F32_S32(r0,r1) CC_VCVT_F32_S32(ARM_CC_AL,r0,r1)
277# define CC_VCVT_F32_U32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F32_U32,r0,r1)
278# define VCVT_F32_U32(r0,r1) CC_VCVT_F32_U32(ARM_CC_AL,r0,r1)
279# define CC_VCVT_F64_S32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F64_S32,r0,r1)
280# define VCVT_F64_S32(r0,r1) CC_VCVT_F64_S32(ARM_CC_AL,r0,r1)
281# define CC_VCVT_F64_U32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F64_U32,r0,r1)
282# define VCVT_F64_U32(r0,r1) CC_VCVT_F64_U32(ARM_CC_AL,r0,r1)
283# define CC_VCVT_F32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F32_F64,r0,r1)
284# define VCVT_F32_F64(r0,r1) CC_VCVT_F32_F64(ARM_CC_AL,r0,r1)
285# define CC_VCVT_F64_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVT_F64_F32,r0,r1)
286# define VCVT_F64_F32(r0,r1) CC_VCVT_F64_F32(ARM_CC_AL,r0,r1)
287# define CC_VCVTR_S32_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_S32_F32,r0,r1)
288# define VCVTR_S32_F32(r0,r1) CC_VCVTR_S32_F32(ARM_CC_AL,r0,r1)
289# define CC_VCVTR_U32_F32(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_U32_F32,r0,r1)
290# define VCVTR_U32_F32(r0,r1) CC_VCVTR_U32_F32(ARM_CC_AL,r0,r1)
291# define CC_VCVTR_S32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_S32_F64,r0,r1)
292# define VCVTR_S32_F64(r0,r1) CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
293# define CC_VCVTR_U32_F64(cc,r0,r1) cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
294# define VCVTR_U32_F64(r0,r1) CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
295# define CC_VLDMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
296# define VLDMIA_F32(r0,r1,i0) CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
297# define CC_VLDMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
298# define VLDMIA_F64(r0,r1,i0) CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
299# define CC_VSTMIA_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
300# define VSTMIA_F32(r0,r1,i0) CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
301# define CC_VSTMIA_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
302# define VSTMIA_F64(r0,r1,i0) CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)
303# define CC_VLDMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
304# define VLDMIA_U_F32(r0,r1,i0) CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
305# define CC_VLDMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
306# define VLDMIA_U_F64(r0,r1,i0) CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
307# define CC_VSTMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
308# define VSTMIA_U_F32(r0,r1,i0) CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
309# define CC_VSTMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
310# define VSTMIA_U_F64(r0,r1,i0) CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)
311# define CC_VLDMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
312# define VLDMDB_U_F32(r0,r1,i0) CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
313# define CC_VLDMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
314# define VLDMDB_U_F64(r0,r1,i0) CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
315# define CC_VSTMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
316# define VSTMDB_U_F32(r0,r1,i0) CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
317# define CC_VSTMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
318# define VSTMDB_U_F64(r0,r1,i0) CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)
319# define CC_VPUSH_F32(cc,r0,i0) CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
320# define VPUSH_F32(r0,i0) CC_VPUSH_F32(ARM_CC_AL,r0,i0)
321# define CC_VPUSH_F64(cc,r0,i0) CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
322# define VPUSH_F64(r0,i0) CC_VPUSH_F64(ARM_CC_AL,r0,i0)
323# define CC_VPOP_F32(cc,r0,i0) CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
324# define VPOP_F32(r0,i0) CC_VPOP_F32(ARM_CC_AL,r0,i0)
325# define CC_VPOP_F64(cc,r0,i0) CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
326# define VPOP_F64(r0,i0) CC_VPOP_F64(ARM_CC_AL,r0,i0)
327# define CC_VMOV_A_S8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
328# define VMOV_A_S8(r0,r1) CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
329# define CC_VMOV_A_U8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
330# define VMOV_A_U8(r0,r1) CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
331# define CC_VMOV_A_S16(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16,r0,r1)
332# define VMOV_A_S16(r0,r1) CC_VMOV_A_S16(ARM_CC_AL,r0,r1)
333# define CC_VMOV_A_U16(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_16|ARM_VMOV_ADV_U,r0,r1)
334# define VMOV_A_U16(r0,r1) CC_VMOV_A_U16(ARM_CC_AL,r0,r1)
335# define CC_VMOV_A_S32(cc,r0,r1) cc_vori_(cc,ARM_VMOV_A_D,r0,r1)
336# define VMOV_A_S32(r0,r1) CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
337# define CC_VMOV_A_U32(cc,r0,r1) cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
338# define VMOV_A_U32(r0,r1) CC_VMOV_A_U32(ARM_CC_AL,r0,r1)
339# define CC_VMOV_V_I8(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
340# define VMOV_V_I8(r0,r1) CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
341# define CC_VMOV_V_I16(cc,r0,r1) cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
342# define VMOV_V_I16(r0,r1) CC_VMOV_V_I16(ARM_CC_AL,r0,r1)
343# define CC_VMOV_V_I32(cc,r0,r1) cc_vori_(cc,ARM_VMOV_D_A,r1,r0)
344# define VMOV_V_I32(r0,r1) CC_VMOV_V_I32(ARM_CC_AL,r0,r1)
ba86ff93 345# define VCNT(r0,r1) vo_vv(ARM_VCNT,r0,r1)
4a71579b
PC
346# define VADD_I8(r0,r1,r2) voddd(ARM_VADD_I,r0,r1,r2)
347# define VADDQ_I8(r0,r1,r2) voqqq(ARM_VADD_I|ARM_V_Q,r0,r1,r2)
348# define VADD_I16(r0,r1,r2) voddd(ARM_VADD_I|ARM_V_I16,r0,r1,r2)
349# define VADDQ_I16(r0,r1,r2) voqqq(ARM_VADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
350# define VADD_I32(r0,r1,r2) voddd(ARM_VADD_I|ARM_V_I32,r0,r1,r2)
351# define VADDQ_I32(r0,r1,r2) voqqq(ARM_VADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
352# define VADD_I64(r0,r1,r2) voddd(ARM_VADD_I|ARM_V_I64,r0,r1,r2)
353# define VADDQ_I64(r0,r1,r2) voqqq(ARM_VADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
354# define VQADD_S8(r0,r1,r2) voddd(ARM_VQADD_I,r0,r1,r2)
355# define VQADDQ_S8(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_Q,r0,r1,r2)
356# define VQADD_U8(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_U,r0,r1,r2)
357# define VQADDQ_U8(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
358# define VQADD_S16(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I16,r0,r1,r2)
359# define VQADDQ_S16(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
360# define VQADD_U16(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
361# define VQADDQ_U16(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
362# define VQADD_S32(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I32,r0,r1,r2)
363# define VQADDQ_S32(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
364# define VQADD_U32(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
365# define VQADDQ_U32(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
366# define VQADD_S64(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I64,r0,r1,r2)
367# define VQADDQ_S64(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
368# define VQADD_U64(r0,r1,r2) voddd(ARM_VQADD_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
369# define VQADDQ_U64(r0,r1,r2) voqqq(ARM_VQADD_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
370# define VADDL_S8(r0,r1,r2) voqdd(ARM_VADDL_I,r0,r1,r2)
371# define VADDL_U8(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_U,r0,r1,r2)
372# define VADDL_S16(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_I16,r0,r1,r2)
373# define VADDL_U16(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
374# define VADDL_S32(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_I32,r0,r1,r2)
375# define VADDL_U32(r0,r1,r2) voqdd(ARM_VADDL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
376# define VADDW_S8(r0,r1,r2) voqqd(ARM_VADDW_I,r0,r1,r2)
377# define VADDW_U8(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_U,r0,r1,r2)
378# define VADDW_S16(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_I16,r0,r1,r2)
379# define VADDW_U16(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
380# define VADDW_S32(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_I32,r0,r1,r2)
381# define VADDW_U32(r0,r1,r2) voqqd(ARM_VADDW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
382# define VSUB_I8(r0,r1,r2) voddd(ARM_VSUB_I,r0,r1,r2)
383# define VSUBQ_I8(r0,r1,r2) voqqq(ARM_VSUB_I|ARM_V_Q,r0,r1,r2)
384# define VSUB_I16(r0,r1,r2) voddd(ARM_VSUB_I|ARM_V_I16,r0,r1,r2)
385# define VSUBQ_I16(r0,r1,r2) voqqq(ARM_VSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
386# define VSUB_I32(r0,r1,r2) voddd(ARM_VSUB_I|ARM_V_I32,r0,r1,r2)
387# define VSUBQ_I32(r0,r1,r2) voqqq(ARM_VSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
388# define VSUB_I64(r0,r1,r2) voddd(ARM_VSUB_I|ARM_V_I64,r0,r1,r2)
389# define VSUBQ_I64(r0,r1,r2) voqqq(ARM_VSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
390# define VQSUB_S8(r0,r1,r2) voddd(ARM_VQSUB_I,r0,r1,r2)
391# define VQSUBQ_S8(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_Q,r0,r1,r2)
392# define VQSUB_U8(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_U,r0,r1,r2)
393# define VQSUBQ_U8(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_U|ARM_V_Q,r0,r1,r2)
394# define VQSUB_S16(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I16,r0,r1,r2)
395# define VQSUBQ_S16(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_Q,r0,r1,r2)
396# define VQSUB_U16(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
397# define VQSUBQ_U16(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I16|ARM_V_U|ARM_V_Q,r0,r1,r2)
398# define VQSUB_S32(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I32,r0,r1,r2)
399# define VQSUBQ_S32(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_Q,r0,r1,r2)
400# define VQSUB_U32(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
401# define VQSUBQ_U32(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I32|ARM_V_U|ARM_V_Q,r0,r1,r2)
402# define VQSUB_S64(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I64,r0,r1,r2)
403# define VQSUBQ_S64(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_Q,r0,r1,r2)
404# define VQSUB_U64(r0,r1,r2) voddd(ARM_VQSUB_I|ARM_V_I64|ARM_V_U,r0,r1,r2)
405# define VQSUBQ_U64(r0,r1,r2) voqqq(ARM_VQSUB_I|ARM_V_I64|ARM_V_U|ARM_V_Q,r0,r1,r2)
406# define VSUBL_S8(r0,r1,r2) voqdd(ARM_VSUBL_I,r0,r1,r2)
407# define VSUBL_U8(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_U,r0,r1,r2)
408# define VSUBL_S16(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_I16,r0,r1,r2)
409# define VSUBL_U16(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
410# define VSUBL_S32(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_I32,r0,r1,r2)
411# define VSUBL_U32(r0,r1,r2) voqdd(ARM_VSUBL_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
412# define VSUBW_S8(r0,r1,r2) voqqd(ARM_VSUBW_I,r0,r1,r2)
413# define VSUBW_U8(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_U,r0,r1,r2)
414# define VSUBW_S16(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_I16,r0,r1,r2)
415# define VSUBW_U16(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_I16|ARM_V_U,r0,r1,r2)
416# define VSUBW_S32(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_I32,r0,r1,r2)
417# define VSUBW_U32(r0,r1,r2) voqqd(ARM_VSUBW_I|ARM_V_I32|ARM_V_U,r0,r1,r2)
418# define VMUL_I8(r0,r1,r2) voddd(ARM_VMUL_I,r0,r1,r2)
419# define VMULQ_I8(r0,r1,r2) voqqq(ARM_VMUL_I|ARM_V_Q,r0,r1,r2)
420# define VMUL_I16(r0,r1,r2) voddd(ARM_VMUL_I|ARM_V_I16,r0,r1,r2)
421# define VMULQ_I16(r0,r1,r2) voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I16,r0,r1,r2)
422# define VMUL_I32(r0,r1,r2) voddd(ARM_VMUL_I|ARM_V_I32,r0,r1,r2)
423# define VMULQ_I32(r0,r1,r2) voqqq(ARM_VMUL_I|ARM_V_Q|ARM_V_I32,r0,r1,r2)
/*
 * Widening integer multiply wrappers (ARM_VMULL_I).
 *
 * r0 is the quad destination, r1 and r2 are the double sources -- the same
 * operand shape as the VADDL_* and VSUBL_* wrappers in this file, so every
 * variant is emitted through voqdd.  Previously the signed forms used
 * voddd and the unsigned forms used voqqq, which disagreed with each other
 * and with that operand shape.
 *
 * ARM_V_U selects the unsigned form; ARM_V_I16 / ARM_V_I32 select the
 * source element size (no size flag for the 8 bit forms).
 */
# define VMULL_S8(r0,r1,r2) voqdd(ARM_VMULL_I,r0,r1,r2)
# define VMULL_U8(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_U,r0,r1,r2)
# define VMULL_S16(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_I16,r0,r1,r2)
# define VMULL_U16(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I16,r0,r1,r2)
# define VMULL_S32(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_I32,r0,r1,r2)
# define VMULL_U32(r0,r1,r2) voqdd(ARM_VMULL_I|ARM_V_U|ARM_V_I32,r0,r1,r2)
430# define VABS_S8(r0,r1) vo_dd(ARM_VABS_I,r0,r1)
431# define VABSQ_S8(r0,r1) vo_qq(ARM_VABS_I|ARM_V_Q,r0,r1)
432# define VABS_S16(r0,r1) vo_dd(ARM_VABS_I|ARM_V_S16,r0,r1)
433# define VABSQ_S16(r0,r1) vo_qq(ARM_VABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
434# define VABS_S32(r0,r1) vo_dd(ARM_VABS_I|ARM_V_S32,r0,r1)
435# define VABSQ_S32(r0,r1) vo_qq(ARM_VABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
436# define VQABS_S8(r0,r1) vo_dd(ARM_VQABS_I,r0,r1)
437# define VQABSQ_S8(r0,r1) vo_qq(ARM_VQABS_I|ARM_V_Q,r0,r1)
438# define VQABS_S16(r0,r1) vo_dd(ARM_VQABS_I|ARM_V_S16,r0,r1)
439# define VQABSQ_S16(r0,r1) vo_qq(ARM_VQABS_I|ARM_V_S16|ARM_V_Q,r0,r1)
440# define VQABS_S32(r0,r1) vo_dd(ARM_VQABS_I|ARM_V_S32,r0,r1)
441# define VQABSQ_S32(r0,r1) vo_qq(ARM_VQABS_I|ARM_V_S32|ARM_V_Q,r0,r1)
442# define VNEG_S8(r0,r1) vo_dd(ARM_VNEG_I,r0,r1)
443# define VNEGQ_S8(r0,r1) vo_qq(ARM_VNEG_I|ARM_V_Q,r0,r1)
444# define VNEG_S16(r0,r1) vo_dd(ARM_VNEG_I|ARM_V_S16,r0,r1)
445# define VNEGQ_S16(r0,r1) vo_qq(ARM_VNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
446# define VNEG_S32(r0,r1) vo_dd(ARM_VNEG_I|ARM_V_S32,r0,r1)
447# define VNEGQ_S32(r0,r1) vo_qq(ARM_VNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
448# define VQNEG_S8(r0,r1) vo_dd(ARM_VQNEG_I,r0,r1)
449# define VQNEGQ_S8(r0,r1) vo_qq(ARM_VQNEG_I|ARM_V_Q,r0,r1)
450# define VQNEG_S16(r0,r1) vo_dd(ARM_VQNEG_I|ARM_V_S16,r0,r1)
451# define VQNEGQ_S16(r0,r1) vo_qq(ARM_VQNEG_I|ARM_V_S16|ARM_V_Q,r0,r1)
452# define VQNEG_S32(r0,r1) vo_dd(ARM_VQNEG_I|ARM_V_S32,r0,r1)
453# define VQNEGQ_S32(r0,r1) vo_qq(ARM_VQNEG_I|ARM_V_S32|ARM_V_Q,r0,r1)
454# define VAND(r0,r1,r2) voddd(ARM_VAND,r0,r1,r2)
455# define VANDQ(r0,r1,r2) voqqq(ARM_VAND|ARM_V_Q,r0,r1,r2)
456# define VBIC(r0,r1,r2) voddd(ARM_VBIC,r0,r1,r2)
457# define VBICQ(r0,r1,r2) voqqq(ARM_VBIC|ARM_V_Q,r0,r1,r2)
458# define VORR(r0,r1,r2) voddd(ARM_VORR,r0,r1,r2)
459# define VORRQ(r0,r1,r2) voqqq(ARM_VORR|ARM_V_Q,r0,r1,r2)
460# define VORN(r0,r1,r2) voddd(ARM_VORN,r0,r1,r2)
461# define VORNQ(r0,r1,r2) voqqq(ARM_VORN|ARM_V_Q,r0,r1,r2)
462# define VEOR(r0,r1,r2) voddd(ARM_VEOR,r0,r1,r2)
463# define VEORQ(r0,r1,r2) voqqq(ARM_VEOR|ARM_V_Q,r0,r1,r2)
464# define VMOV(r0,r1) VORR(r0,r1,r1)
465# define VMOVQ(r0,r1) VORRQ(r0,r1,r1)
466# define VMOVL_S8(r0,r1) vo_qd(ARM_VMOVL_I|ARM_VMOVL_S8,r0,r1)
467# define VMOVL_U8(r0,r1) vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S8,r0,r1)
468# define VMOVL_S16(r0,r1) vo_qd(ARM_VMOVL_I|ARM_VMOVL_S16,r0,r1)
469# define VMOVL_U16(r0,r1) vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S16,r0,r1)
470# define VMOVL_S32(r0,r1) vo_qd(ARM_VMOVL_I|ARM_VMOVL_S32,r0,r1)
471# define VMOVL_U32(r0,r1) vo_qd(ARM_VMOVL_I|ARM_V_U|ARM_VMOVL_S32,r0,r1)
472/* "oi" should be the result of encode_vfp_double */
473# define VIMM(oi,r0) vodi(oi,r0)
474# define VIMMQ(oi,r0) voqi(oi|ARM_V_Q,r0)
475/* index is multiplied by four */
476# define CC_VLDRN_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR,r0,r1,i0)
477# define VLDRN_F32(r0,r1,i0) CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
478# define CC_VLDR_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
479# define VLDR_F32(r0,r1,i0) CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
480# define CC_VLDRN_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
481# define VLDRN_F64(r0,r1,i0) CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
482# define CC_VLDR_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
483# define VLDR_F64(r0,r1,i0) CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)
484# define CC_VSTRN_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR,r0,r1,i0)
485# define VSTRN_F32(r0,r1,i0) CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
486# define CC_VSTR_F32(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
487# define VSTR_F32(r0,r1,i0) CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
488# define CC_VSTRN_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
489# define VSTRN_F64(r0,r1,i0) CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
490# define CC_VSTR_F64(cc,r0,r1,i0) cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
491# define VSTR_F64(r0,r1,i0) CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
ba86ff93
PC
492# define vfp_popcntr(r0,r1) _vfp_popcntr(_jit,r0,r1)
493static void _vfp_popcntr(jit_state_t*,jit_int32_t,jit_int32_t);
4a71579b
PC
494# define vfp_movr_f(r0,r1) _vfp_movr_f(_jit,r0,r1)
495static void _vfp_movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
4a71579b
PC
496# define vfp_movi_f(r0,i0) _vfp_movi_f(_jit,r0,i0)
497static void _vfp_movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
ba86ff93
PC
498# define vfp_movr_w_f(r0, r1) VMOV_S_A(r0, r1)
499# define vfp_movr_f_w(r0, r1) VMOV_A_S(r0, r1)
500# define vfp_movi_w_f(r0, i0) _vfp_movi_w_f(_jit, r0, i0)
501static void _vfp_movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
502# define vfp_movr_d(r0,r1) _vfp_movr_d(_jit,r0,r1)
503static void _vfp_movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
4a71579b
PC
504# define vfp_movi_d(r0,i0) _vfp_movi_d(_jit,r0,i0)
505static void _vfp_movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
ba86ff93
PC
506# define vfp_movr_ww_d(r0, r1, r2) VMOV_D_AA(r0, r1, r2)
507# define vfp_movr_d_ww(r0, r1, r2) VMOV_AA_D(r0, r1, r2)
508static void _vfp_movr_d_ww(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
509# define vfp_movi_ww_d(r0, i0, i1) _vfp_movi_ww_d(_jit, r0, i0, i1)
510static void _vfp_movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
4a71579b
PC
511# define vfp_extr_f(r0,r1) _vfp_extr_f(_jit,r0,r1)
512static void _vfp_extr_f(jit_state_t*,jit_int32_t,jit_int32_t);
513# define vfp_extr_d(r0,r1) _vfp_extr_d(_jit,r0,r1)
514static void _vfp_extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
515# define vfp_extr_d_f(r0,r1) _vfp_extr_d_f(_jit,r0,r1)
516static void _vfp_extr_d_f(jit_state_t*,jit_int32_t,jit_int32_t);
517# define vfp_extr_f_d(r0,r1) _vfp_extr_f_d(_jit,r0,r1)
518static void _vfp_extr_f_d(jit_state_t*,jit_int32_t,jit_int32_t);
519# define vfp_truncr_f_i(r0,r1) _vfp_truncr_f_i(_jit,r0,r1)
520static void _vfp_truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
521# define vfp_truncr_d_i(r0,r1) _vfp_truncr_d_i(_jit,r0,r1)
522static void _vfp_truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
523# define vfp_absr_f(r0,r1) VABS_F32(r0,r1)
524# define vfp_absr_d(r0,r1) VABS_F64(r0,r1)
525# define vfp_negr_f(r0,r1) VNEG_F32(r0,r1)
526# define vfp_negr_d(r0,r1) VNEG_F64(r0,r1)
527# define vfp_sqrtr_f(r0,r1) VSQRT_F32(r0,r1)
ba86ff93
PC
528# define vfp_fmar_f(r0,r1,r2,r3) _vfp_fmar_f(_jit,r0,r1,r2,r3)
529static void _vfp_fmar_f(jit_state_t*,
530 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
531# define vfp_fmsr_f(r0,r1,r2,r3) _vfp_fmsr_f(_jit,r0,r1,r2,r3)
532static void _vfp_fmsr_f(jit_state_t*,
533 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
534# define vfp_fnmar_f(r0,r1,r2,r3) _vfp_fnmar_f(_jit,r0,r1,r2,r3)
535static void _vfp_fnmar_f(jit_state_t*,
536 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
537# define vfp_fnmsr_f(r0,r1,r2,r3) _vfp_fnmsr_f(_jit,r0,r1,r2,r3)
538static void _vfp_fnmsr_f(jit_state_t*,
539 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b 540# define vfp_sqrtr_d(r0,r1) VSQRT_F64(r0,r1)
ba86ff93
PC
541# define vfp_fmar_d(r0,r1,r2,r3) _vfp_fmar_d(_jit,r0,r1,r2,r3)
542static void _vfp_fmar_d(jit_state_t*,
543 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
544# define vfp_fmsr_d(r0,r1,r2,r3) _vfp_fmsr_d(_jit,r0,r1,r2,r3)
545static void _vfp_fmsr_d(jit_state_t*,
546 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
547# define vfp_fnmar_d(r0,r1,r2,r3) _vfp_fnmar_d(_jit,r0,r1,r2,r3)
548static void _vfp_fnmar_d(jit_state_t*,
549 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
550# define vfp_fnmsr_d(r0,r1,r2,r3) _vfp_fnmsr_d(_jit,r0,r1,r2,r3)
551static void _vfp_fnmsr_d(jit_state_t*,
552 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
553# define vfp_addr_f(r0,r1,r2) VADD_F32(r0,r1,r2)
554# define vfp_addi_f(r0,r1,i0) _vfp_addi_f(_jit,r0,r1,i0)
555static void _vfp_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
556# define vfp_addr_d(r0,r1,r2) VADD_F64(r0,r1,r2)
557# define vfp_addi_d(r0,r1,i0) _vfp_addi_d(_jit,r0,r1,i0)
558static void _vfp_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
559# define vfp_subr_f(r0,r1,r2) VSUB_F32(r0,r1,r2)
560# define vfp_subi_f(r0,r1,i0) _vfp_subi_f(_jit,r0,r1,i0)
561static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
562# define vfp_subr_d(r0,r1,r2) VSUB_F64(r0,r1,r2)
563# define vfp_subi_d(r0,r1,i0) _vfp_subi_d(_jit,r0,r1,i0)
564static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
565# define vfp_rsbr_f(r0,r1,r2) vfp_subr_f(r0,r2,r1)
566# define vfp_rsbi_f(r0,r1,i0) _vfp_rsbi_f(_jit,r0,r1,i0)
567static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
568# define vfp_rsbr_d(r0,r1,r2) vfp_subr_d(r0,r2,r1)
569# define vfp_rsbi_d(r0,r1,i0) _vfp_rsbi_d(_jit,r0,r1,i0)
570static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
571# define vfp_mulr_f(r0,r1,r2) VMUL_F32(r0,r1,r2)
572# define vfp_muli_f(r0,r1,i0) _vfp_muli_f(_jit,r0,r1,i0)
573static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
574# define vfp_mulr_d(r0,r1,r2) VMUL_F64(r0,r1,r2)
575# define vfp_muli_d(r0,r1,i0) _vfp_muli_d(_jit,r0,r1,i0)
576static void _vfp_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
577# define vfp_divr_f(r0,r1,r2) VDIV_F32(r0,r1,r2)
578# define vfp_divi_f(r0,r1,i0) _vfp_divi_f(_jit,r0,r1,i0)
579static void _vfp_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
580# define vfp_divr_d(r0,r1,r2) VDIV_F64(r0,r1,r2)
581# define vfp_divi_d(r0,r1,i0) _vfp_divi_d(_jit,r0,r1,i0)
582static void _vfp_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
583# define vfp_cmp_f(r0,r1) _vfp_cmp_f(_jit,r0,r1)
584static void _vfp_cmp_f(jit_state_t*,jit_int32_t,jit_int32_t);
585# define vfp_cmp_d(r0,r1) _vfp_cmp_d(_jit,r0,r1)
586static void _vfp_cmp_d(jit_state_t*,jit_int32_t,jit_int32_t);
587# define vcmp01_x(c0,c1,r0) _vcmp01_x(_jit,c0,c1,r0)
588static void _vcmp01_x(jit_state_t*,int,int,jit_int32_t);
589# define vcmp01_f(c0,c1,r0,r1,r2) _vcmp01_f(_jit,c0,c1,r0,r1,r2)
590static void _vcmp01_f(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
591# define vcmp01_d(c0,c1,r0,r1,r2) _vcmp01_d(_jit,c0,c1,r0,r1,r2)
592static void _vcmp01_d(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t);
593# define vfp_ltr_f(r0,r1,r2) vcmp01_f(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
594# define vfp_lti_f(r0,r1,i0) _vfp_lti_f(_jit,r0,r1,i0)
595static void _vfp_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
596# define vfp_ltr_d(r0,r1,r2) vcmp01_d(ARM_CC_PL,ARM_CC_MI,r0,r1,r2)
597# define vfp_lti_d(r0,r1,i0) _vfp_lti_d(_jit,r0,r1,i0)
598static void _vfp_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
599# define vfp_ler_f(r0,r1,r2) vcmp01_f(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
600# define vfp_lei_f(r0,r1,i0) _vfp_lei_f(_jit,r0,r1,i0)
601static void _vfp_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
602# define vfp_ler_d(r0,r1,r2) vcmp01_d(ARM_CC_HS,ARM_CC_LS,r0,r1,r2)
603# define vfp_lei_d(r0,r1,i0) _vfp_lei_d(_jit,r0,r1,i0)
604static void _vfp_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
605# define vfp_eqr_f(r0,r1,r2) vcmp01_f(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
606# define vfp_eqi_f(r0,r1,i0) _vfp_eqi_f(_jit,r0,r1,i0)
607static void _vfp_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
608# define vfp_eqr_d(r0,r1,r2) vcmp01_d(ARM_CC_NE,ARM_CC_EQ,r0,r1,r2)
609# define vfp_eqi_d(r0,r1,i0) _vfp_eqi_d(_jit,r0,r1,i0)
610static void _vfp_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
611# define vfp_ger_f(r0,r1,r2) vcmp01_f(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
612# define vfp_gei_f(r0,r1,i0) _vfp_gei_f(_jit,r0,r1,i0)
613static void _vfp_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
614# define vfp_ger_d(r0,r1,r2) vcmp01_d(ARM_CC_LT,ARM_CC_GE,r0,r1,r2)
615# define vfp_gei_d(r0,r1,i0) _vfp_gei_d(_jit,r0,r1,i0)
616static void _vfp_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
617# define vfp_gtr_f(r0,r1,r2) vcmp01_f(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
618# define vfp_gti_f(r0,r1,i0) _vfp_gti_f(_jit,r0,r1,i0)
619static void _vfp_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
620# define vfp_gtr_d(r0,r1,r2) vcmp01_d(ARM_CC_LE,ARM_CC_GT,r0,r1,r2)
621# define vfp_gti_d(r0,r1,i0) _vfp_gti_d(_jit,r0,r1,i0)
622static void _vfp_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
623# define vfp_ner_f(r0,r1,r2) vcmp01_f(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
624# define vfp_nei_f(r0,r1,i0) _vfp_nei_f(_jit,r0,r1,i0)
625static void _vfp_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
626# define vfp_ner_d(r0,r1,r2) vcmp01_d(ARM_CC_EQ,ARM_CC_NE,r0,r1,r2)
627# define vfp_nei_d(r0,r1,i0) _vfp_nei_d(_jit,r0,r1,i0)
628static void _vfp_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
629# define vcmp10_x(c0,r0) _vcmp10_x(_jit,c0,r0)
630static void _vcmp10_x(jit_state_t*,int,jit_int32_t);
631# define vcmp_10_f(c0,r0,r1,r2) _vcmp_10_f(_jit,c0,r0,r1,r2)
632static void _vcmp_10_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
633# define vcmp_10_d(c0,r0,r1,r2) _vcmp_10_d(_jit,c0,r0,r1,r2)
634static void _vcmp_10_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
635# define vfp_unltr_f(r0,r1,r2) vcmp_10_f(ARM_CC_GE,r0,r1,r2)
636# define vfp_unlti_f(r0,r1,i0) _vfp_unlti_f(_jit,r0,r1,i0)
637static void _vfp_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
638# define vfp_unltr_d(r0,r1,r2) vcmp_10_d(ARM_CC_GE,r0,r1,r2)
639# define vfp_unlti_d(r0,r1,i0) _vfp_unlti_d(_jit,r0,r1,i0)
640static void _vfp_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
641# define vfp_unler_f(r0,r1,r2) vcmp_10_f(ARM_CC_GT,r0,r1,r2)
642# define vfp_unlei_f(r0,r1,i0) _vfp_unlei_f(_jit,r0,r1,i0)
643static void _vfp_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
644# define vfp_unler_d(r0,r1,r2) vcmp_10_d(ARM_CC_GT,r0,r1,r2)
645# define vfp_unlei_d(r0,r1,i0) _vfp_unlei_d(_jit,r0,r1,i0)
646static void _vfp_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
647# define vfp_uneqr_x(r0) _vfp_uneqr_x(_jit,r0)
648static void _vfp_uneqr_x(jit_state_t*,jit_int32_t);
649# define vfp_uneqr_f(r0,r1,r2) _vfp_uneqr_f(_jit,r0,r1,r2)
650static void _vfp_uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
651# define vfp_uneqi_f(r0,r1,i0) _vfp_uneqi_f(_jit,r0,r1,i0)
652static void _vfp_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
653# define vfp_uneqr_d(r0,r1,r2) _vfp_uneqr_d(_jit,r0,r1,r2)
654static void _vfp_uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
655# define vfp_uneqi_d(r0,r1,i0) _vfp_uneqi_d(_jit,r0,r1,i0)
656static void _vfp_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
657# define vcmp_01_x(c0,r0) _vcmp_01_x(_jit,c0,r0)
658static void _vcmp_01_x(jit_state_t*,int,jit_int32_t);
659# define vcmp_01_f(c0,r0,r1,r2) _vcmp_01_f(_jit,c0,r0,r1,r2)
660static void _vcmp_01_f(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
661# define vcmp_01_d(c0,r0,r1,r2) _vcmp_01_d(_jit,c0,r0,r1,r2)
662static void _vcmp_01_d(jit_state_t*,int,jit_int32_t,jit_int32_t,jit_int32_t);
663# define vfp_unger_f(r0,r1,r2) vcmp_01_f(ARM_CC_CS,r0,r1,r2)
664# define vfp_ungei_f(r0,r1,i0) _vfp_ungei_f(_jit,r0,r1,i0)
665static void _vfp_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
666# define vfp_unger_d(r0,r1,r2) vcmp_01_d(ARM_CC_CS,r0,r1,r2)
667# define vfp_ungei_d(r0,r1,i0) _vfp_ungei_d(_jit,r0,r1,i0)
668static void _vfp_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
669# define vfp_ungtr_f(r0,r1,r2) vcmp_01_f(ARM_CC_HI,r0,r1,r2)
670# define vfp_ungti_f(r0,r1,i0) _vfp_ungti_f(_jit,r0,r1,i0)
671static void _vfp_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
672# define vfp_ungtr_d(r0,r1,r2) vcmp_01_d(ARM_CC_HI,r0,r1,r2)
673# define vfp_ungti_d(r0,r1,i0) _vfp_ungti_d(_jit,r0,r1,i0)
674static void _vfp_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
675# define vfp_ltgtr_x(r0) _vfp_ltgtr_x(_jit,r0)
676static void _vfp_ltgtr_x(jit_state_t*,jit_int32_t);
677# define vfp_ltgtr_f(r0,r1,r2) _vfp_ltgtr_f(_jit,r0,r1,r2)
678static void _vfp_ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
679# define vfp_ltgti_f(r0,r1,i0) _vfp_ltgti_f(_jit,r0,r1,i0)
680static void _vfp_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
681# define vfp_ltgtr_d(r0,r1,r2) _vfp_ltgtr_d(_jit,r0,r1,r2)
682static void _vfp_ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
683# define vfp_ltgti_d(r0,r1,i0) _vfp_ltgti_d(_jit,r0,r1,i0)
684static void _vfp_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
685# define vfp_ordr_f(r0,r1,r2) _vfp_ordr_f(_jit,r0,r1,r2)
686static void _vfp_ordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
687# define vfp_ordi_f(r0,r1,i0) _vfp_ordi_f(_jit,r0,r1,i0)
688static void _vfp_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
689# define vfp_ordr_d(r0,r1,r2) _vfp_ordr_d(_jit,r0,r1,r2)
690static void _vfp_ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
691# define vfp_ordi_d(r0,r1,i0) _vfp_ordi_d(_jit,r0,r1,i0)
692static void _vfp_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
693# define vfp_unordr_f(r0,r1,r2) _vfp_unordr_f(_jit,r0,r1,r2)
694static void _vfp_unordr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
695# define vfp_unordi_f(r0,r1,i0) _vfp_unordi_f(_jit,r0,r1,i0)
696static void _vfp_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
697# define vfp_unordr_d(r0,r1,r2) _vfp_unordr_d(_jit,r0,r1,r2)
698static void _vfp_unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
699# define vfp_unordi_d(r0,r1,i0) _vfp_unordi_d(_jit,r0,r1,i0)
700static void _vfp_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
701# define vbcmp_x(cc,i0) _vbcmp_x(_jit,cc,i0)
702static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
703# define vbcmp_f(cc,i0,r0,r1) _vbcmp_f(_jit,cc,i0,r0,r1)
704static jit_word_t
705_vbcmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
706# define vbcmp_x(cc,i0) _vbcmp_x(_jit,cc,i0)
707static jit_word_t _vbcmp_x(jit_state_t*,int,jit_word_t);
708# define vbcmp_d(cc,i0,r0,r1) _vbcmp_d(_jit,cc,i0,r0,r1)
709static jit_word_t
710_vbcmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
711# define vfp_bltr_f(i0,r0,r1) vbcmp_f(ARM_CC_MI,i0,r0,r1)
712# define vfp_blti_f(i0,r0,i1) _vfp_blti_f(_jit,i0,r0,i1)
713static jit_word_t _vfp_blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
714# define vfp_bltr_d(i0,r0,r1) vbcmp_d(ARM_CC_MI,i0,r0,r1)
715static jit_word_t _vfp_blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
716# define vfp_blti_d(i0,r0,i1) _vfp_blti_d(_jit,i0,r0,i1)
717# define vfp_bler_f(i0,r0,r1) vbcmp_f(ARM_CC_LS,i0,r0,r1)
718# define vfp_blei_f(i0,r0,i1) _vfp_blei_f(_jit,i0,r0,i1)
719static jit_word_t _vfp_blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
720# define vfp_bler_d(i0,r0,r1) vbcmp_d(ARM_CC_LS,i0,r0,r1)
721# define vfp_blei_d(i0,r0,i1) _vfp_blei_d(_jit,i0,r0,i1)
722static jit_word_t _vfp_blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
723# define vfp_beqr_f(i0,r0,r1) vbcmp_f(ARM_CC_EQ,i0,r0,r1)
724# define vfp_beqi_f(i0,r0,i1) _vfp_beqi_f(_jit,i0,r0,i1)
725static jit_word_t _vfp_beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
726# define vfp_beqr_d(i0,r0,r1) vbcmp_d(ARM_CC_EQ,i0,r0,r1)
727# define vfp_beqi_d(i0,r0,i1) _vfp_beqi_d(_jit,i0,r0,i1)
728static jit_word_t _vfp_beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
729# define vfp_bger_f(i0,r0,r1) vbcmp_f(ARM_CC_GE,i0,r0,r1)
730# define vfp_bgei_f(i0,r0,i1) _vfp_bgei_f(_jit,i0,r0,i1)
731static jit_word_t _vfp_bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
732# define vfp_bger_d(i0,r0,r1) vbcmp_d(ARM_CC_GE,i0,r0,r1)
733# define vfp_bgei_d(i0,r0,i1) _vfp_bgei_d(_jit,i0,r0,i1)
734static jit_word_t _vfp_bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
735# define vfp_bgtr_f(i0,r0,r1) vbcmp_f(ARM_CC_GT,i0,r0,r1)
736# define vfp_bgti_f(i0,r0,i1) _vfp_bgti_f(_jit,i0,r0,i1)
737static jit_word_t _vfp_bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
738# define vfp_bgtr_d(i0,r0,r1) vbcmp_d(ARM_CC_GT,i0,r0,r1)
739# define vfp_bgti_d(i0,r0,i1) _vfp_bgti_d(_jit,i0,r0,i1)
740static jit_word_t _vfp_bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
741# define vfp_bner_f(i0,r0,r1) vbcmp_f(ARM_CC_NE,i0,r0,r1)
742# define vfp_bnei_f(i0,r0,i1) _vfp_bnei_f(_jit,i0,r0,i1)
743static jit_word_t _vfp_bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
744# define vfp_bner_d(i0,r0,r1) vbcmp_d(ARM_CC_NE,i0,r0,r1)
745# define vfp_bnei_d(i0,r0,i1) _vfp_bnei_d(_jit,i0,r0,i1)
746static jit_word_t _vfp_bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
747# define vbncmp_x(cc,i0) _vbncmp_x(_jit,cc,i0)
748static jit_word_t _vbncmp_x(jit_state_t*,int,jit_word_t);
749# define vbncmp_f(cc,i0,r0,r1) _vbncmp_f(_jit,cc,i0,r0,r1)
750static jit_word_t
751_vbncmp_f(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
752# define vbncmp_d(cc,i0,r0,r1) _vbncmp_d(_jit,cc,i0,r0,r1)
753static jit_word_t
754_vbncmp_d(jit_state_t*,int,jit_word_t,jit_int32_t,jit_int32_t);
755# define vfp_bunltr_f(i0,r0,r1) vbncmp_f(ARM_CC_GE,i0,r0,r1)
756# define vfp_bunlti_f(i0,r0,i1) _vfp_bunlti_f(_jit,i0,r0,i1)
757static jit_word_t
758_vfp_bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
759# define vfp_bunltr_d(i0,r0,r1) vbncmp_d(ARM_CC_GE,i0,r0,r1)
760# define vfp_bunlti_d(i0,r0,i1) _vfp_bunlti_d(_jit,i0,r0,i1)
761static jit_word_t
762_vfp_bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
763# define vfp_bunler_f(i0,r0,r1) vbncmp_f(ARM_CC_GT,i0,r0,r1)
764# define vfp_bunlei_f(i0,r0,i1) _vfp_bunlei_f(_jit,i0,r0,i1)
765static jit_word_t
766_vfp_bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
767# define vfp_bunler_d(i0,r0,r1) vbncmp_d(ARM_CC_GT,i0,r0,r1)
768# define vfp_bunlei_d(i0,r0,i1) _vfp_bunlei_d(_jit,i0,r0,i1)
769static jit_word_t
770_vfp_bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
771# define vfp_buneqr_x(i0) _vfp_buneqr_x(_jit,i0)
772static jit_word_t _vfp_buneqr_x(jit_state_t*,jit_word_t);
773# define vfp_buneqr_f(i0,r0,r1) _vfp_buneqr_f(_jit,i0,r0,r1)
774static jit_word_t
775_vfp_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
776# define vfp_buneqi_f(i0,r0,i1) _vfp_buneqi_f(_jit,i0,r0,i1)
777static jit_word_t
778_vfp_buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
779# define vfp_buneqr_d(i0,r0,r1) _vfp_buneqr_d(_jit,i0,r0,r1)
780static jit_word_t
781_vfp_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
782# define vfp_buneqi_d(i0,r0,i1) _vfp_buneqi_d(_jit,i0,r0,i1)
783static jit_word_t
784_vfp_buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
785# define vfp_bunger_x(i0) _vfp_bunger_x(_jit,i0)
786static jit_word_t _vfp_bunger_x(jit_state_t*,jit_word_t);
787# define vfp_bunger_f(i0,r0,r1) _vfp_bunger_f(_jit,i0,r0,r1)
788static jit_word_t
789_vfp_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
790# define vfp_bungei_f(i0,r0,i1) _vfp_bungei_f(_jit,i0,r0,i1)
791static jit_word_t
792_vfp_bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
793# define vfp_bunger_d(i0,r0,r1) _vfp_bunger_d(_jit,i0,r0,r1)
794static jit_word_t
795_vfp_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
796# define vfp_bungei_d(i0,r0,i1) _vfp_bungei_d(_jit,i0,r0,i1)
797static jit_word_t
798_vfp_bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
799# define vfp_bungtr_f(i0,r0,r1) vbcmp_f(ARM_CC_HI,i0,r0,r1)
800# define vfp_bungti_f(i0,r0,i1) _vfp_bungti_f(_jit,i0,r0,i1)
801static jit_word_t
802_vfp_bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
803# define vfp_bungtr_d(i0,r0,r1) vbcmp_d(ARM_CC_HI,i0,r0,r1)
804# define vfp_bungti_d(i0,r0,i1) _vfp_bungti_d(_jit,i0,r0,i1)
805static jit_word_t
806_vfp_bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
807# define vfp_bltgtr_x(i0) _vfp_bltgtr_x(_jit,i0)
808static jit_word_t _vfp_bltgtr_x(jit_state_t*,jit_word_t);
809# define vfp_bltgtr_f(i0,r0,r1) _vfp_bltgtr_f(_jit,i0,r0,r1)
810static jit_word_t
811_vfp_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
812# define vfp_bltgti_f(i0,r0,i1) _vfp_bltgti_f(_jit,i0,r0,i1)
813static jit_word_t
814_vfp_bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
815# define vfp_bltgtr_d(i0,r0,r1) _vfp_bltgtr_d(_jit,i0,r0,r1)
816static jit_word_t
817_vfp_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
818# define vfp_bltgti_d(i0,r0,i1) _vfp_bltgti_d(_jit,i0,r0,i1)
819static jit_word_t
820_vfp_bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
821# define vfp_bordr_f(i0,r0,r1) vbcmp_f(ARM_CC_VC,i0,r0,r1)
822# define vfp_bordi_f(i0,r0,i1) _vfp_bordi_f(_jit,i0,r0,i1)
823static jit_word_t
824_vfp_bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
825# define vfp_bordr_d(i0,r0,r1) vbcmp_d(ARM_CC_VC,i0,r0,r1)
826# define vfp_bordi_d(i0,r0,i1) _vfp_bordi_d(_jit,i0,r0,i1)
827static jit_word_t
828_vfp_bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
829# define vfp_bunordr_f(i0,r0,r1) vbcmp_f(ARM_CC_VS,i0,r0,r1)
830# define vfp_bunordi_f(i0,r0,i1) _vfp_bunordi_f(_jit,i0,r0,i1)
831static jit_word_t
832_vfp_bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
833# define vfp_bunordr_d(i0,r0,r1) vbcmp_d(ARM_CC_VS,i0,r0,r1)
834# define vfp_bunordi_d(i0,r0,i1) _vfp_bunordi_d(_jit,i0,r0,i1)
835static jit_word_t
836_vfp_bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
837# define vfp_ldr_f(r0,r1) VLDR_F32(r0,r1,0)
838# define vfp_ldr_d(r0,r1) VLDR_F64(r0,r1,0)
839# define vfp_ldi_f(r0,i0) _vfp_ldi_f(_jit,r0,i0)
840static void _vfp_ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
841# define vfp_ldi_d(r0,i0) _vfp_ldi_d(_jit,r0,i0)
842static void _vfp_ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
843# define vfp_ldxr_f(r0,r1,r2) _vfp_ldxr_f(_jit,r0,r1,r2)
844static void _vfp_ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
845# define vfp_ldxr_d(r0,r1,r2) _vfp_ldxr_d(_jit,r0,r1,r2)
846static void _vfp_ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
847# define vfp_ldxi_f(r0,r1,i0) _vfp_ldxi_f(_jit,r0,r1,i0)
848static void _vfp_ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
849# define vfp_ldxi_d(r0,r1,i0) _vfp_ldxi_d(_jit,r0,r1,i0)
850static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
ba86ff93
PC
851# define vfp_unldr_x(r0, r1, i0) _vfp_unldr_x(_jit, r0, r1, i0)
852static void _vfp_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
853# define vfp_unldi_x(r0, i0, i1) _vfp_unldi_x(_jit, r0, i0, i1)
854static void _vfp_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
4a71579b
PC
855# define vfp_str_f(r0,r1) VSTR_F32(r1,r0,0)
856# define vfp_str_d(r0,r1) VSTR_F64(r1,r0,0)
857# define vfp_sti_f(i0,r0) _vfp_sti_f(_jit,i0,r0)
858static void _vfp_sti_f(jit_state_t*,jit_word_t,jit_int32_t);
859# define vfp_sti_d(i0,r0) _vfp_sti_d(_jit,i0,r0)
860static void _vfp_sti_d(jit_state_t*,jit_word_t,jit_int32_t);
861# define vfp_stxr_f(r0,r1,r2) _vfp_stxr_f(_jit,r0,r1,r2)
862static void _vfp_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
863# define vfp_stxr_d(r0,r1,r2) _vfp_stxr_d(_jit,r0,r1,r2)
864static void _vfp_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
865# define vfp_stxi_f(i0,r0,r1) _vfp_stxi_f(_jit,i0,r0,r1)
866static void _vfp_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
867# define vfp_stxi_d(i0,r0,r1) _vfp_stxi_d(_jit,i0,r0,r1)
868static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
ba86ff93
PC
869#define vfp_unstr_x(r0, r1, i0) _vfp_unstr_x(_jit, r0, r1, i0)
870static void _vfp_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
871#define vfp_unsti_x(i0, r0, i1) _vfp_unsti_x(_jit, i0, r0, i1)
872static void _vfp_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
4a71579b
PC
873# define vfp_vaarg_d(r0, r1) _vfp_vaarg_d(_jit, r0, r1)
874static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
875#endif
876
877#if CODE
/* Convert a register number as used by this backend into the value
 * stored in an instruction's register field: rebase by 16, then drop
 * the low bit.  The emitters in this file test that low bit themselves
 * (to assert alignment or to set ARM_V_D / ARM_V_N / ARM_V_M) before
 * calling this macro. */
# define vfp_regno(rn) (((rn) - 16) >> 1)
879
880static int
881encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi)
882{
883 int code, mode, imm, mask;
884
885 if (hi != lo) {
886 if (mov && !inv) {
887 /* (I64)
888 * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
889 */
890 for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
891 imm = lo & mask;
892 if (imm != mask && imm != 0)
893 goto fail;
894 imm = hi & mask;
895 if (imm != mask && imm != 0)
896 goto fail;
897 }
898 mode = 0xe20;
899 imm = (((hi & 0x80000000) >> 24) | ((hi & 0x00800000) >> 17) |
900 ((hi & 0x00008000) >> 10) | ((hi & 0x00000080) >> 3) |
901 ((lo & 0x80000000) >> 28) | ((lo & 0x00800000) >> 21) |
902 ((lo & 0x00008000) >> 14) | ((lo & 0x00000080) >> 7));
903 goto success;
904 }
905 goto fail;
906 }
907 /* (I32)
908 * 00000000 00000000 00000000 abcdefgh
909 * 00000000 00000000 abcdefgh 00000000
910 * 00000000 abcdefgh 00000000 00000000
911 * abcdefgh 00000000 00000000 00000000 */
912 for (mode = 0, mask = 0xff; mode < 4; mask <<= 8, mode++) {
913 if ((lo & mask) == lo) {
914 imm = lo >> (mode << 3);
915 mode <<= 9;
916 goto success;
917 }
918 }
919 /* (I16)
920 * 00000000 abcdefgh 00000000 abcdefgh
921 * abcdefgh 00000000 abcdefgh 00000000 */
922 for (mode = 0, mask = 0xff; mode < 2; mask <<= 8, mode++) {
923 if ((lo & mask) && ((lo & (mask << 16)) >> 16) == (lo & mask)) {
924 imm = lo >> (mode << 3);
925 mode = 0x800 | (mode << 9);
926 goto success;
927 }
928 }
929 if (mov) {
930 /* (I32)
931 * 00000000 00000000 abcdefgh 11111111
932 * 00000000 abcdefgh 11111111 11111111 */
933 for (mode = 0, mask = 0xff; mode < 2;
934 mask = (mask << 8) | 0xff, mode++) {
935 if ((lo & mask) == mask &&
936 !((lo & ~mask) >> 8) &&
937 (imm = lo >> (8 + (mode << 8)))) {
938 mode = 0xc00 | (mode << 8);
939 goto success;
940 }
941 }
942 if (!inv) {
943 /* (F32)
944 * aBbbbbbc defgh000 00000000 00000000
945 * from the ARM Architecture Reference Manual:
946 * In this entry, B = NOT(b). The bit pattern represents the
947 * floating-point number (-1)^s* 2^exp * mantissa, where
948 * S = UInt(a),
949 * exp = UInt(NOT(b):c:d)-3 and
950 * mantissa = (16+UInt(e:f:g:h))/16. */
951 if ((lo & 0x7ffff) == 0 &&
952 (((lo & 0x7e000000) == 0x3e000000) ||
953 ((lo & 0x7e000000) == 0x40000000))) {
954 mode = 0xf00;
955 imm = ((lo >> 24) & 0x80) | ((lo >> 19) & 0x7f);
956 goto success;
957 }
958 }
959 }
960
961fail:
962 /* need another approach (load from memory, move from arm register, etc) */
963 return (-1);
964
965success:
966 code = inv ? ARM_VMVNI : ARM_VMOVI;
967 switch ((mode & 0xf00) >> 8) {
968 case 0x0: case 0x2: case 0x4: case 0x6:
969 case 0x8: case 0xa:
970 if (inv) mode |= 0x20;
971 if (!mov) mode |= 0x100;
972 break;
973 case 0x1: case 0x3: case 0x5: case 0x7:
974 /* should actually not reach here */
975 assert(!inv);
976 case 0x9: case 0xb:
977 assert(!mov);
978 break;
979 case 0xc: case 0xd:
980 /* should actually not reach here */
981 assert(inv);
982 case 0xe:
983 assert(mode & 0x20);
984 assert(mov && !inv);
985 break;
986 default:
987 assert(!(mode & 0x20));
988 break;
989 }
990 imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f);
991 code |= mode | imm;
992 if (jit_thumb_p()) {
993 if (code & 0x1000000)
994 code |= 0xff000000;
995 else
996 code |= 0xef000000;
997 }
998 else
999 code |= ARM_CC_NV;
1000 return (code);
1001}
1002
1003static void
1004_vodi(jit_state_t *_jit, int oi, int r0)
1005{
1006 jit_thumb_t thumb;
1007 assert(!(oi & 0x0000f000));
1008 assert(!(r0 & 1)); r0 = vfp_regno(r0);
1009 thumb.i = oi|(_u4(r0)<<12);
1010 if (jit_thumb_p())
1011 iss(thumb.s[0], thumb.s[1]);
1012 else
1013 ii(thumb.i);
1014}
1015
1016static void
1017_voqi(jit_state_t *_jit, int oi, int r0)
1018{
1019 jit_thumb_t thumb;
1020 assert(!(oi & 0x0000f000));
1021 assert(!(r0 & 3)); r0 = vfp_regno(r0);
1022 thumb.i = oi|(_u4(r0)<<12);
1023 if (jit_thumb_p())
1024 iss(thumb.s[0], thumb.s[1]);
1025 else
1026 ii(thumb.i);
1027}
1028
1029static void
1030_cc_vo_ss(jit_state_t *_jit, int cc, int o, int r0, int r1)
1031{
1032 jit_thumb_t thumb;
1033 assert(!(cc & 0x0fffffff));
1034 assert(!(o & 0xf000f00f));
1035 if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0);
1036 if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1);
1037 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1038 if (jit_thumb_p())
1039 iss(thumb.s[0], thumb.s[1]);
1040 else
1041 ii(thumb.i);
1042}
1043
1044static void
1045_cc_vo_dd(jit_state_t *_jit, int cc, int o, int r0, int r1)
1046{
1047 jit_thumb_t thumb;
1048 assert(!(cc & 0x0fffffff));
1049 assert(!(o & 0xf000f00f));
1050 assert(!(r0 & 1) && !(r1 & 1));
1051 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1052 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1053 if (jit_thumb_p())
1054 iss(thumb.s[0], thumb.s[1]);
1055 else
1056 ii(thumb.i);
1057}
1058
1059static void
1060_cc_vo_qd(jit_state_t *_jit, int cc, int o, int r0, int r1)
1061{
1062 jit_thumb_t thumb;
1063 assert(!(cc & 0x0fffffff));
1064 assert(!(o & 0xf000f00f));
1065 assert(!(r0 & 3) && !(r1 & 1));
1066 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1067 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1068 if (jit_thumb_p())
1069 iss(thumb.s[0], thumb.s[1]);
1070 else
1071 ii(thumb.i);
1072}
1073
1074static void
1075_cc_vo_qq(jit_state_t *_jit, int cc, int o, int r0, int r1)
1076{
1077 jit_thumb_t thumb;
1078 assert(!(cc & 0x0fffffff));
1079 assert(!(o & 0xf000f00f));
1080 assert(!(r0 & 3) && !(r1 & 3));
1081 r0 = vfp_regno(r0); r1 = vfp_regno(r1);
1082 thumb.i = cc|o|(_u4(r0)<<12)|_u4(r1);
1083 if (jit_thumb_p())
1084 iss(thumb.s[0], thumb.s[1]);
1085 else
1086 ii(thumb.i);
1087}
1088
1089static void
1090_cc_vorr_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1091{
1092 jit_thumb_t thumb;
1093 assert(!(cc & 0x0fffffff));
1094 assert(!(o & 0xf000f00f));
1095 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1096 if (jit_thumb_p())
1097 iss(thumb.s[0], thumb.s[1]);
1098 else
1099 ii(thumb.i);
1100}
1101
1102static void
1103_cc_vors_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1104{
1105 jit_thumb_t thumb;
1106 assert(!(cc & 0x0fffffff));
1107 assert(!(o & 0xf000f00f));
1108 if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1);
1109 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1110 if (jit_thumb_p())
1111 iss(thumb.s[0], thumb.s[1]);
1112 else
1113 ii(thumb.i);
1114}
1115
1116static void
1117_cc_vorv_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1118{
1119 jit_thumb_t thumb;
1120 assert(!(cc & 0x0fffffff));
1121 assert(!(o & 0xf000f00f));
1122 if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1);
1123 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1124 if (jit_thumb_p())
1125 iss(thumb.s[0], thumb.s[1]);
1126 else
1127 ii(thumb.i);
1128}
1129
ba86ff93
PC
1130static void
1131_cc_vo_vv(jit_state_t *_jit, int cc, int o, int r0, int r1)
1132{
1133 jit_thumb_t thumb;
1134 assert(!(cc & 0x0fffffff));
1135 assert(!(o & 0xf000f00f));
1136 r0 = vfp_regno(r0);
1137 r1 = vfp_regno(r1);
1138 thumb.i = cc|o|(_u4(r1)<<12)|_u4(r0);
1139 if (jit_thumb_p())
1140 iss(thumb.s[0], thumb.s[1]);
1141 else
1142 ii(thumb.i);
1143}
1144
4a71579b
PC
1145static void
1146_cc_vori_(jit_state_t *_jit, int cc, int o, int r0, int r1)
1147{
1148 jit_thumb_t thumb;
1149 assert(!(cc & 0x0fffffff));
1150 assert(!(o & 0xf000f00f));
1151 /* use same bit pattern, to set opc1... */
1152 if (r1 & 1) o |= ARM_V_I32; r1 = vfp_regno(r1);
1153 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12);
1154 if (jit_thumb_p())
1155 iss(thumb.s[0], thumb.s[1]);
1156 else
1157 ii(thumb.i);
1158}
1159
1160static void
1161_cc_vorrd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1162{
1163 jit_thumb_t thumb;
1164 assert(!(cc & 0x0fffffff));
1165 assert(!(o & 0xf00ff00f));
1166 assert(!(r2 & 1));
1167 r2 = vfp_regno(r2);
1168 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1169 if (jit_thumb_p())
1170 iss(thumb.s[0], thumb.s[1]);
1171 else
1172 ii(thumb.i);
1173}
1174
1175static void
1176_cc_vosss(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1177{
1178 jit_thumb_t thumb;
1179 assert(!(cc & 0x0fffffff));
1180 assert(!(o & 0xf00ff00f));
1181 if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0);
1182 if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1);
1183 if (r2 & 1) o |= ARM_V_M; r2 = vfp_regno(r2);
1184 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1185 if (jit_thumb_p())
1186 iss(thumb.s[0], thumb.s[1]);
1187 else
1188 ii(thumb.i);
1189}
1190
1191static void
1192_cc_voddd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1193{
1194 jit_thumb_t thumb;
1195 assert(!(cc & 0x0fffffff));
1196 assert(!(o & 0xf00ff00f));
1197 assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1));
1198 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1199 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1200 if (jit_thumb_p())
1201 iss(thumb.s[0], thumb.s[1]);
1202 else
1203 ii(thumb.i);
1204}
1205
1206static void
1207_cc_voqdd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1208{
1209 jit_thumb_t thumb;
1210 assert(!(cc & 0x0fffffff));
1211 assert(!(o & 0xf00ff00f));
1212 assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1));
1213 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1214 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1215 if (jit_thumb_p())
1216 iss(thumb.s[0], thumb.s[1]);
1217 else
1218 ii(thumb.i);
1219}
1220
1221static void
1222_cc_voqqd(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1223{
1224 jit_thumb_t thumb;
1225 assert(!(cc & 0x0fffffff));
1226 assert(!(o & 0xf00ff00f));
1227 assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1));
1228 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1229 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1230 if (jit_thumb_p())
1231 iss(thumb.s[0], thumb.s[1]);
1232 else
1233 ii(thumb.i);
1234}
1235
1236static void
1237_cc_voqqq(jit_state_t *_jit, int cc, int o, int r0, int r1, int r2)
1238{
1239 jit_thumb_t thumb;
1240 assert(!(cc & 0x0fffffff));
1241 assert(!(o & 0xf00ff00f));
1242 assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3));
1243 r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2);
1244 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2);
1245 if (jit_thumb_p())
1246 iss(thumb.s[0], thumb.s[1]);
1247 else
1248 ii(thumb.i);
1249}
1250
1251static void
1252_cc_vldst(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1253{
1254 jit_thumb_t thumb;
1255 /* i0 << 2 is byte offset */
1256 assert(!(cc & 0x0fffffff));
1257 assert(!(o & 0xf00ff0ff));
1258 if (r0 & 1) {
1259 assert(!(o & ARM_V_F64));
1260 o |= ARM_V_D;
1261 }
1262 r0 = vfp_regno(r0);
1263 thumb.i = cc|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0);
1264 if (jit_thumb_p())
1265 iss(thumb.s[0], thumb.s[1]);
1266 else
1267 ii(thumb.i);
1268}
1269
1270static void
1271_cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
1272{
1273 jit_thumb_t thumb;
1274 assert(!(cc & 0x0fffffff));
1275 assert(!(o & 0xf00ff0ff));
1276 /* save i0 double precision registers */
1277 if (o & ARM_V_F64) i0 <<= 1;
1278 /* if (r1 & 1) cc & ARM_V_F64 must be false */
1279 if (r1 & 1) o |= ARM_V_D; r1 = vfp_regno(r1);
1280 assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
1281 thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
1282 if (jit_thumb_p())
1283 iss(thumb.s[0], thumb.s[1]);
1284 else
1285 ii(thumb.i);
1286}
1287
1288static void
ba86ff93 1289_vfp_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b 1290{
ba86ff93
PC
1291 jit_int32_t reg;
1292 reg = jit_get_reg(jit_class_fpr);
1293 VMOV_S_A(rn(reg), r1);
1294 VCNT(rn(reg), rn(reg));
1295 VADD_I8(rn(reg), rn(reg), rn(reg));
1296 VMOV_A_S(r0, rn(reg));
1297 jit_unget_reg(reg);
4a71579b
PC
1298}
1299
1300static void
ba86ff93 1301_vfp_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
4a71579b 1302{
ba86ff93
PC
1303 assert(jit_fpr_p(r0) && jit_fpr_p(r1));
1304 if (r0 != r1)
1305 VMOV_F32(r0, r1);
4a71579b
PC
1306}
1307
1308static void
1309_vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
1310{
1311 union {
1312 jit_int32_t i;
1313 jit_float32_t f;
1314 } u;
1315 jit_int32_t reg;
1316 jit_int32_t code;
1317 u.f = i0;
ba86ff93
PC
1318 assert(jit_fpr_p(r0));
1319 /* float arguments are packed, for others,
1320 * lightning only address even registers */
1321 if (!(r0 & 1) && (r0 - 32) >= 0 &&
1322 ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
1323 (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
1324 VIMM(code, r0);
1325 else {
1326 reg = jit_get_reg(jit_class_gpr);
1327 movi(rn(reg), u.i);
1328 VMOV_S_A(r0, rn(reg));
1329 jit_unget_reg(reg);
4a71579b 1330 }
ba86ff93
PC
1331}
1332
1333static void
1334_vfp_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1335{
1336 assert(jit_fpr_p(r0) && jit_fpr_p(r1));
1337 if (r0 != r1)
1338 VMOV_F64(r0, r1);
1339}
1340
1341static void
1342_vfp_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1343{
1344 jit_int32_t reg;
1345 reg = jit_get_reg(jit_class_gpr);
1346 movi(rn(reg), i0);
1347 vfp_movr_w_f(r0, rn(reg));
1348 jit_unget_reg(reg);
1349}
1350
1351static void
1352_vfp_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
1353{
1354 jit_int32_t t0, t1;
1355 t0 = jit_get_reg(jit_class_gpr);
1356 t1 = jit_get_reg(jit_class_gpr);
1357 movi(rn(t0), i0);
1358 movi(rn(t1), i1);
1359 vfp_movr_ww_d(r0, rn(t0), rn(t1));
1360 jit_unget_reg(t1);
1361 jit_unget_reg(t0);
4a71579b
PC
1362}
1363
1364static void
1365_vfp_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
1366{
1367 union {
1368 jit_int32_t i[2];
1369 jit_float64_t d;
1370 } u;
1371 jit_int32_t code;
1372 jit_int32_t rg0, rg1;
1373 u.d = i0;
ba86ff93
PC
1374# if __BYTE_ORDER == __BIG_ENDIAN
1375 code = u.i[0];
1376 u.i[0] = u.i[1];
1377 u.i[1] = code;
1378# endif
1379 assert(jit_fpr_p(r0));
1380 if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 ||
1381 (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1)
1382 VIMM(code, r0);
4a71579b 1383 else {
ba86ff93
PC
1384 rg0 = jit_get_reg(jit_class_gpr);
1385 rg1 = jit_get_reg(jit_class_gpr);
1386 movi(rn(rg0), u.i[0]);
1387 movi(rn(rg1), u.i[1]);
1388 VMOV_D_AA(r0, rn(rg0), rn(rg1));
1389 jit_unget_reg(rg1);
1390 jit_unget_reg(rg0);
4a71579b
PC
1391 }
1392}
1393
1394static void
1395_vfp_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1396{
1397 jit_int32_t reg;
1398 if (jit_fpr_p(r1)) {
1399 if (jit_fpr_p(r0))
1400 VCVT_F64_F32(r0, r1);
1401 else {
1402 reg = jit_get_reg(jit_class_fpr);
1403 VCVT_F64_F32(rn(reg), r1);
1404 VMOV_A_S(r0, rn(reg));
1405 jit_unget_reg(reg);
1406 }
1407 }
1408 else {
1409 reg = jit_get_reg(jit_class_fpr);
1410 VMOV_S_A(rn(reg), r1);
1411 VCVT_F64_F32(rn(reg), rn(reg));
1412 if (jit_fpr_p(r0))
1413 VMOV_F32(r0, rn(reg));
1414 else
1415 VMOV_A_S(r0, rn(reg));
1416 jit_unget_reg(reg);
1417 }
1418}
1419
1420static void
1421_vfp_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1422{
1423 jit_int32_t reg;
1424 if (jit_fpr_p(r1)) {
1425 if (jit_fpr_p(r0))
1426 VCVT_F32_F64(r0, r1);
1427 else {
1428 reg = jit_get_reg(jit_class_fpr);
1429 VCVT_F32_F64(rn(reg), r1);
1430 VMOV_AA_D(r0, r0 + 1, rn(reg));
1431 jit_unget_reg(reg);
1432 }
1433 }
1434 else {
1435 reg = jit_get_reg(jit_class_fpr);
1436 VMOV_D_AA(rn(reg), r1, r1 + 1);
1437 VCVT_F32_F64(rn(reg), rn(reg));
1438 if (jit_fpr_p(r0))
1439 VMOV_F64(r0, rn(reg));
1440 else
1441 VMOV_AA_D(r0, r0 + 1, rn(reg));
1442 jit_unget_reg(reg);
1443 }
1444}
1445
1446static void
1447_vfp_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1448{
1449 jit_int32_t reg;
1450 if (jit_fpr_p(r0)) {
1451 VMOV_V_I32(r0, r1);
1452 VCVT_F32_S32(r0, r0);
1453 }
1454 else {
1455 reg = jit_get_reg(jit_class_fpr);
1456 VMOV_V_I32(rn(reg), r1);
1457 VCVT_F32_S32(rn(reg), rn(reg));
1458 VMOV_F32(r0, rn(reg));
1459 jit_unget_reg(reg);
1460 }
1461}
1462
1463static void
1464_vfp_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1465{
1466 jit_int32_t reg;
1467 if (jit_fpr_p(r0)) {
1468 VMOV_V_I32(r0, r1);
1469 VCVT_F64_S32(r0, r0);
1470 }
1471 else {
1472 reg = jit_get_reg(jit_class_fpr);
1473 VMOV_V_I32(rn(reg), r1);
1474 VCVT_F64_S32(rn(reg), rn(reg));
1475 VMOV_F64(r0, rn(reg));
1476 jit_unget_reg(reg);
1477 }
1478}
1479
1480static void
1481_vfp_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1482{
1483 jit_int32_t reg;
1484 reg = jit_get_reg(jit_class_fpr);
1485 if (jit_fpr_p(r1))
1486 VCVT_S32_F32(rn(reg), r1);
1487 else {
1488 VMOV_V_I32(rn(reg), r1);
1489 VCVT_S32_F32(rn(reg), rn(reg));
1490 }
1491 VMOV_A_S32(r0, rn(reg));
1492 jit_unget_reg(reg);
1493}
1494
1495static void
1496_vfp_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1497{
1498 jit_int32_t reg;
1499 reg = jit_get_reg(jit_class_fpr);
1500 if (jit_fpr_p(r1))
1501 VCVT_S32_F64(rn(reg), r1);
1502 else {
1503 VMOV_V_I32(rn(reg), r1);
1504 VCVT_S32_F64(rn(reg), rn(reg));
1505 }
1506 VMOV_A_S32(r0, rn(reg));
1507 jit_unget_reg(reg);
1508}
1509
ba86ff93
PC
1510static void
1511_vfp_fmar_f(jit_state_t *_jit,
1512 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1513{
1514 jit_int32_t t0;
1515 /* untested */
1516 if (0 && jit_cpu.vfp >= 4) {
1517 if (r0 != r2 && r0 != r3) {
1518 vfp_movr_f(r0, r1);
1519 VFMA_F32(r0, r2, r3);
1520 }
1521 else {
1522 t0 = jit_get_reg(jit_class_fpr);
1523 vfp_movr_f(rn(t0), r1);
1524 VFMA_F32(rn(t0), r2, r3);
1525 vfp_movr_f(r0, rn(t0));
1526 jit_unget_reg(t0);
1527 }
1528 }
1529 else {
1530 if (r0 != r3) {
1531 vfp_mulr_f(r0, r1, r2);
1532 vfp_addr_f(r0, r0, r3);
1533 }
1534 else {
1535 t0 = jit_get_reg(jit_class_fpr);
1536 vfp_mulr_f(rn(t0), r1, r2);
1537 vfp_addr_f(r0, rn(t0), r3);
1538 jit_unget_reg(t0);
1539 }
1540 }
1541}
1542
1543static void
1544_vfp_fmsr_f(jit_state_t *_jit,
1545 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1546{
1547 jit_int32_t t0;
1548 /* untested */
1549 if (0 && jit_cpu.vfp >= 4) {
1550 if (r0 != r2 && r0 != r3) {
1551 vfp_movr_f(r0, r1);
1552 VFMS_F32(r0, r2, r3);
1553 }
1554 else {
1555 t0 = jit_get_reg(jit_class_fpr);
1556 vfp_movr_f(rn(t0), r1);
1557 VFMS_F32(rn(t0), r2, r3);
1558 vfp_movr_f(r0, rn(t0));
1559 jit_unget_reg(t0);
1560 }
1561 vfp_negr_f(r0, r0);
1562 }
1563 else {
1564 if (r0 != r3) {
1565 vfp_mulr_f(r0, r1, r2);
1566 vfp_subr_f(r0, r0, r3);
1567 }
1568 else {
1569 t0 = jit_get_reg(jit_class_fpr);
1570 vfp_mulr_f(rn(t0), r1, r2);
1571 vfp_subr_f(r0, rn(t0), r3);
1572 jit_unget_reg(t0);
1573 }
1574 }
1575}
1576
1577static void
1578_vfp_fnmar_f(jit_state_t *_jit,
1579 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1580{
1581 jit_int32_t t0;
1582 /* untested */
1583 if (0 && jit_cpu.vfp >= 4) {
1584 if (r0 != r2 && r0 != r3) {
1585 vfp_movr_f(r0, r1);
1586 VFNMA_F32(r0, r2, r3);
1587 }
1588 else {
1589 t0 = jit_get_reg(jit_class_fpr);
1590 vfp_movr_f(rn(t0), r1);
1591 VFNMA_F32(rn(t0), r2, r3);
1592 vfp_movr_f(r0, rn(t0));
1593 jit_unget_reg(t0);
1594 }
1595 }
1596 else {
1597 t0 = jit_get_reg(jit_class_fpr);
1598 vfp_negr_f(rn(t0), r1);
1599 vfp_mulr_f(rn(t0), rn(t0), r2);
1600 vfp_subr_f(r0, rn(t0), r3);
1601 jit_unget_reg(t0);
1602 }
1603}
1604
1605static void
1606_vfp_fnmsr_f(jit_state_t *_jit,
1607 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1608{
1609 jit_int32_t t0;
1610 /* untested */
1611 if (0 && jit_cpu.vfp >= 4) {
1612 if (r0 != r2 && r0 != r3) {
1613 vfp_movr_f(r0, r1);
1614 VFNMS_F32(r0, r2, r3);
1615 }
1616 else {
1617 t0 = jit_get_reg(jit_class_fpr);
1618 vfp_movr_f(rn(t0), r1);
1619 VFNMS_F32(rn(t0), r2, r3);
1620 vfp_movr_f(r0, rn(t0));
1621 jit_unget_reg(t0);
1622 }
1623 vfp_negr_f(r0, r0);
1624 }
1625 else {
1626 t0 = jit_get_reg(jit_class_fpr);
1627 vfp_negr_f(rn(t0), r1);
1628 vfp_mulr_f(rn(t0), rn(t0), r2);
1629 vfp_addr_f(r0, rn(t0), r3);
1630 jit_unget_reg(t0);
1631 }
1632}
1633
1634static void
1635_vfp_fmar_d(jit_state_t *_jit,
1636 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1637{
1638 jit_int32_t t0;
1639 /* untested */
1640 if (0 && jit_cpu.vfp >= 4) {
1641 if (r0 != r2 && r0 != r3) {
1642 vfp_movr_d(r0, r1);
1643 VFMA_F64(r0, r2, r3);
1644 }
1645 else {
1646 t0 = jit_get_reg(jit_class_fpr);
1647 vfp_movr_d(rn(t0), r1);
1648 VFMA_F64(rn(t0), r2, r3);
1649 vfp_movr_d(r0, rn(t0));
1650 jit_unget_reg(t0);
1651 }
1652 }
1653 else {
1654 if (r0 != r3) {
1655 vfp_mulr_d(r0, r1, r2);
1656 vfp_addr_d(r0, r0, r3);
1657 }
1658 else {
1659 t0 = jit_get_reg(jit_class_fpr);
1660 vfp_mulr_d(rn(t0), r1, r2);
1661 vfp_addr_d(r0, rn(t0), r3);
1662 jit_unget_reg(t0);
1663 }
1664 }
1665}
1666
1667static void
1668_vfp_fmsr_d(jit_state_t *_jit,
1669 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1670{
1671 jit_int32_t t0;
1672 /* untested */
1673 if (0 && jit_cpu.vfp >= 4) {
1674 if (r0 != r2 && r0 != r3) {
1675 vfp_movr_d(r0, r1);
1676 VFMS_F64(r0, r2, r3);
1677 }
1678 else {
1679 t0 = jit_get_reg(jit_class_fpr);
1680 vfp_movr_d(rn(t0), r1);
1681 VFMS_F64(rn(t0), r2, r3);
1682 vfp_movr_d(r0, rn(t0));
1683 jit_unget_reg(t0);
1684 }
1685 vfp_negr_d(r0, r0);
1686 }
1687 else {
1688 if (r0 != r3) {
1689 vfp_mulr_d(r0, r1, r2);
1690 vfp_subr_d(r0, r0, r3);
1691 }
1692 else {
1693 t0 = jit_get_reg(jit_class_fpr);
1694 vfp_mulr_d(rn(t0), r1, r2);
1695 vfp_subr_d(r0, rn(t0), r3);
1696 jit_unget_reg(t0);
1697 }
1698 }
1699}
1700
1701static void
1702_vfp_fnmar_d(jit_state_t *_jit,
1703 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1704{
1705 jit_int32_t t0;
1706 /* untested */
1707 if (0 && jit_cpu.vfp >= 4) {
1708 if (r0 != r2 && r0 != r3) {
1709 vfp_movr_d(r0, r1);
1710 VFNMA_F64(r0, r2, r3);
1711 }
1712 else {
1713 t0 = jit_get_reg(jit_class_fpr);
1714 vfp_movr_d(rn(t0), r1);
1715 VFNMA_F64(rn(t0), r2, r3);
1716 vfp_movr_d(r0, rn(t0));
1717 jit_unget_reg(t0);
1718 }
1719 }
1720 else {
1721 t0 = jit_get_reg(jit_class_fpr);
1722 vfp_negr_d(rn(t0), r1);
1723 vfp_mulr_d(rn(t0), rn(t0), r2);
1724 vfp_subr_d(r0, rn(t0), r3);
1725 jit_unget_reg(t0);
1726 }
1727}
1728
1729static void
1730_vfp_fnmsr_d(jit_state_t *_jit,
1731 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1732{
1733 jit_int32_t t0;
1734 /* untested */
1735 if (0 && jit_cpu.vfp >= 4) {
1736 if (r0 != r2 && r0 != r3) {
1737 vfp_movr_d(r0, r1);
1738 VFNMS_F64(r0, r2, r3);
1739 }
1740 else {
1741 t0 = jit_get_reg(jit_class_fpr);
1742 vfp_movr_d(rn(t0), r1);
1743 VFNMS_F64(rn(t0), r2, r3);
1744 vfp_movr_d(r0, rn(t0));
1745 jit_unget_reg(t0);
1746 }
1747 vfp_negr_d(r0, r0);
1748 }
1749 else {
1750 t0 = jit_get_reg(jit_class_fpr);
1751 vfp_negr_d(rn(t0), r1);
1752 vfp_mulr_d(rn(t0), rn(t0), r2);
1753 vfp_addr_d(r0, rn(t0), r3);
1754 jit_unget_reg(t0);
1755 }
1756}
1757
4a71579b
PC
/*
 * fopi(name) defines _vfp_<name>i_f(), the float-immediate form of
 * vfp_<name>r_f(): i0 is loaded into a scratch fpr, which is then passed
 * as the third operand.
 */
# define fopi(name)                                                 \
static void                                                         \
_vfp_##name##i_f(jit_state_t *_jit,                                 \
                 jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)  \
{                                                                   \
    jit_int32_t scratch;                                            \
    scratch = jit_get_reg(jit_class_fpr);                           \
    vfp_movi_f(rn(scratch), i0);                                    \
    vfp_##name##r_f(r0, r1, rn(scratch));                           \
    jit_unget_reg(scratch);                                         \
}
/*
 * dopi(name) defines _vfp_<name>i_d(), the double-immediate form of
 * vfp_<name>r_d(): i0 is loaded into a scratch fpr, which is then passed
 * as the third operand.
 */
# define dopi(name)                                                 \
static void                                                         \
_vfp_##name##i_d(jit_state_t *_jit,                                 \
                 jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)  \
{                                                                   \
    jit_int32_t scratch;                                            \
    scratch = jit_get_reg(jit_class_fpr);                           \
    vfp_movi_d(rn(scratch), i0);                                    \
    vfp_##name##r_d(r0, r1, rn(scratch));                           \
    jit_unget_reg(scratch);                                         \
}
/*
 * fbopi(name) defines _vfp_b<name>i_f(), the float-immediate form of the
 * branch vfp_b<name>r_f(): i0 is loaded into a scratch fpr requested
 * with jit_class_nospill, and the value returned by the register form is
 * passed back to the caller.
 */
# define fbopi(name)                                                \
static jit_word_t                                                   \
_vfp_b##name##i_f(jit_state_t *_jit,                                \
                  jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \
{                                                                   \
    jit_word_t  where;                                              \
    jit_int32_t scratch;                                            \
    scratch = jit_get_reg(jit_class_fpr | jit_class_nospill);       \
    vfp_movi_f(rn(scratch), i0);                                    \
    where = vfp_b##name##r_f(r0, r1, rn(scratch));                  \
    jit_unget_reg(scratch);                                         \
    return (where);                                                 \
}
/*
 * dbopi(name) defines _vfp_b<name>i_d(), the double-immediate form of
 * the branch vfp_b<name>r_d(): i0 is loaded into a scratch fpr requested
 * with jit_class_nospill, and the value returned by the register form is
 * passed back to the caller.
 */
# define dbopi(name)                                                \
static jit_word_t                                                   \
_vfp_b##name##i_d(jit_state_t *_jit,                                \
                  jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \
{                                                                   \
    jit_word_t  where;                                              \
    jit_int32_t scratch;                                            \
    scratch = jit_get_reg(jit_class_fpr | jit_class_nospill);       \
    vfp_movi_d(rn(scratch), i0);                                    \
    where = vfp_b##name##r_d(r0, r1, rn(scratch));                  \
    jit_unget_reg(scratch);                                         \
    return (where);                                                 \
}
1804
1805fopi(add)
1806dopi(add)
1807fopi(sub)
1808fopi(rsb)
1809dopi(rsb)
1810dopi(sub)
1811fopi(mul)
1812dopi(mul)
1813fopi(div)
1814dopi(div)
1815
1816static void
1817_vfp_cmp_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1818{
1819 jit_int32_t rg0, rg1;
1820 if (jit_fpr_p(r0)) {
1821 if (jit_fpr_p(r1))
1822 VCMP_F32(r0, r1);
1823 else {
1824 rg1 = jit_get_reg(jit_class_fpr);
1825 VMOV_S_A(rn(rg1), r1);
1826 VCMP_F32(r0, rn(rg1));
1827 jit_unget_reg(rg1);
1828 }
1829 }
1830 else {
1831 rg0 = jit_get_reg(jit_class_fpr);
1832 VMOV_S_A(rn(rg0), r0);
1833 if (jit_fpr_p(r1))
1834 VCMP_F32(rn(rg0), r1);
1835 else {
1836 rg1 = jit_get_reg(jit_class_fpr);
1837 VMOV_S_A(rn(rg1), r1);
1838 VCMP_F32(rn(rg0), rn(rg1));
1839 jit_unget_reg(rg1);
1840 }
1841 jit_unget_reg(rg0);
1842 }
1843}
1844
1845static void
1846_vfp_cmp_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1847{
1848 jit_int32_t rg0, rg1;
1849 if (jit_fpr_p(r0)) {
1850 if (jit_fpr_p(r1))
1851 VCMP_F64(r0, r1);
1852 else {
1853 rg1 = jit_get_reg(jit_class_fpr);
1854 VMOV_D_AA(rn(rg1), r1, r1 + 1);
1855 VCMP_F64(r0, rn(rg1));
1856 jit_unget_reg(rg1);
1857 }
1858 }
1859 else {
1860 rg0 = jit_get_reg(jit_class_fpr);
1861 VMOV_D_AA(rn(rg0), r0, r0 + 1);
1862 if (jit_fpr_p(r1))
1863 VCMP_F64(rn(rg0), r1);
1864 else {
1865 rg1 = jit_get_reg(jit_class_fpr);
1866 VMOV_D_AA(rn(rg1), r1, r1 + 1);
1867 VCMP_F64(rn(rg0), rn(rg1));
1868 jit_unget_reg(rg1);
1869 }
1870 jit_unget_reg(rg0);
1871 }
1872}
1873
1874static void
1875_vcmp01_x(jit_state_t *_jit, int c0, int c1, jit_int32_t r0)
1876{
1877 VMRS(_R15_REGNO);
1878 if (jit_thumb_p()) {
1879 if ((c0 ^ c1) >> 28 == 1) {
1880 ITE(c0);
1881 if (r0 < 8) {
1882 T1_MOVI(r0, 0);
1883 T1_MOVI(r0, 1);
1884 }
1885 else {
1886 T2_MOVI(r0, 0);
1887 T2_MOVI(r0, 1);
1888 }
1889 }
1890 else {
1891 if (r0 < 8) {
1892 IT(c0);
1893 T1_MOVI(r0, 0);
1894 IT(c1);
1895 T1_MOVI(r0, 1);
1896 }
1897 else {
1898 IT(c0);
1899 T2_MOVI(r0, 0);
1900 IT(c1);
1901 T2_MOVI(r0, 1);
1902 }
1903 }
1904 }
1905 else {
1906 CC_MOVI(c0, r0, 0);
1907 CC_MOVI(c1, r0, 1);
1908 }
1909}
1910
1911static void
1912_vcmp01_f(jit_state_t *_jit, int c0, int c1,
1913 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1914{
1915 vfp_cmp_f(r1, r2);
1916 vcmp01_x(c0, c1, r0);
1917}
1918
1919static void
1920_vcmp01_d(jit_state_t *_jit, int c0, int c1,
1921 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1922{
1923 vfp_cmp_d(r1, r2);
1924 vcmp01_x(c0, c1, r0);
1925}
1926
1927static void
1928_vcmp10_x(jit_state_t *_jit, int cc, jit_int32_t r0)
1929{
1930 if (jit_thumb_p()) {
1931 if (r0 < 8) {
1932 T1_MOVI(r0, 1);
1933 VMRS(_R15_REGNO);
1934 IT(cc);
1935 T1_MOVI(r0, 0);
1936 }
1937 else {
1938 T2_MOVI(r0, 1);
1939 VMRS(_R15_REGNO);
1940 IT(cc);
1941 T2_MOVI(r0, 0);
1942 }
1943 }
1944 else {
1945 VMRS(_R15_REGNO);
1946 MOVI(r0, 1);
1947 CC_MOVI(cc, r0, 0);
1948 }
1949}
1950static void
1951_vcmp_10_f(jit_state_t *_jit, int cc,
1952 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1953{
1954 vfp_cmp_f(r1, r2);
1955 vcmp10_x(cc, r0);
1956}
1957
1958static void
1959_vcmp_10_d(jit_state_t *_jit, int cc,
1960 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1961{
1962 vfp_cmp_d(r1, r2);
1963 vcmp10_x(cc, r0);
1964}
1965
1966fopi(lt)
1967dopi(lt)
1968fopi(le)
1969dopi(le)
1970fopi(eq)
1971dopi(eq)
1972fopi(ge)
1973dopi(ge)
1974fopi(gt)
1975dopi(gt)
1976fopi(ne)
1977dopi(ne)
1978fopi(unlt)
1979dopi(unlt)
1980fopi(unle)
1981dopi(unle)
1982
1983static void
1984_vfp_uneqr_x(jit_state_t *_jit, jit_int32_t r0)
1985{
1986 VMRS(_R15_REGNO);
1987 if (jit_thumb_p()) {
1988 ITE(ARM_CC_NE);
1989 if (r0 < 8) {
1990 T1_MOVI(r0, 0);
1991 T1_MOVI(r0, 1);
1992 IT(ARM_CC_VS);
1993 T1_MOVI(r0, 1);
1994 }
1995 else {
1996 T2_MOVI(r0, 0);
1997 T2_MOVI(r0, 1);
1998 IT(ARM_CC_VS);
1999 T2_MOVI(r0, 1);
2000 }
2001 }
2002 else {
2003 CC_MOVI(ARM_CC_NE, r0, 0);
2004 CC_MOVI(ARM_CC_EQ, r0, 1);
2005 CC_MOVI(ARM_CC_VS, r0, 1);
2006 }
2007}
2008
2009static void
2010_vfp_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2011{
2012 vfp_cmp_f(r1, r2);
2013 vfp_uneqr_x(r0);
2014}
2015
2016fopi(uneq)
2017
2018static void
2019_vfp_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2020{
2021 vfp_cmp_d(r1, r2);
2022 vfp_uneqr_x(r0);
2023}
2024
2025dopi(uneq)
2026
2027static void
2028_vcmp_01_x(jit_state_t *_jit, int cc, jit_int32_t r0)
2029{
2030 if (jit_thumb_p()) {
2031 if (r0 < 8) {
2032 T1_MOVI(r0, 0);
2033 VMRS(_R15_REGNO);
2034 IT(cc);
2035 T1_MOVI(r0, 1);
2036 }
2037 else {
2038 T2_MOVI(r0, 0);
2039 VMRS(_R15_REGNO);
2040 IT(cc);
2041 T2_MOVI(r0, 1);
2042 }
2043 }
2044 else {
2045 MOVI(r0, 0);
2046 VMRS(_R15_REGNO);
2047 CC_MOVI(cc, r0, 1);
2048 }
2049}
2050
2051static void
2052_vcmp_01_f(jit_state_t *_jit, int cc,
2053 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2054{
2055 vfp_cmp_f(r1, r2);
2056 vcmp_01_x(cc, r0);
2057}
2058
2059static void
2060_vcmp_01_d(jit_state_t *_jit, int cc,
2061 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2062{
2063 vfp_cmp_d(r1, r2);
2064 vcmp_01_x(cc, r0);
2065}
2066
2067fopi(unge)
2068dopi(unge)
2069fopi(ungt)
2070dopi(ungt)
2071
2072static void
2073_vfp_ltgtr_x(jit_state_t *_jit, jit_int32_t r0)
2074{
2075 VMRS(_R15_REGNO);
2076 if (jit_thumb_p()) {
2077 ITE(ARM_CC_NE);
2078 if (r0 < 8) {
2079 T1_MOVI(r0, 1);
2080 T1_MOVI(r0, 0);
2081 IT(ARM_CC_VS);
2082 T1_MOVI(r0, 0);
2083 }
2084 else {
2085 T2_MOVI(r0, 1);
2086 T2_MOVI(r0, 0);
2087 IT(ARM_CC_VS);
2088 T2_MOVI(r0, 0);
2089 }
2090 }
2091 else {
2092 CC_MOVI(ARM_CC_NE, r0, 1);
2093 CC_MOVI(ARM_CC_EQ, r0, 0);
2094 CC_MOVI(ARM_CC_VS, r0, 0);
2095 }
2096}
2097
2098static void
2099_vfp_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2100{
2101 vfp_cmp_f(r1, r2);
2102 vfp_ltgtr_x(r0);
2103}
2104
2105fopi(ltgt)
2106
2107static void
2108_vfp_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2109{
2110 vfp_cmp_d(r1, r2);
2111 vfp_ltgtr_x(r0);
2112}
2113
2114dopi(ltgt)
2115
2116static void
2117_vfp_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2118{
2119 vfp_cmp_f(r1, r2);
2120 vcmp10_x(ARM_CC_VS, r0);
2121}
2122
2123fopi(ord)
2124
2125static void
2126_vfp_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2127{
2128 vfp_cmp_d(r1, r2);
2129 vcmp10_x(ARM_CC_VS, r0);
2130}
2131
2132dopi(ord)
2133
2134static void
2135_vfp_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2136{
2137 vfp_cmp_f(r1, r2);
2138 vcmp_01_x(ARM_CC_VS, r0);
2139}
2140
2141fopi(unord)
2142
2143static void
2144_vfp_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2145{
2146 vfp_cmp_d(r1, r2);
2147 vcmp_01_x(ARM_CC_VS, r0);
2148}
2149
2150dopi(unord)
2151
2152static jit_word_t
2153_vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
2154{
2155 jit_word_t d, w;
2156 VMRS(_R15_REGNO);
2157 w = _jit->pc.w;
2158 if (jit_thumb_p()) {
2159 d = ((i0 - w) >> 1) - 2;
2160 assert(_s20P(d));
2161 T2_CC_B(cc, encode_thumb_cc_jump(d));
2162 }
2163 else {
2164 d = ((i0 - w) >> 2) - 2;
2165 assert(_s24P(d));
2166 CC_B(cc, d & 0x00ffffff);
2167 }
2168 return (w);
2169}
2170
2171
2172static jit_word_t
2173_vbcmp_f(jit_state_t *_jit, int cc,
2174 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2175{
2176 vfp_cmp_f(r0, r1);
2177 return (vbcmp_x(cc, i0));
2178}
2179
2180static jit_word_t
2181_vbcmp_d(jit_state_t *_jit, int cc,
2182 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2183{
2184 vfp_cmp_d(r0, r1);
2185 return (vbcmp_x(cc, i0));
2186}
2187
2188static jit_word_t
2189_vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0)
2190{
2191 jit_word_t d, p, w;
2192 VMRS(_R15_REGNO);
2193 p = _jit->pc.w;
2194 if (jit_thumb_p()) {
2195 T2_CC_B(cc, 0);
2196 w = _jit->pc.w;
2197 d = ((i0 - w) >> 1) - 2;
2198 assert(_s20P(d));
2199 T2_B(encode_thumb_jump(d));
2200 }
2201 else {
2202 CC_B(cc, 0);
2203 w = _jit->pc.w;
2204 d = ((i0 - w) >> 2) - 2;
2205 assert(_s24P(d));
2206 B(d & 0x00ffffff);
2207 }
2208 patch_at(arm_patch_jump, p, _jit->pc.w);
2209 return (w);
2210}
2211
2212static jit_word_t
2213_vbncmp_f(jit_state_t *_jit, int cc,
2214 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2215{
2216 vfp_cmp_f(r0, r1);
2217 return (vbncmp_x(cc, i0));
2218}
2219
2220static jit_word_t
2221_vbncmp_d(jit_state_t *_jit, int cc,
2222 jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2223{
2224 vfp_cmp_d(r0, r1);
2225 return (vbncmp_x(cc, i0));
2226}
2227
2228fbopi(lt)
2229dbopi(lt)
2230fbopi(le)
2231dbopi(le)
2232fbopi(eq)
2233dbopi(eq)
2234fbopi(ge)
2235dbopi(ge)
2236fbopi(gt)
2237dbopi(gt)
2238fbopi(ne)
2239dbopi(ne)
2240fbopi(unlt)
2241dbopi(unlt)
2242fbopi(unle)
2243dbopi(unle)
2244
2245static jit_word_t
2246_vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0)
2247{
2248 jit_word_t d, p, q, w;
2249 VMRS(_R15_REGNO);
2250 p = _jit->pc.w;
2251 if (jit_thumb_p()) {
2252 T2_CC_B(ARM_CC_VS, 0);
2253 q = _jit->pc.w;
2254 T2_CC_B(ARM_CC_NE, 0);
2255 patch_at(arm_patch_jump, p, _jit->pc.w);
2256 w = _jit->pc.w;
2257 d = ((i0 - w) >> 1) - 2;
2258 assert(_s20P(d));
2259 T2_B(encode_thumb_jump(d));
2260 }
2261 else {
2262 CC_B(ARM_CC_VS, 0);
2263 q = _jit->pc.w;
2264 CC_B(ARM_CC_NE, 0);
2265 patch_at(arm_patch_jump, p, _jit->pc.w);
2266 w = _jit->pc.w;
2267 d = ((i0 - w) >> 2) - 2;
2268 assert(_s24P(d));
2269 B(d & 0x00ffffff);
2270 }
2271 patch_at(arm_patch_jump, q, _jit->pc.w);
2272 return (w);
2273}
2274
2275static jit_word_t
2276_vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2277{
2278 vfp_cmp_f(r0, r1);
2279 return (vfp_buneqr_x(i0));
2280}
2281
2282fbopi(uneq)
2283
2284static jit_word_t
2285_vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2286{
2287 vfp_cmp_d(r0, r1);
2288 return (vfp_buneqr_x(i0));
2289}
2290
2291dbopi(uneq)
2292
2293static jit_word_t
2294_vfp_bunger_x(jit_state_t *_jit, jit_word_t i0)
2295{
2296 jit_word_t d, p, w;
2297 VMRS(_R15_REGNO);
2298 p = _jit->pc.w;
2299 if (jit_thumb_p()) {
2300 T2_CC_B(ARM_CC_MI, 0);
2301 w = _jit->pc.w;
2302 d = ((i0 - w) >> 1) - 2;
2303 assert(_s20P(d));
2304 T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d));
2305 }
2306 else {
2307 CC_B(ARM_CC_MI, 0);
2308 w = _jit->pc.w;
2309 d = ((i0 - w) >> 2) - 2;
2310 assert(_s24P(d));
2311 CC_B(ARM_CC_HS, d & 0x00ffffff);
2312 }
2313 patch_at(arm_patch_jump, p, _jit->pc.w);
2314 return (w);
2315}
2316
2317static jit_word_t
2318_vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2319{
2320 vfp_cmp_f(r0, r1);
2321 return (vfp_bunger_x(i0));
2322}
2323
2324fbopi(unge)
2325
2326static jit_word_t
2327_vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2328{
2329 vfp_cmp_d(r0, r1);
2330 return (vfp_bunger_x(i0));
2331}
2332
2333dbopi(unge)
2334
2335static jit_word_t
2336_vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0)
2337{
2338 jit_word_t d, p, q, w;
2339 VMRS(_R15_REGNO);
2340 p = _jit->pc.w;
2341 if (jit_thumb_p()) {
2342 T2_CC_B(ARM_CC_VS, 0);
2343 q = _jit->pc.w;
2344 T2_CC_B(ARM_CC_EQ, 0);
2345 w = _jit->pc.w;
2346 d = ((i0 - w) >> 1) - 2;
2347 assert(_s20P(d));
2348 T2_B(encode_thumb_jump(d));
2349 }
2350 else {
2351 CC_B(ARM_CC_VS, 0);
2352 q = _jit->pc.w;
2353 CC_B(ARM_CC_EQ, 0);
2354 w = _jit->pc.w;
2355 d = ((i0 - w) >> 2) - 2;
2356 assert(_s24P(d));
2357 B(d & 0x00ffffff);
2358 }
2359 patch_at(arm_patch_jump, p, _jit->pc.w);
2360 patch_at(arm_patch_jump, q, _jit->pc.w);
2361 return (w);
2362}
2363
2364static jit_word_t
2365_vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2366{
2367 vfp_cmp_f(r0, r1);
2368 return (vfp_bltgtr_x(i0));
2369}
2370
2371fbopi(ungt)
2372dbopi(ungt)
2373fbopi(ltgt)
2374
2375static jit_word_t
2376_vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2377{
2378 vfp_cmp_d(r0, r1);
2379 return (vfp_bltgtr_x(i0));
2380}
2381
2382dbopi(ltgt)
2383fbopi(ord)
2384dbopi(ord)
2385fbopi(unord)
2386dbopi(unord)
2387
2388static void
2389_vfp_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2390{
2391 jit_int32_t gpr;
2392 if (jit_fpr_p(r0)) {
2393 gpr = jit_get_reg(jit_class_gpr);
2394 movi(rn(gpr), i0);
2395 VLDR_F32(r0, rn(gpr), 0);
2396 jit_unget_reg(gpr);
2397 }
2398 else
2399 ldi_i(r0, i0);
2400}
2401
2402static void
2403_vfp_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
2404{
2405 jit_int32_t reg;
2406 reg = jit_get_reg(jit_class_gpr);
2407 movi(rn(reg), i0);
2408 if (jit_fpr_p(r0))
2409 VLDR_F64(r0, rn(reg), 0);
2410 else {
2411 ldr_i(r0, rn(reg));
2412 ldxi_i(r0 + 1, rn(reg), 4);
2413 }
2414 jit_unget_reg(reg);
2415}
2416
2417static void
2418_vfp_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2419{
2420 jit_int32_t reg;
2421 if (jit_fpr_p(r0)) {
2422 reg = jit_get_reg(jit_class_gpr);
2423 addr(rn(reg), r1, r2);
2424 VLDR_F32(r0, rn(reg), 0);
2425 jit_unget_reg(reg);
2426 }
2427 else
2428 ldxr_i(r0, r1, r2);
2429}
2430
2431static void
2432_vfp_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2433{
2434 jit_int32_t reg;
2435 reg = jit_get_reg(jit_class_gpr);
2436 addr(rn(reg), r1, r2);
2437 if (jit_fpr_p(r0))
2438 VLDR_F64(r0, rn(reg), 0);
2439 else {
2440 ldr_i(r0, rn(reg));
2441 ldxi_i(r0 + 1, rn(reg), 4);
2442 }
2443 jit_unget_reg(reg);
2444}
2445
2446static void
2447_vfp_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2448{
2449 jit_int32_t reg;
2450 if (jit_fpr_p(r0)) {
2451 if (i0 >= 0) {
2452 assert(!(i0 & 3));
2453 if (i0 < 1024)
2454 VLDR_F32(r0, r1, i0 >> 2);
2455 else {
2456 reg = jit_get_reg(jit_class_gpr);
2457 addi(rn(reg), r1, i0);
2458 VLDR_F32(r0, rn(reg), 0);
2459 jit_unget_reg(reg);
2460 }
2461 }
2462 else {
2463 i0 = -i0;
2464 assert(!(i0 & 3));
2465 if (i0 < 1024)
2466 VLDRN_F32(r0, r1, i0 >> 2);
2467 else {
2468 reg = jit_get_reg(jit_class_gpr);
2469 subi(rn(reg), r1, i0);
2470 VLDR_F32(r0, rn(reg), 0);
2471 jit_unget_reg(reg);
2472 }
2473 }
2474 }
2475 else
2476 ldxi_i(r0, r1, i0);
2477}
2478
2479static void
2480_vfp_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2481{
2482 jit_int32_t reg;
2483 if (jit_fpr_p(r0)) {
2484 if (i0 >= 0) {
2485 assert(!(i0 & 3));
2486 if (i0 < 1024)
2487 VLDR_F64(r0, r1, i0 >> 2);
2488 else {
2489 reg = jit_get_reg(jit_class_gpr);
2490 addi(rn(reg), r1, i0);
2491 VLDR_F64(r0, rn(reg), 0);
2492 jit_unget_reg(reg);
2493 }
2494 }
2495 else {
2496 i0 = -i0;
2497 assert(!(i0 & 3));
2498 if (i0 < 1024)
2499 VLDRN_F64(r0, r1, i0 >> 2);
2500 else {
2501 reg = jit_get_reg(jit_class_gpr);
2502 subi(rn(reg), r1, i0);
2503 VLDR_F64(r0, rn(reg), 0);
2504 jit_unget_reg(reg);
2505 }
2506 }
2507 }
2508 else {
2509 reg = jit_get_reg(jit_class_gpr);
2510 addi(rn(reg), r1, i0);
2511 ldr_i(r0, rn(reg));
2512 ldxi_i(r0 + 1, rn(reg), 4);
2513 jit_unget_reg(reg);
2514 }
2515}
2516
ba86ff93
PC
2517static void
2518_vfp_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2519{
2520 jit_int32_t t0, r2;
2521 jit_int32_t t1, r3;
2522 assert(i0 == 4 || i0 == 8);
2523 if (jit_vfp_unaligned_p()) {
2524 t0 = jit_get_reg(jit_class_gpr);
2525 r2 = rn(t0);
2526 if (i0 == 4) {
2527 if (jit_unaligned_p())
2528 unldr(r2, r1, 4);
2529 else
2530 ldr(r2, r1);
2531 vfp_movr_w_f(r0, r2);
2532 }
2533 else {
2534 t1 = jit_get_reg(jit_class_gpr);
2535 r3 = rn(t1);
2536#if __BYTE_ORDER == __LITTLE_ENDIAN
2537 if (jit_unaligned_p()) {
2538 unldr(r2, r1, 4);
2539 addi(r3, r1, 4);
2540 unldr(r3, r3, 4);
2541 }
2542 else {
2543 ldr(r2, r1);
2544 ldxi(r3, r1, 4);
2545 }
2546#else
2547 if (jit_unaligned_p()) {
2548 unldr(r3, r1, 4);
2549 addi(r2, r1, 4);
2550 unldr(r2, r2, 4);
2551 }
2552 else {
2553 ldr(r3, r1);
2554 ldxi(r2, r1, 4);
2555 }
2556#endif
2557 vfp_movr_ww_d(r0, r2, r3);
2558 jit_unget_reg(t1);
2559 }
2560 jit_unget_reg(t0);
2561 }
2562 else {
2563 if (i0 == 4)
2564 vfp_ldr_f(r0, r1);
2565 else
2566 vfp_ldr_d(r0, r1);
2567 }
2568}
2569
2570static void
2571_vfp_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
2572{
2573 jit_int32_t t0, r2;
2574 jit_int32_t t1, r3;
2575 assert(i1 == 4 || i1 == 8);
2576 if (jit_vfp_unaligned_p()) {
2577 t0 = jit_get_reg(jit_class_gpr);
2578 r2 = rn(t0);
2579 if (i1 == 4) {
2580 unldi(r2, i0, 4);
2581 vfp_movr_w_f(r0, r2);
2582 }
2583 else {
2584 t1 = jit_get_reg(jit_class_gpr);
2585 r3 = rn(t1);
2586#if __BYTE_ORDER == __LITTLE_ENDIAN
2587 if (jit_unaligned_p()) {
2588 unldi(r2, i0, 4);
2589 unldi(r3, i0 + 4, 4);
2590 }
2591 else {
2592 ldi(r2, i0);
2593 ldi(r3, i0 + 4);
2594 }
2595#else
2596 if (jit_unaligned_p()) {
2597 unldi(r3, i0, 4);
2598 unldi(r2, i0 + 4, 4);
2599 }
2600 else {
2601 ldi(r3, i0);
2602 ldi(r2, i0 + 4);
2603 }
2604#endif
2605 vfp_movr_ww_d(r0, r3, r2);
2606 jit_unget_reg(t1);
2607 }
2608 jit_unget_reg(t0);
2609 }
2610 else {
2611 if (i0 == 4)
2612 vfp_ldi_f(r0, i0);
2613 else
2614 vfp_ldi_d(r0, i0);
2615 }
2616}
2617
4a71579b
PC
2618static void
2619_vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2620{
2621 jit_int32_t reg;
2622 if (jit_fpr_p(r0)) {
2623 reg = jit_get_reg(jit_class_gpr);
2624 movi(rn(reg), i0);
2625 VSTR_F32(r0, rn(reg), 0);
2626 jit_unget_reg(reg);
2627 }
2628 else
2629 sti_i(i0, r0);
2630}
2631
2632static void
2633_vfp_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
2634{
2635 jit_int32_t reg;
2636 reg = jit_get_reg(jit_class_gpr);
2637 movi(rn(reg), i0);
2638 if (jit_fpr_p(r0))
2639 VSTR_F64(r0, rn(reg), 0);
2640 else {
2641 str_i(rn(reg), r0);
2642 stxi_i(4, rn(reg), r0 + 1);
2643 }
2644 jit_unget_reg(reg);
2645}
2646
2647static void
2648_vfp_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2649{
2650 jit_int32_t reg;
2651 if (jit_fpr_p(r2)) {
2652 reg = jit_get_reg(jit_class_gpr);
2653 addr(rn(reg), r0, r1);
2654 VSTR_F32(r2, rn(reg), 0);
2655 jit_unget_reg(reg);
2656 }
2657 else
2658 stxr_i(r0, r1, r2);
2659}
2660
2661static void
2662_vfp_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
2663{
2664 jit_int32_t reg;
2665 reg = jit_get_reg(jit_class_gpr);
2666 addr(rn(reg), r0, r1);
2667 if (jit_fpr_p(r2))
2668 VSTR_F64(r2, rn(reg), 0);
2669 else {
2670 str_i(rn(reg), r2);
2671 stxi_i(4, rn(reg), r2 + 1);
2672 }
2673 jit_unget_reg(reg);
2674}
2675
2676static void
2677_vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2678{
2679 jit_int32_t reg;
2680 if (jit_fpr_p(r1)) {
2681 if (i0 >= 0) {
2682 assert(!(i0 & 3));
2683 if (i0 < 1024)
2684 VSTR_F32(r1, r0, i0 >> 2);
2685 else {
2686 reg = jit_get_reg(jit_class_gpr);
2687 addi(rn(reg), r0, i0);
2688 VSTR_F32(r1, rn(reg), 0);
2689 jit_unget_reg(reg);
2690 }
2691 }
2692 else {
2693 i0 = -i0;
2694 assert(!(i0 & 3));
2695 if (i0 < 1024)
2696 VSTRN_F32(r1, r0, i0 >> 2);
2697 else {
2698 reg = jit_get_reg(jit_class_gpr);
2699 subi(rn(reg), r0, i0);
2700 VSTR_F32(r1, rn(reg), 0);
2701 jit_unget_reg(reg);
2702 }
2703 }
2704 }
2705 else
2706 stxi_i(i0, r0, r1);
2707}
2708
2709static void
2710_vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2711{
2712 jit_int32_t reg;
2713 if (jit_fpr_p(r1)) {
2714 if (i0 >= 0) {
2715 assert(!(i0 & 3));
2716 if (i0 < 0124)
2717 VSTR_F64(r1, r0, i0 >> 2);
2718 else {
2719 reg = jit_get_reg(jit_class_gpr);
2720 addi(rn(reg), r0, i0);
2721 VSTR_F64(r1, rn(reg), 0);
2722 jit_unget_reg(reg);
2723 }
2724 }
2725 else {
2726 i0 = -i0;
2727 assert(!(i0 & 3));
2728 if (i0 < 1024)
2729 VSTRN_F64(r1, r0, i0 >> 2);
2730 else {
2731 reg = jit_get_reg(jit_class_gpr);
2732 subi(rn(reg), r0, i0);
2733 VSTR_F64(r1, rn(reg), 0);
2734 jit_unget_reg(reg);
2735 }
2736 }
2737 }
2738 else {
2739 reg = jit_get_reg(jit_class_gpr);
2740 addi(rn(reg), r0, i0);
2741 str_i(rn(reg), r1);
2742 stxi_i(4, rn(reg), r1 + 1);
2743 jit_unget_reg(reg);
2744 }
2745}
2746
ba86ff93
PC
2747static void
2748_vfp_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
2749{
2750 jit_int32_t t0, r2;
2751 jit_int32_t t1, r3;
2752 assert(i0 == 4 || i0 == 8);
2753 if (jit_vfp_unaligned_p()) {
2754 t0 = jit_get_reg(jit_class_gpr);
2755 r2 = rn(t0);
2756 if (i0 == 4) {
2757 vfp_movr_f_w(r2, r1);
2758 if (jit_unaligned_p())
2759 unstr(r0, r2, 4);
2760 else
2761 str(r0, r2);
2762 }
2763 else {
2764 t1 = jit_get_reg(jit_class_gpr);
2765 r3 = rn(t1);
2766 vfp_movr_d_ww(r2, r3, r1);
2767#if __BYTE_ORDER == __LITTLE_ENDIAN
2768 if (jit_unaligned_p()) {
2769 unstr(r0, r2, 4);
2770 addi(r2, r0, 4);
2771 unstr(r2, r3, 4);
2772 }
2773 else {
2774 str(r0, r2);
2775 stxi(4, r0, r3);
2776 }
2777#else
2778 if (jit_unaligned_p()) {
2779 unstr(r0, r3, 4);
2780 addi(r3, r0, 4);
2781 unstr(r3, r2, 4);
2782 }
2783 else {
2784 str(r0, r3);
2785 stxi(4, r0, r2);
2786 }
2787#endif
2788 jit_unget_reg(t1);
2789 }
2790 jit_unget_reg(t0);
2791 }
2792 else {
2793 if (i0 == 4)
2794 vfp_str_f(r0, r1);
2795 else
2796 vfp_str_d(r0, r1);
2797 }
2798}
2799
2800static void
2801_vfp_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
2802{
2803 jit_int32_t t0, r2;
2804 jit_int32_t t1, r3;
2805 assert(i1 == 4 || i1 == 8);
2806 if (jit_vfp_unaligned_p()) {
2807 t0 = jit_get_reg(jit_class_gpr);
2808 r2 = rn(t0);
2809 if (i1 == 4) {
2810 vfp_movr_f_w(r2, r0);
2811 if (jit_unaligned_p())
2812 unsti(i0, r2, 4);
2813 else
2814 sti(i0, r2);
2815 }
2816 else {
2817 t1 = jit_get_reg(jit_class_gpr);
2818 r3 = rn(t1);
2819 vfp_movr_d_ww(r2, r3, r0);
2820#if __BYTE_ORDER == __LITTLE_ENDIAN
2821 if (jit_unaligned_p()) {
2822 unsti(i0, r3, 4);
2823 unsti(i0 + 4, r2, 4);
2824 }
2825 else {
2826 sti(i0, r3);
2827 sti(i0 + 4, r2);
2828 }
2829#else
2830 if (jit_unaligned_p()) {
2831 unsti(i0, r2, 4);
2832 unsti(i0 + 4, r3, 4);
2833 }
2834 else {
2835 sti(i0, r2);
2836 sti(i0 + 4, r3);
2837 }
2838#endif
2839 jit_unget_reg(t1);
2840 }
2841 jit_unget_reg(t0);
2842 }
2843 else {
2844 if (i1 == 4)
2845 vfp_sti_f(i0, r0);
2846 else
2847 vfp_sti_d(i0, r0);
2848 }
2849}
2850
4a71579b
PC
2851static void
2852_vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
2853{
2854 jit_int32_t reg;
2855
2856 assert(_jitc->function->self.call & jit_call_varargs);
2857
2858 /* Adjust pointer. */
2859 reg = jit_get_reg(jit_class_gpr);
2860 andi(rn(reg), r1, 7);
2861 addr(r1, r1, rn(reg));
2862 jit_unget_reg(reg);
2863
2864 /* Load argument. */
2865 vfp_ldr_d(r0, r1);
2866
2867 /* Update stack pointer. */
2868 addi(r1, r1, sizeof(jit_float64_t));
2869}
2870# undef dbopi
2871# undef fbopi
2872# undef dopi
2873# undef fopi
2874#endif