git subrepo pull (merge) --force deps/lightning
[pcsx_rearmed.git] / deps / lightning / lib / jit_aarch64-fpu.c
1 /*
2  * Copyright (C) 2013-2023  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
20 #if PROTO
/*
 * Base instruction words used by the emitter macros in this file.
 * Each constant carries only the fixed opcode bits; the register and
 * size fields are merged in later by the encoder helpers (_osvvv,
 * _osvvvv, _osvv_, _os_vv and _vqo_vv), which assert that an opcode
 * does not already occupy bits outside their accepted mask.
 */
21 #  define A64_CNT                       0x0e205800
22 #  define A64_ADDV                      0x0e31b800
23 #  define A64_SCVTF                     0x1e220000
24 #  define A64_FMOVWV                    0x1e260000
25 #  define A64_FMOVVW                    0x1e270000
26 #  define A64_FMOVXV                    0x9e260000
27 #  define A64_FMOVVX                    0x9e270000
28 #  define A64_FCVTZS                    0x1e380000
29 #  define A64_FCMPE                     0x1e202010
30 #  define A64_FMOV                      0x1e204000
31 #  define A64_FABS                      0x1e20c000
32 #  define A64_FNEG                      0x1e214000
33 #  define A64_FSQRT                     0x1e21c000
34 #  define A64_FMADD                     0x1f000000
35 #  define A64_FMSUB                     0x1f008000
36 #  define A64_FNMADD                    0x1f200000
37 #  define A64_FNMSUB                    0x1f208000
38 #  define A64_FCVTS                     0x1e224000
39 #  define A64_FCVTD                     0x1e22c000
40 #  define A64_FMUL                      0x1e200800
41 #  define A64_FDIV                      0x1e201800
42 #  define A64_FADD                      0x1e202800
43 #  define A64_FSUB                      0x1e203800
/*
 * Instruction emitter macros.  Each one binds a base opcode and a size
 * selector and forwards the register operands to an encoder helper:
 *   vqo_vv  - Q flag, opcode, Rn, Rd
 *   os_vv   - opcode, size, Rn, Rm   (no destination register)
 *   osvv_   - opcode, size, Rd, Rn
 *   osvvv   - opcode, size, Rd, Rn, Rm
 *   osvvvv  - opcode, size, Rd, Rn, Rm, Ra
 * For the arithmetic forms the S-suffixed macro passes size 0 and the
 * D-suffixed macro passes size 1.  XS is defined outside this file.
 */
44 #  define CNT(Rd,Rn)                    vqo_vv(0,A64_CNT,Rn,Rd)
45 #  define ADDV(Rd,Rn)                   vqo_vv(0,A64_ADDV,Rn,Rd)
46 #  define FCMPES(Rn,Rm)                 os_vv(A64_FCMPE,0,Rn,Rm)
47 #  define FCMPED(Rn,Rm)                 os_vv(A64_FCMPE,1,Rn,Rm)
48 #  define FMOVS(Rd,Rn)                  osvv_(A64_FMOV,0,Rd,Rn)
49 #  define FMOVD(Rd,Rn)                  osvv_(A64_FMOV,1,Rd,Rn)
50 #  define FMOVWS(Rd,Rn)                 osvv_(A64_FMOVWV,0,Rd,Rn)
51 #  define FMOVSW(Rd,Rn)                 osvv_(A64_FMOVVW,0,Rd,Rn)
52 #  define FMOVXD(Rd,Rn)                 osvv_(A64_FMOVXV,1,Rd,Rn)
53 #  define FMOVDX(Rd,Rn)                 osvv_(A64_FMOVVX,1,Rd,Rn)
54 #  define FCVT_SD(Rd,Rn)                osvv_(A64_FCVTS,1,Rd,Rn)
55 #  define FCVT_DS(Rd,Rn)                osvv_(A64_FCVTD,0,Rd,Rn)
56 #  define SCVTFS(Rd,Rn)                 osvv_(A64_SCVTF|XS,0,Rd,Rn)
57 #  define SCVTFD(Rd,Rn)                 osvv_(A64_SCVTF|XS,1,Rd,Rn)
58 #  define FCVTSZ_WS(Rd,Rn)              osvv_(A64_FCVTZS,0,Rd,Rn)
59 #  define FCVTSZ_WD(Rd,Rn)              osvv_(A64_FCVTZS,1,Rd,Rn)
60 #  define FCVTSZ_XS(Rd,Rn)              osvv_(A64_FCVTZS|XS,0,Rd,Rn)
61 #  define FCVTSZ_XD(Rd,Rn)              osvv_(A64_FCVTZS|XS,1,Rd,Rn)
62 #  define FABSS(Rd,Rn)                  osvv_(A64_FABS,0,Rd,Rn)
63 #  define FABSD(Rd,Rn)                  osvv_(A64_FABS,1,Rd,Rn)
64 #  define FNEGS(Rd,Rn)                  osvv_(A64_FNEG,0,Rd,Rn)
65 #  define FNEGD(Rd,Rn)                  osvv_(A64_FNEG,1,Rd,Rn)
66 #  define FSQRTS(Rd,Rn)                 osvv_(A64_FSQRT,0,Rd,Rn)
67 #  define FSQRTD(Rd,Rn)                 osvv_(A64_FSQRT,1,Rd,Rn)
/* Fused multiply-add family; the operand order is Rd, Rn, Rm, Ra. */
68 /* Vd = Va + Vn*Vm */
69 #  define FMADDS(Rd,Rn,Rm,Ra)           osvvvv(A64_FMADD,0,Rd,Rn,Rm,Ra)
70 #  define FMADDD(Rd,Rn,Rm,Ra)           osvvvv(A64_FMADD,1,Rd,Rn,Rm,Ra)
71 /* Vd = Va + (-Vn)*Vm */
72 #  define FMSUBS(Rd,Rn,Rm,Ra)           osvvvv(A64_FMSUB,0,Rd,Rn,Rm,Ra)
73 #  define FMSUBD(Rd,Rn,Rm,Ra)           osvvvv(A64_FMSUB,1,Rd,Rn,Rm,Ra)
74 /* Vd = (-Va) + (-Vn)*Vm */
75 #  define FNMADDS(Rd,Rn,Rm,Ra)          osvvvv(A64_FNMADD,0,Rd,Rn,Rm,Ra)
76 #  define FNMADDD(Rd,Rn,Rm,Ra)          osvvvv(A64_FNMADD,1,Rd,Rn,Rm,Ra)
77 /* Vd = (-Va) + Vn*Vm */
78 #  define FNMSUBS(Rd,Rn,Rm,Ra)          osvvvv(A64_FNMSUB,0,Rd,Rn,Rm,Ra)
79 #  define FNMSUBD(Rd,Rn,Rm,Ra)          osvvvv(A64_FNMSUB,1,Rd,Rn,Rm,Ra)
80 #  define FADDS(Rd,Rn,Rm)               osvvv(A64_FADD,0,Rd,Rn,Rm)
81 #  define FADDD(Rd,Rn,Rm)               osvvv(A64_FADD,1,Rd,Rn,Rm)
/* NOTE(review): FADDV expands to exactly the same encoding as FADDS
 * (A64_FADD, size 0); confirm whether that is intentional. */
82 #  define FADDV(Rd,Rn,Rm)               osvvv(A64_FADD,0,Rd,Rn,Rm)
83 #  define FSUBS(Rd,Rn,Rm)               osvvv(A64_FSUB,0,Rd,Rn,Rm)
84 #  define FSUBD(Rd,Rn,Rm)               osvvv(A64_FSUB,1,Rd,Rn,Rm)
85 #  define FMULS(Rd,Rn,Rm)               osvvv(A64_FMUL,0,Rd,Rn,Rm)
86 #  define FMULD(Rd,Rn,Rm)               osvvv(A64_FMUL,1,Rd,Rn,Rm)
87 #  define FDIVS(Rd,Rn,Rm)               osvvv(A64_FDIV,0,Rd,Rn,Rm)
88 #  define FDIVD(Rd,Rn,Rm)               osvvv(A64_FDIV,1,Rd,Rn,Rm)
/*
 * Encoder helpers.  Each wrapper macro passes the implicit `_jit` state
 * as the first argument of the static function declared right after it.
 * The helpers validate their arguments with assert(), merge them into
 * an instr_t and emit the resulting word.
 */
/* opcode, size, Rd, Rn, Rm */
89 #  define osvvv(Op,Sz,Rd,Rn,Rm)         _osvvv(_jit,Op,Sz,Rd,Rn,Rm)
90 static void _osvvv(jit_state_t*,jit_int32_t,jit_int32_t,
91                    jit_int32_t,jit_int32_t,jit_int32_t);
/* opcode, size, Rd, Rn, Rm, Ra */
92 #  define osvvvv(Op,Sz,Rd,Rn,Rm,Ra)     _osvvvv(_jit,Op,Sz,Rd,Rn,Rm,Ra)
93 static void _osvvvv(jit_state_t*,jit_int32_t,jit_int32_t,
94                     jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/* opcode, size, Rd, Rn */
95 #  define osvv_(Op,Sz,Rd,Rn)            _osvv_(_jit,Op,Sz,Rd,Rn)
96 static void _osvv_(jit_state_t*,jit_int32_t,
97                    jit_int32_t,jit_int32_t,jit_int32_t);
/* opcode, size, Rn, Rm (no destination register field is written) */
98 #  define os_vv(Op,Sz,Rn,Rm)            _os_vv(_jit,Op,Sz,Rn,Rm)
99 static void _os_vv(jit_state_t*,jit_int32_t,
100                    jit_int32_t,jit_int32_t,jit_int32_t);
/* Q flag, opcode, Rn, Rd -- note that Rn precedes Rd here */
101 #  define vqo_vv(Q,Op,Rn,Rd)            _vqo_vv(_jit,Q,Op,Rn,Rd)
102 static void _vqo_vv(jit_state_t*,jit_int32_t,
103                     jit_int32_t,jit_int32_t,jit_int32_t);
104 #  define popcntr(r0,r1)                _popcntr(_jit,r0,r1);
105 static void _popcntr(jit_state_t*,jit_int32_t,jit_int32_t);
/*
 * Single-precision (_f) conversion and arithmetic operations.
 * The *r_* forms take register operands and mostly map straight onto
 * one instruction macro.  The *i_f forms take a jit_float32_t
 * immediate and are implemented as functions generated by fopi().
 */
/* Float to integer truncation; the _i variants are functions that
 * follow the conversion with extr_i(), the _l variants are a single
 * FCVTSZ_X* instruction. */
106 #  define truncr_f_i(r0,r1)             _truncr_f_i(_jit,r0,r1)
107 static void _truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t);
108 #  define truncr_f_l(r0,r1)             FCVTSZ_XS(r0,r1)
109 #  define truncr_d_i(r0,r1)             _truncr_d_i(_jit,r0,r1)
110 static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t);
111 #  define truncr_d_l(r0,r1)             FCVTSZ_XD(r0,r1)
112 #  define addr_f(r0,r1,r2)              FADDS(r0,r1,r2)
113 #  define addi_f(r0,r1,i0)              _addi_f(_jit,r0,r1,i0)
114 static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
115 #  define subr_f(r0,r1,r2)              FSUBS(r0,r1,r2)
116 #  define subi_f(r0,r1,i0)              _subi_f(_jit,r0,r1,i0)
117 static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
/* Reverse subtract: same as subr_f with the two source operands swapped. */
118 #  define rsbr_f(r0, r1, r2)            subr_f(r0, r2, r1)
119 #  define rsbi_f(r0, r1, i0)            _rsbi_f(_jit, r0, r1, i0)
120 static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
121 #  define mulr_f(r0,r1,r2)              FMULS(r0,r1,r2)
122 #  define muli_f(r0,r1,i0)              _muli_f(_jit,r0,r1,i0)
123 static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
124 #  define divr_f(r0,r1,r2)              FDIVS(r0,r1,r2)
125 #  define divi_f(r0,r1,i0)              _divi_f(_jit,r0,r1,i0)
126 static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
127 #  define absr_f(r0,r1)                 FABSS(r0,r1)
128 #  define negr_f(r0,r1)                 FNEGS(r0,r1)
129 #  define sqrtr_f(r0,r1)                FSQRTS(r0,r1)
/* Fused multiply-add family.  Note the crossed mapping: fmsr_f uses
 * FNMSUBS and fnmsr_f uses FMSUBS (see the formulas next to the
 * FMADD/FMSUB/FNMADD/FNMSUB macro definitions). */
130 #  define fmar_f(r0,r1,r2,r3)           FMADDS(r0,r1,r2,r3)
131 #  define fmsr_f(r0,r1,r2,r3)           FNMSUBS(r0,r1,r2,r3)
132 #  define fnmar_f(r0,r1,r2,r3)          FNMADDS(r0,r1,r2,r3)
133 #  define fnmsr_f(r0,r1,r2,r3)          FMSUBS(r0,r1,r2,r3)
134 #  define extr_f(r0,r1)                 SCVTFS(r0,r1)
/*
 * Single-precision loads, stores and moves.
 * Suffix convention used by the names: r = register operand,
 * i = immediate (address, offset or constant), x = indexed
 * (base plus register or immediate offset).
 */
135 #  define ldr_f(r0,r1)                  _ldr_f(_jit,r0,r1)
136 static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t);
137 #  define ldi_f(r0,i0)                  _ldi_f(_jit,r0,i0)
138 static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
139 #  define ldxr_f(r0,r1,r2)              _ldxr_f(_jit,r0,r1,r2)
140 static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
141 #  define ldxi_f(r0,r1,i0)              _ldxi_f(_jit,r0,r1,i0)
142 static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
/* Unaligned loads are delegated to the generic implementation
 * (generic_unldr_x / generic_unldi_x are defined outside this file). */
143 #  define unldr_x(r0, r1, i0)           generic_unldr_x(r0, r1, i0)
144 #  define unldi_x(r0, i0, i1)           generic_unldi_x(r0, i0, i1)
145 #  define str_f(r0,r1)                  _str_f(_jit,r0,r1)
146 static void _str_f(jit_state_t*,jit_int32_t,jit_int32_t);
147 #  define sti_f(i0,r0)                  _sti_f(_jit,i0,r0)
148 static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
149 #  define stxr_f(r0,r1,r2)              _stxr_f(_jit,r0,r1,r2)
150 static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
151 #  define stxi_f(i0,r0,r1)              _stxi_f(_jit,i0,r0,r1)
152 static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/* Unaligned stores are likewise delegated to the generic implementation. */
153 #  define unstr_x(r0, r1, i0)           generic_unstr_x(r0, r1, i0)
154 #  define unsti_x(i0, r0, i1)           generic_unsti_x(i0, r0, i1)
155 #  define movr_f(r0,r1)                 _movr_f(_jit,r0,r1)
156 static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t);
157 #  define movi_f(r0,i0)                 _movi_f(_jit,r0,i0)
158 static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t);
/* movr_w_f / movr_f_w are single FMOVSW / FMOVWS instructions;
 * movi_w_f takes its source as a jit_word_t immediate. */
159 #  define movr_w_f(r0,r1)               FMOVSW(r0, r1)
160 #  define movr_f_w(r0,r1)               FMOVWS(r0, r1)
161 #  define movi_w_f(r0, i0)              _movi_w_f(_jit, r0, i0)
162 static void _movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
163 #  define extr_d_f(r0,r1)               FCVT_SD(r0,r1)
/*
 * Single-precision comparisons that leave a boolean in r0.
 * fccr(cc, r0, r1, r2) compares r1 with r2 (FCMPES) and then sets r0
 * from condition code cc (CSET); fcci does the same against a
 * jit_float32_t immediate.  Each comparison below only selects the
 * condition code.
 * NOTE(review): the un* variants are presumably the ones that also
 * hold when the operands are unordered (NaN); confirm the condition
 * code choice against the Arm FCMP flag definitions.
 */
164 #  define fccr(cc,r0,r1,r2)             _fccr(_jit,cc,r0,r1,r2)
165 static void _fccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
166 #  define fcci(cc,r0,r1,i0)             _fcci(_jit,cc,r0,r1,i0)
167 static void _fcci(jit_state_t*,
168                   jit_int32_t,jit_int32_t,jit_int32_t,jit_float32_t);
169 #  define ltr_f(r0,r1,r2)               fccr(CC_MI,r0,r1,r2)
170 #  define lti_f(r0,r1,i0)               fcci(CC_MI,r0,r1,i0)
171 #  define ler_f(r0,r1,r2)               fccr(CC_LS,r0,r1,r2)
172 #  define lei_f(r0,r1,i0)               fcci(CC_LS,r0,r1,i0)
173 #  define eqr_f(r0,r1,r2)               fccr(CC_EQ,r0,r1,r2)
174 #  define eqi_f(r0,r1,i0)               fcci(CC_EQ,r0,r1,i0)
175 #  define ger_f(r0,r1,r2)               fccr(CC_GE,r0,r1,r2)
176 #  define gei_f(r0,r1,i0)               fcci(CC_GE,r0,r1,i0)
177 #  define gtr_f(r0,r1,r2)               fccr(CC_GT,r0,r1,r2)
178 #  define gti_f(r0,r1,i0)               fcci(CC_GT,r0,r1,i0)
179 #  define ner_f(r0,r1,r2)               fccr(CC_NE,r0,r1,r2)
180 #  define nei_f(r0,r1,i0)               fcci(CC_NE,r0,r1,i0)
181 #  define unltr_f(r0,r1,r2)             fccr(CC_LT,r0,r1,r2)
182 #  define unlti_f(r0,r1,i0)             fcci(CC_LT,r0,r1,i0)
183 #  define unler_f(r0,r1,r2)             fccr(CC_LE,r0,r1,r2)
184 #  define unlei_f(r0,r1,i0)             fcci(CC_LE,r0,r1,i0)
/* uneq and ltgt need two condition tests, so they are real functions
 * rather than a single fccr() expansion. */
185 #  define uneqr_f(r0,r1,r2)             _uneqr_f(_jit,r0,r1,r2)
186 static void _uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
187 #  define uneqi_f(r0,r1,i0)             _uneqi_f(_jit,r0,r1,i0)
188 static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
189 #  define unger_f(r0,r1,r2)             fccr(CC_PL,r0,r1,r2)
190 #  define ungei_f(r0,r1,i0)             fcci(CC_PL,r0,r1,i0)
191 #  define ungtr_f(r0,r1,r2)             fccr(CC_HI,r0,r1,r2)
192 #  define ungti_f(r0,r1,i0)             fcci(CC_HI,r0,r1,i0)
193 #  define ltgtr_f(r0,r1,r2)             _ltgtr_f(_jit,r0,r1,r2)
194 static void _ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
195 #  define ltgti_f(r0,r1,i0)             _ltgti_f(_jit,r0,r1,i0)
196 static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t);
197 #  define ordr_f(r0,r1,r2)              fccr(CC_VC,r0,r1,r2)
198 #  define ordi_f(r0,r1,i0)              fcci(CC_VC,r0,r1,i0)
199 #  define unordr_f(r0,r1,r2)            fccr(CC_VS,r0,r1,r2)
200 #  define unordi_f(r0,r1,i0)            fcci(CC_VS,r0,r1,i0)
/*
 * Single-precision compare-and-branch operations.
 * fbccr(cc, i0, r0, r1) / fbcci(cc, i0, r0, i1) take a branch
 * condition, a jit_word_t i0 and the two operands, and return a
 * jit_word_t.  The branch macros below only select the BCC_* code and
 * mirror the condition choices of the boolean comparison macros.
 * NOTE(review): i0 is presumably the branch target and the return
 * value the address of the emitted branch for later patching; the
 * definitions are outside this view -- confirm there.
 */
201 #  define fbccr(cc,i0,r0,r1)            _fbccr(_jit,cc,i0,r0,r1)
202 static jit_word_t
203 _fbccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
204 #  define fbcci(cc,i0,r0,i1)            _fbcci(_jit,cc,i0,r0,i1)
205 static jit_word_t
206 _fbcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float32_t);
207 #  define bltr_f(i0,r0,r1)              fbccr(BCC_MI,i0,r0,r1)
208 #  define blti_f(i0,r0,i1)              fbcci(BCC_MI,i0,r0,i1)
209 #  define bler_f(i0,r0,r1)              fbccr(BCC_LS,i0,r0,r1)
210 #  define blei_f(i0,r0,i1)              fbcci(BCC_LS,i0,r0,i1)
211 #  define beqr_f(i0,r0,r1)              fbccr(BCC_EQ,i0,r0,r1)
212 #  define beqi_f(i0,r0,i1)              fbcci(BCC_EQ,i0,r0,i1)
213 #  define bger_f(i0,r0,r1)              fbccr(BCC_GE,i0,r0,r1)
214 #  define bgei_f(i0,r0,i1)              fbcci(BCC_GE,i0,r0,i1)
215 #  define bgtr_f(i0,r0,r1)              fbccr(BCC_GT,i0,r0,r1)
216 #  define bgti_f(i0,r0,i1)              fbcci(BCC_GT,i0,r0,i1)
217 #  define bner_f(i0,r0,r1)              fbccr(BCC_NE,i0,r0,r1)
218 #  define bnei_f(i0,r0,i1)              fbcci(BCC_NE,i0,r0,i1)
219 #  define bunltr_f(i0,r0,r1)            fbccr(BCC_LT,i0,r0,r1)
220 #  define bunlti_f(i0,r0,i1)            fbcci(BCC_LT,i0,r0,i1)
221 #  define bunler_f(i0,r0,r1)            fbccr(BCC_LE,i0,r0,r1)
222 #  define bunlei_f(i0,r0,i1)            fbcci(BCC_LE,i0,r0,i1)
/* buneq and bltgt are dedicated functions rather than a single
 * fbccr() expansion. */
223 #  define buneqr_f(i0,r0,r1)            _buneqr_f(_jit,i0,r0,r1)
224 static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
225 #  define buneqi_f(i0,r0,i1)            _buneqi_f(_jit,i0,r0,i1)
226 static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
227 #  define bunger_f(i0,r0,r1)            fbccr(BCC_PL,i0,r0,r1)
228 #  define bungei_f(i0,r0,i1)            fbcci(BCC_PL,i0,r0,i1)
229 #  define bungtr_f(i0,r0,r1)            fbccr(BCC_HI,i0,r0,r1)
230 #  define bungti_f(i0,r0,i1)            fbcci(BCC_HI,i0,r0,i1)
231 #  define bltgtr_f(i0,r0,r1)            _bltgtr_f(_jit,i0,r0,r1)
232 static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
233 #  define bltgti_f(i0,r0,i1)            _bltgti_f(_jit,i0,r0,i1)
234 static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t);
235 #  define bordr_f(i0,r0,r1)             fbccr(BCC_VC,i0,r0,r1)
236 #  define bordi_f(i0,r0,i1)             fbcci(BCC_VC,i0,r0,i1)
237 #  define bunordr_f(i0,r0,r1)           fbccr(BCC_VS,i0,r0,r1)
238 #  define bunordi_f(i0,r0,i1)           fbcci(BCC_VS,i0,r0,i1)
/*
 * Double-precision (_d) arithmetic.  Mirrors the single-precision
 * set: register forms map onto the D-suffixed instruction macros and
 * the *i_d forms take a jit_float64_t immediate.
 */
239 #  define addr_d(r0,r1,r2)              FADDD(r0,r1,r2)
240 #  define addi_d(r0,r1,i0)              _addi_d(_jit,r0,r1,i0)
241 static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
242 #  define subr_d(r0,r1,r2)              FSUBD(r0,r1,r2)
243 #  define subi_d(r0,r1,i0)              _subi_d(_jit,r0,r1,i0)
244 static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
/* Reverse subtract: same as subr_d with the two source operands swapped. */
245 #  define rsbr_d(r0, r1, r2)            subr_d(r0, r2, r1)
246 #  define rsbi_d(r0, r1, i0)            _rsbi_d(_jit, r0, r1, i0)
247 static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
248 #  define mulr_d(r0,r1,r2)              FMULD(r0,r1,r2)
249 #  define muli_d(r0,r1,i0)              _muli_d(_jit,r0,r1,i0)
250 static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
251 #  define divr_d(r0,r1,r2)              FDIVD(r0,r1,r2)
252 #  define divi_d(r0,r1,i0)              _divi_d(_jit,r0,r1,i0)
253 static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
254 #  define absr_d(r0,r1)                 FABSD(r0,r1)
255 #  define negr_d(r0,r1)                 FNEGD(r0,r1)
256 #  define sqrtr_d(r0,r1)                FSQRTD(r0,r1)
/* Same crossed mapping as the single-precision set: fmsr_d uses
 * FNMSUBD and fnmsr_d uses FMSUBD. */
257 #  define fmar_d(r0,r1,r2,r3)           FMADDD(r0,r1,r2,r3)
258 #  define fmsr_d(r0,r1,r2,r3)           FNMSUBD(r0,r1,r2,r3)
259 #  define fnmar_d(r0,r1,r2,r3)          FNMADDD(r0,r1,r2,r3)
260 #  define fnmsr_d(r0,r1,r2,r3)          FMSUBD(r0,r1,r2,r3)
261 #  define extr_d(r0,r1)                 SCVTFD(r0,r1)
/*
 * Double-precision loads, stores and moves; same naming scheme as the
 * single-precision set (r = register, i = immediate, x = indexed).
 */
262 #  define ldr_d(r0,r1)                  _ldr_d(_jit,r0,r1)
263 static void _ldr_d(jit_state_t*,jit_int32_t,jit_int32_t);
264 #  define ldi_d(r0,i0)                  _ldi_d(_jit,r0,i0)
265 static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
266 #  define ldxr_d(r0,r1,r2)              _ldxr_d(_jit,r0,r1,r2)
267 static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
268 #  define ldxi_d(r0,r1,i0)              _ldxi_d(_jit,r0,r1,i0)
269 static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
270 #  define str_d(r0,r1)                  _str_d(_jit,r0,r1)
271 static void _str_d(jit_state_t*,jit_int32_t,jit_int32_t);
272 #  define sti_d(i0,r0)                  _sti_d(_jit,i0,r0)
273 static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
274 #  define stxr_d(r0,r1,r2)              _stxr_d(_jit,r0,r1,r2)
275 static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
276 #  define stxi_d(i0,r0,r1)              _stxi_d(_jit,i0,r0,r1)
277 static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
278 #  define movr_d(r0,r1)                 _movr_d(_jit,r0,r1)
279 static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
280 #  define movi_d(r0,i0)                 _movi_d(_jit,r0,i0)
281 static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t);
/* movr_w_d / movr_d_w are single FMOVDX / FMOVXD instructions;
 * movi_w_d takes its source as a jit_word_t immediate. */
282 #  define movr_w_d(r0, r1)              FMOVDX(r0, r1)
283 #  define movr_d_w(r0, r1)              FMOVXD(r0, r1)
284 #define movi_w_d(r0, i0)                _movi_w_d(_jit, r0, i0)
285 static void _movi_w_d(jit_state_t*, jit_int32_t, jit_word_t);
286 #  define extr_f_d(r0,r1)               FCVT_DS(r0,r1)
/*
 * Double-precision comparisons that leave a boolean in r0.
 * dccr / dcci take the condition code as their first argument; each
 * comparison below only selects that code, using the same CC_* choices
 * as the single-precision comparison macros.
 */
287 #  define dccr(cc,r0,r1,r2)             _dccr(_jit,cc,r0,r1,r2)
288 static void _dccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
289 #  define dcci(cc,r0,r1,i0)             _dcci(_jit,cc,r0,r1,i0)
290 static void _dcci(jit_state_t*,
291                   jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t);
292 #  define ltr_d(r0,r1,r2)               dccr(CC_MI,r0,r1,r2)
293 #  define lti_d(r0,r1,i0)               dcci(CC_MI,r0,r1,i0)
294 #  define ler_d(r0,r1,r2)               dccr(CC_LS,r0,r1,r2)
295 #  define lei_d(r0,r1,i0)               dcci(CC_LS,r0,r1,i0)
296 #  define eqr_d(r0,r1,r2)               dccr(CC_EQ,r0,r1,r2)
297 #  define eqi_d(r0,r1,i0)               dcci(CC_EQ,r0,r1,i0)
298 #  define ger_d(r0,r1,r2)               dccr(CC_GE,r0,r1,r2)
299 #  define gei_d(r0,r1,i0)               dcci(CC_GE,r0,r1,i0)
300 #  define gtr_d(r0,r1,r2)               dccr(CC_GT,r0,r1,r2)
301 #  define gti_d(r0,r1,i0)               dcci(CC_GT,r0,r1,i0)
302 #  define ner_d(r0,r1,r2)               dccr(CC_NE,r0,r1,r2)
303 #  define nei_d(r0,r1,i0)               dcci(CC_NE,r0,r1,i0)
304 #  define unltr_d(r0,r1,r2)             dccr(CC_LT,r0,r1,r2)
305 #  define unlti_d(r0,r1,i0)             dcci(CC_LT,r0,r1,i0)
306 #  define unler_d(r0,r1,r2)             dccr(CC_LE,r0,r1,r2)
307 #  define unlei_d(r0,r1,i0)             dcci(CC_LE,r0,r1,i0)
/* uneq and ltgt are dedicated functions rather than a single dccr()
 * expansion. */
308 #  define uneqr_d(r0,r1,r2)             _uneqr_d(_jit,r0,r1,r2)
309 static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
310 #  define uneqi_d(r0,r1,i0)             _uneqi_d(_jit,r0,r1,i0)
311 static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
312 #  define unger_d(r0,r1,r2)             dccr(CC_PL,r0,r1,r2)
313 #  define ungei_d(r0,r1,i0)             dcci(CC_PL,r0,r1,i0)
314 #  define ungtr_d(r0,r1,r2)             dccr(CC_HI,r0,r1,r2)
315 #  define ungti_d(r0,r1,i0)             dcci(CC_HI,r0,r1,i0)
316 #  define ltgtr_d(r0,r1,r2)             _ltgtr_d(_jit,r0,r1,r2)
317 static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
318 #  define ltgti_d(r0,r1,i0)             _ltgti_d(_jit,r0,r1,i0)
319 static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t);
320 #  define ordr_d(r0,r1,r2)              dccr(CC_VC,r0,r1,r2)
321 #  define ordi_d(r0,r1,i0)              dcci(CC_VC,r0,r1,i0)
322 #  define unordr_d(r0,r1,r2)            dccr(CC_VS,r0,r1,r2)
323 #  define unordi_d(r0,r1,i0)            dcci(CC_VS,r0,r1,i0)
/*
 * Double-precision compare-and-branch operations.
 * dbccr / dbcci take the branch condition first, then a jit_word_t i0
 * and the two operands, and return a jit_word_t.  The branch macros
 * below only select the BCC_* code, matching the single-precision set.
 * NOTE(review): i0 is presumably the branch target and the return
 * value the branch address for later patching -- confirm against the
 * definitions, which are outside this view.
 */
324 #  define dbccr(cc,i0,r0,r1)            _dbccr(_jit,cc,i0,r0,r1)
325 static jit_word_t
326 _dbccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
327 #  define dbcci(cc,i0,r0,i1)            _dbcci(_jit,cc,i0,r0,i1)
328 static jit_word_t
329 _dbcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float64_t);
330 #  define bltr_d(i0,r0,r1)              dbccr(BCC_MI,i0,r0,r1)
331 #  define blti_d(i0,r0,i1)              dbcci(BCC_MI,i0,r0,i1)
332 #  define bler_d(i0,r0,r1)              dbccr(BCC_LS,i0,r0,r1)
333 #  define blei_d(i0,r0,i1)              dbcci(BCC_LS,i0,r0,i1)
334 #  define beqr_d(i0,r0,r1)              dbccr(BCC_EQ,i0,r0,r1)
335 #  define beqi_d(i0,r0,i1)              dbcci(BCC_EQ,i0,r0,i1)
336 #  define bger_d(i0,r0,r1)              dbccr(BCC_GE,i0,r0,r1)
337 #  define bgei_d(i0,r0,i1)              dbcci(BCC_GE,i0,r0,i1)
338 #  define bgtr_d(i0,r0,r1)              dbccr(BCC_GT,i0,r0,r1)
339 #  define bgti_d(i0,r0,i1)              dbcci(BCC_GT,i0,r0,i1)
340 #  define bner_d(i0,r0,r1)              dbccr(BCC_NE,i0,r0,r1)
341 #  define bnei_d(i0,r0,i1)              dbcci(BCC_NE,i0,r0,i1)
342 #  define bunltr_d(i0,r0,r1)            dbccr(BCC_LT,i0,r0,r1)
343 #  define bunlti_d(i0,r0,i1)            dbcci(BCC_LT,i0,r0,i1)
344 #  define bunler_d(i0,r0,r1)            dbccr(BCC_LE,i0,r0,r1)
345 #  define bunlei_d(i0,r0,i1)            dbcci(BCC_LE,i0,r0,i1)
/* buneq and bltgt are dedicated functions rather than a single
 * dbccr() expansion. */
346 #  define buneqr_d(i0,r0,r1)            _buneqr_d(_jit,i0,r0,r1)
347 static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
348 #  define buneqi_d(i0,r0,i1)            _buneqi_d(_jit,i0,r0,i1)
349 static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
350 #  define bunger_d(i0,r0,r1)            dbccr(BCC_PL,i0,r0,r1)
351 #  define bungei_d(i0,r0,i1)            dbcci(BCC_PL,i0,r0,i1)
352 #  define bungtr_d(i0,r0,r1)            dbccr(BCC_HI,i0,r0,r1)
353 #  define bungti_d(i0,r0,i1)            dbcci(BCC_HI,i0,r0,i1)
354 #  define bltgtr_d(i0,r0,r1)            _bltgtr_d(_jit,i0,r0,r1)
355 static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
356 #  define bltgti_d(i0,r0,i1)            _bltgti_d(_jit,i0,r0,i1)
357 static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t);
358 #  define bordr_d(i0,r0,r1)             dbccr(BCC_VC,i0,r0,r1)
359 #  define bordi_d(i0,r0,i1)             dbcci(BCC_VC,i0,r0,i1)
360 #  define bunordr_d(i0,r0,r1)           dbccr(BCC_VS,i0,r0,r1)
361 #  define bunordi_d(i0,r0,i1)           dbcci(BCC_VS,i0,r0,i1)
/* vaarg_d(r0, r1): forwards to _vaarg_d() with the implicit `_jit`
 * state.  NOTE(review): presumably fetches the next double variadic
 * argument into r0 using the va_list referenced by r1 -- confirm
 * against the definition, which is outside this view. */
362 #  define vaarg_d(r0, r1)               _vaarg_d(_jit, r0, r1)
363 static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
364 #endif
365
366 #if CODE
367 static void
368 _osvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz,
369        jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm)
370 {
371     instr_t     i;
372     assert(!(Rd &       ~0x1f));
373     assert(!(Rn &       ~0x1f));
374     assert(!(Rm &       ~0x1f));
375     assert(!(Sz &        ~0x3));
376     assert(!(Op & ~0xffe0fc00));
377     i.w = Op;
378     i.size.b = Sz;
379     i.Rd.b = Rd;
380     i.Rn.b = Rn;
381     i.Rm.b = Rm;
382     ii(i.w);
383 }
384
385 static void
386 _osvvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz,
387        jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm, jit_int32_t Ra)
388 {
389     instr_t     i;
390     assert(!(Rd &       ~0x1f));
391     assert(!(Rn &       ~0x1f));
392     assert(!(Rm &       ~0x1f));
393     assert(!(Ra &       ~0x1f));
394     assert(!(Sz &        ~0x3));
395     assert(!(Op & ~0xff208000));
396     i.w = Op;
397     i.size.b = Sz;
398     i.Rd.b = Rd;
399     i.Rn.b = Rn;
400     i.Rm.b = Rm;
401     i.Ra.b = Ra;
402     ii(i.w);
403 }
404
405 static void
406 _osvv_(jit_state_t *_jit, jit_int32_t Op,
407        jit_int32_t Sz, jit_int32_t Rd, jit_int32_t Rn)
408 {
409     instr_t     i;
410     assert(!(Rd &       ~0x1f));
411     assert(!(Rn &       ~0x1f));
412     assert(!(Sz &        ~0x3));
413     assert(!(Op & ~0xfffffc00));
414     i.w = Op;
415     i.size.b = Sz;
416     i.Rd.b = Rd;
417     i.Rn.b = Rn;
418     ii(i.w);
419 }
420
421 static void
422 _os_vv(jit_state_t *_jit, jit_int32_t Op,
423        jit_int32_t Sz, jit_int32_t Rn, jit_int32_t Rm)
424 {
425     instr_t     i;
426     assert(!(Rn &       ~0x1f));
427     assert(!(Rm &       ~0x1f));
428     assert(!(Sz &        ~0x3));
429     assert(!(Op & ~0xff20fc1f));
430     i.w = Op;
431     i.size.b = Sz;
432     i.Rn.b = Rn;
433     i.Rm.b = Rm;
434     ii(i.w);
435 }
436
437 static void
438 _vqo_vv(jit_state_t *_jit, jit_int32_t Q,
439         jit_int32_t Op, jit_int32_t Rn, jit_int32_t Rd)
440 {
441     instr_t     i;
442     assert(!(Rn &       ~0x1f));
443     assert(!(Rd &       ~0x1f));
444     assert(!(Q &         ~0x1));
445     assert(!(Op & ~0xbffffc00));
446     i.w = Op;
447     i.Q.b  = Q;
448     i.Rn.b = Rn;
449     i.Rd.b = Rd;
450     ii(i.w);
451 }
452
/*
 * fopi(name): define _<name>i_f(), the float32-immediate form of the
 * single-precision operation <name>r_f.  The generated function loads
 * the immediate into a temporary FP-class register, applies the
 * register form and then releases the temporary.
 */
#define fopi(name)                                                      \
static void                                                             \
_##name##i_f(jit_state_t *_jit,                                         \
             jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)          \
{                                                                       \
    jit_int32_t         tmp;                                            \
    tmp = jit_get_reg(jit_class_fpr);                                   \
    movi_f(rn(tmp), i0);                                                \
    name##r_f(r0, r1, rn(tmp));                                         \
    jit_unget_reg(tmp);                                                 \
}
/*
 * dopi(name): define _<name>i_d(), the float64-immediate form of the
 * double-precision operation <name>r_d.  The generated function loads
 * the immediate into a temporary FP-class register, applies the
 * register form and then releases the temporary.
 */
#define dopi(name)                                                      \
static void                                                             \
_##name##i_d(jit_state_t *_jit,                                         \
             jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)          \
{                                                                       \
    jit_int32_t         tmp;                                            \
    tmp = jit_get_reg(jit_class_fpr);                                   \
    movi_d(rn(tmp), i0);                                                \
    name##r_d(r0, r1, rn(tmp));                                         \
    jit_unget_reg(tmp);                                                 \
}
/*
 * fbopi(name): define _b<name>i_f(), the float32-immediate form of the
 * single-precision branch b<name>r_f.  The immediate is loaded into a
 * temporary register requested with jit_class_fpr|jit_class_nospill,
 * the register form of the branch is emitted, the temporary is
 * released, and the value returned by the register form is passed
 * back to the caller.
 */
#define fbopi(name)                                                     \
static jit_word_t                                                       \
_b##name##i_f(jit_state_t *_jit,                                        \
              jit_word_t i0, jit_int32_t r0, jit_float32_t i1)          \
{                                                                       \
    jit_word_t          result;                                         \
    jit_int32_t         tmp;                                            \
    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);                 \
    movi_f(rn(tmp), i1);                                                \
    result = b##name##r_f(i0, r0, rn(tmp));                             \
    jit_unget_reg(tmp);                                                 \
    return (result);                                                    \
}
/*
 * dbopi(name): define _b<name>i_d(), the float64-immediate form of the
 * double-precision branch b<name>r_d.  The immediate is loaded into a
 * temporary register requested with jit_class_fpr|jit_class_nospill,
 * the register form of the branch is emitted, the temporary is
 * released, and the value returned by the register form is passed
 * back to the caller.
 */
#define dbopi(name)                                                     \
static jit_word_t                                                       \
_b##name##i_d(jit_state_t *_jit,                                        \
              jit_word_t i0, jit_int32_t r0, jit_float64_t i1)          \
{                                                                       \
    jit_word_t          result;                                         \
    jit_int32_t         tmp;                                            \
    tmp = jit_get_reg(jit_class_fpr|jit_class_nospill);                 \
    movi_d(rn(tmp), i1);                                                \
    result = b##name##r_d(i0, r0, rn(tmp));                             \
    jit_unget_reg(tmp);                                                 \
    return (result);                                                    \
}
499
500 static void
501 _popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
502 {
503     jit_int32_t         reg;
504     reg = jit_get_reg(jit_class_fpr);
505     FMOVDX(rn(reg), r1);
506     CNT(rn(reg), rn(reg));
507     ADDV(rn(reg), rn(reg));
508     FMOVXD(r0, rn(reg));
509     jit_unget_reg(reg);
510 }
511
512 static void
513 _truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
514 {
515     FCVTSZ_WS(r0, r1);
516     extr_i(r0, r0);
517 }
518
519 static void
520 _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
521 {
522     FCVTSZ_WD(r0, r1);
523     extr_i(r0, r0);
524 }
525
/* Instantiate the float32-immediate arithmetic helpers _addi_f,
 * _subi_f, _rsbi_f, _muli_f and _divi_f from the fopi() template. */
526 fopi(add)
527 fopi(sub)
528 fopi(rsb)
529 fopi(mul)
530 fopi(div)
531
532 static void
533 _ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
534 {
535     jit_int32_t         reg;
536     reg = jit_get_reg(jit_class_gpr);
537     ldr_i(rn(reg), r1);
538     FMOVSW(r0, rn(reg));
539     jit_unget_reg(reg);
540 }
541
542 static void
543 _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
544 {
545     jit_int32_t         reg;
546     reg = jit_get_reg(jit_class_gpr);
547     ldi_i(rn(reg), i0);
548     FMOVSW(r0, rn(reg));
549     jit_unget_reg(reg);
550 }
551
552 static void
553 _ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
554 {
555     jit_int32_t         reg;
556     reg = jit_get_reg(jit_class_gpr);
557     ldxr_i(rn(reg), r1, r2);
558     FMOVSW(r0, rn(reg));
559     jit_unget_reg(reg);
560 }
561
562 static void
563 _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
564 {
565     jit_int32_t         reg;
566     reg = jit_get_reg(jit_class_gpr);
567     ldxi_i(rn(reg), r1, i0);
568     FMOVSW(r0, rn(reg));
569     jit_unget_reg(reg);
570 }
571
572 static void
573 _str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
574 {
575     jit_int32_t         reg;
576     reg = jit_get_reg(jit_class_gpr);
577     FMOVWS(rn(reg), r1);
578     str_i(r0, rn(reg));
579     jit_unget_reg(reg);
580 }
581
582 static void
583 _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
584 {
585     jit_int32_t         reg;
586     reg = jit_get_reg(jit_class_gpr);
587     FMOVWS(rn(reg), r0);
588     sti_i(i0, rn(reg));
589     jit_unget_reg(reg);
590 }
591
592 static void
593 _stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
594 {
595     jit_int32_t         reg;
596     reg = jit_get_reg(jit_class_gpr);
597     FMOVWS(rn(reg), r2);
598     stxr_i(r0, r1, rn(reg));
599     jit_unget_reg(reg);
600 }
601
602 static void
603 _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
604 {
605     jit_int32_t         reg;
606     reg = jit_get_reg(jit_class_gpr);
607     FMOVWS(rn(reg), r1);
608     stxi_i(i0, r0, rn(reg));
609     jit_unget_reg(reg);
610 }
611
612 static void
613 _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
614 {
615     if (r0 != r1)
616         FMOVS(r0, r1);
617 }
618
619 static void
620 _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
621 {
622     union {
623         jit_int32_t     i;
624         jit_float32_t   f;
625     } u;
626     jit_int32_t         reg;
627     u.f = i0;
628     if (u.i == 0)
629         FMOVSW(r0, WZR_REGNO);
630     else {
631         reg = jit_get_reg(jit_class_gpr);
632         /* prevent generating unused top 32 bits */
633         movi(rn(reg), ((jit_word_t)u.i) & 0xffffffff);
634         FMOVSW(r0, rn(reg));
635         jit_unget_reg(reg);
636     }
637 }
638
639 static void
640 _movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
641 {
642     jit_int32_t                reg;
643     reg = jit_get_reg(jit_class_gpr);
644     movi(rn(reg), i0);
645     movr_w_f(r0, rn(reg));
646     jit_unget_reg(reg);
647 }
648
/*
 * Single-precision compare-and-set helper: emits FCMPES on (r1, r2)
 * followed by CSET of r0 on condition code cc, so r0 reflects whether
 * cc holds for the comparison.
 */
static void
_fccr(jit_state_t *_jit, jit_int32_t cc,
      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    FCMPES(r1, r2);
    CSET(r0, cc);
}
656
657 static void
658 _fcci(jit_state_t *_jit, jit_int32_t cc,
659       jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
660 {
661     jit_int32_t         reg;
662     reg = jit_get_reg(jit_class_fpr);
663     movi_f(rn(reg), i0);
664     fccr(cc, r0, r1, rn(reg));
665     jit_unget_reg(reg);
666 }
667
668 static void
669 _uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
670 {
671     jit_word_t          w;
672     FCMPES(r1, r2);
673     CSET(r0, CC_VS);
674     w = _jit->pc.w;
675     B_C(BCC_VS, 1);             /* unordered satisfies condition */
676     CSET(r0, CC_EQ);            /* equal satisfies condition */
677     patch_at(w, _jit->pc.w);
678 }
679 fopi(uneq)
680
681 static void
682 _ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
683 {
684     jit_word_t          w;
685     FCMPES(r1, r2);
686     CSET(r0, CC_VC);            /* set to 1 if ordered */
687     w = _jit->pc.w;
688     B_C(BCC_VS, 1);             /* unordered does not satisfy condition */
689     CSET(r0, CC_NE);            /* set to 1 if not equal */
690     patch_at(w, _jit->pc.w);
691 }
692 fopi(ltgt)
693
694 static jit_word_t
695 _fbccr(jit_state_t *_jit, jit_int32_t cc,
696        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
697 {
698     jit_word_t          w, d;
699     FCMPES(r0, r1);
700     w = _jit->pc.w;
701     d = (i0 - w) >> 2;
702     B_C(cc, d);
703     return (w);
704 }
705
706 static jit_word_t
707 _fbcci(jit_state_t *_jit, jit_int32_t cc,
708        jit_word_t i0, jit_int32_t r0, jit_float32_t i1)
709 {
710     jit_word_t          w;
711     jit_int32_t         reg;
712     reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
713     movi_f(rn(reg), i1);
714     w = fbccr(cc, i0, r0, rn(reg));
715     jit_unget_reg(reg);
716     return (w);
717 }
718
719 static jit_word_t
720 _buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
721 {
722     jit_word_t          u, v, w;
723     FCMPES(r0, r1);
724     u = _jit->pc.w;
725     B_C(BCC_VS, 1);             /* unordered satisfies condition */
726     v = _jit->pc.w;
727     B_C(BCC_NE, 1);             /* not equal (or unordered) does not satisfy */
728     patch_at(u, _jit->pc.w);
729     w = _jit->pc.w;
730     B((i0 - w) >> 2);
731     patch_at(v, _jit->pc.w);
732     return (w);
733 }
734 fbopi(uneq)
735
736 static jit_word_t
737 _bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
738 {
739     jit_word_t          u, v, w;
740     FCMPES(r0, r1);
741     u = _jit->pc.w;
742     B_C(BCC_VS, 2);             /* jump over if unordered */
743     v = _jit->pc.w;
744     B_C(BCC_EQ, 1);             /* jump over if equal */
745     w = _jit->pc.w;
746     B((i0 - w) >> 2);
747     patch_at(u, _jit->pc.w);
748     patch_at(v, _jit->pc.w);
749     return (w);
750 }
751 fbopi(ltgt)
752
/*
 * Instantiate the dopi() macro for the basic double-precision arithmetic
 * operations.
 * NOTE(review): dopi() is defined outside this section; presumably it
 * generates the immediate-operand wrappers for these ops -- confirm
 * against the macro definition.
 */
dopi(add)
dopi(sub)
dopi(rsb)
dopi(mul)
dopi(div)
758
759 static void
760 _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
761 {
762     jit_int32_t         reg;
763     reg = jit_get_reg(jit_class_gpr);
764     ldr_l(rn(reg), r1);
765     FMOVDX(r0, rn(reg));
766     jit_unget_reg(reg);
767 }
768
769 static void
770 _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
771 {
772     jit_int32_t         reg;
773     reg = jit_get_reg(jit_class_gpr);
774     ldi_l(rn(reg), i0);
775     FMOVDX(r0, rn(reg));
776     jit_unget_reg(reg);
777 }
778
779 static void
780 _ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
781 {
782     jit_int32_t         reg;
783     reg = jit_get_reg(jit_class_gpr);
784     ldxr_l(rn(reg), r1, r2);
785     FMOVDX(r0, rn(reg));
786     jit_unget_reg(reg);
787 }
788
789 static void
790 _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
791 {
792     jit_int32_t         reg;
793     reg = jit_get_reg(jit_class_gpr);
794     ldxi_l(rn(reg), r1, i0);
795     FMOVDX(r0, rn(reg));
796     jit_unget_reg(reg);
797 }
798
799 static void
800 _str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
801 {
802     jit_int32_t         reg;
803     reg = jit_get_reg(jit_class_gpr);
804     FMOVXD(rn(reg), r1);
805     str_l(r0, rn(reg));
806     jit_unget_reg(reg);
807 }
808
809 static void
810 _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
811 {
812     jit_int32_t         reg;
813     reg = jit_get_reg(jit_class_gpr);
814     FMOVXD(rn(reg), r0);
815     sti_l(i0, rn(reg));
816     jit_unget_reg(reg);
817 }
818
819 static void
820 _stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
821 {
822     jit_int32_t         reg;
823     reg = jit_get_reg(jit_class_gpr);
824     FMOVXD(rn(reg), r2);
825     stxr_l(r0, r1, rn(reg));
826     jit_unget_reg(reg);
827 }
828
829 static void
830 _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
831 {
832     jit_int32_t         reg;
833     reg = jit_get_reg(jit_class_gpr);
834     FMOVXD(rn(reg), r1);
835     stxi_l(i0, r0, rn(reg));
836     jit_unget_reg(reg);
837 }
838
839 static void
840 _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
841 {
842     if (r0 != r1)
843         FMOVD(r0, r1);
844 }
845
846 static void
847 _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
848 {
849     union {
850         jit_int64_t     l;
851         jit_float64_t   d;
852     } u;
853     jit_int32_t         reg;
854     u.d = i0;
855     if (u.l == 0)
856         FMOVDX(r0, XZR_REGNO);
857     else {
858         reg = jit_get_reg(jit_class_gpr);
859         movi(rn(reg), u.l);
860         FMOVDX(r0, rn(reg));
861         jit_unget_reg(reg);
862     }
863 }
864
865 static void
866 _movi_w_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
867 {
868     jit_int32_t                reg;
869     reg = jit_get_reg(jit_class_gpr);
870     movi(rn(reg), i0);
871     movr_w_d(r0, rn(reg));
872     jit_unget_reg(reg);
873 }
874
/*
 * Double-precision compare-and-set helper: emits FCMPED on (r1, r2)
 * followed by CSET of r0 on condition code cc, so r0 reflects whether
 * cc holds for the comparison.
 */
static void
_dccr(jit_state_t *_jit, jit_int32_t cc,
      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    FCMPED(r1, r2);
    CSET(r0, cc);
}
882
883 static void
884 _dcci(jit_state_t *_jit, jit_int32_t cc,
885       jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
886 {
887     jit_int32_t         reg;
888     reg = jit_get_reg(jit_class_fpr);
889     movi_d(rn(reg), i0);
890     dccr(cc, r0, r1, rn(reg));
891     jit_unget_reg(reg);
892 }
893
894 static void
895 _uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
896 {
897     jit_word_t          w;
898     FCMPED(r1, r2);
899     CSET(r0, CC_VS);
900     w = _jit->pc.w;
901     B_C(BCC_VS, 1);             /* unordered satisfies condition */
902     CSET(r0, CC_EQ);            /* equal satisfies condition */
903     patch_at(w, _jit->pc.w);
904 }
905 dopi(uneq)
906
907 static void
908 _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
909 {
910     jit_word_t          w;
911     FCMPED(r1, r2);
912     CSET(r0, CC_VC);            /* set to 1 if ordered */
913     w = _jit->pc.w;
914     B_C(BCC_VS, 1);             /* unordered does not satisfy condition */
915     CSET(r0, CC_NE);            /* set to 1 if not equal */
916     patch_at(w, _jit->pc.w);
917 }
918 dopi(ltgt)
919
920 static jit_word_t
921 _dbccr(jit_state_t *_jit, jit_int32_t cc,
922        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
923 {
924     jit_word_t          w, d;
925     FCMPED(r0, r1);
926     w = _jit->pc.w;
927     d = (i0 - w) >> 2;
928     B_C(cc, d);
929     return (w);
930 }
931
932 static jit_word_t
933 _dbcci(jit_state_t *_jit, jit_int32_t cc,
934        jit_word_t i0, jit_int32_t r0, jit_float64_t i1)
935 {
936     jit_word_t          w;
937     jit_int32_t         reg;
938     reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
939     movi_d(rn(reg), i1);
940     w = dbccr(cc, i0, r0, rn(reg));
941     jit_unget_reg(reg);
942     return (w);
943 }
944
945 static jit_word_t
946 _buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
947 {
948     jit_word_t          u, v, w;
949     FCMPED(r0, r1);
950     u = _jit->pc.w;
951     B_C(BCC_VS, 1);             /* unordered satisfies condition */
952     v = _jit->pc.w;
953     B_C(BCC_NE, 1);             /* not equal (or unordered) does not satisfy */
954     patch_at(u, _jit->pc.w);
955     w = _jit->pc.w;
956     B((i0 - w) >> 2);
957     patch_at(v, _jit->pc.w);
958     return (w);
959 }
960 dbopi(uneq)
961
962 static jit_word_t
963 _bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
964 {
965     jit_word_t          u, v, w;
966     FCMPED(r0, r1);
967     u = _jit->pc.w;
968     B_C(BCC_VS, 2);             /* jump over if unordered */
969     v = _jit->pc.w;
970     B_C(BCC_EQ, 1);             /* jump over if equal */
971     w = _jit->pc.w;
972     B((i0 - w) >> 2);
973     patch_at(u, _jit->pc.w);
974     patch_at(v, _jit->pc.w);
975     return (w);
976 }
977 dbopi(ltgt)
978
/*
 * Fetch the next double-precision variadic argument into FP register r0.
 * r1 holds the va_list: on non-Apple targets it points to a
 * jit_va_list_t whose fpoff/fptop/stack fields are read and updated
 * here; on Apple targets r1 is itself the argument pointer and is
 * advanced in place.
 * Asserts that the current function was declared with varargs.
 */
static void
_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
#if !__APPLE__
    jit_word_t          ge_code;
    jit_word_t          lt_code;
    jit_int32_t         rg0, rg1;

    assert(_jitc->function->self.call & jit_call_varargs);

    rg0 = jit_get_reg(jit_class_gpr);
    rg1 = jit_get_reg(jit_class_gpr);

    /* Load the fp offset in save area in the first temporary. */
    ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));

    /* Jump over if there are no remaining arguments in the save area
     * (fpoff >= 0); the branch target is patched below. */
    ge_code = bgei(_jit->pc.w, rn(rg0), 0);

    /* Load the fp save area top pointer (fptop) in the second temporary. */
    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, fptop));

    /* Load the vararg argument into r0, addressed by fptop and fpoff. */
    ldxr_d(r0, rn(rg1), rn(rg0));

    /* Update the fp offset: advance by 16 and store it back.
     * NOTE(review): presumably each saved fp register occupies a 16 byte
     * slot in the save area -- confirm against the prolog code. */
    addi(rn(rg0), rn(rg0), 16);
    stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));

    /* Will only need one temporary register below. */
    jit_unget_reg(rg1);

    /* Jump over overflow code. */
    lt_code = jmpi(_jit->pc.w);

    /* Where to land if argument is in overflow area. */
    patch_at(ge_code, _jit->pc.w);

    /* Load stack pointer. */
    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, stack));

    /* Load argument. */
    ldr_d(r0, rn(rg0));

    /* Update stack pointer: advance by 8 and store it back. */
    addi(rn(rg0), rn(rg0), 8);
    stxi(offsetof(jit_va_list_t, stack), r1, rn(rg0));

    /* Where to land if argument was taken from the fp save area. */
    patch_at(lt_code, _jit->pc.w);

    jit_unget_reg(rg0);
#else
    assert(_jitc->function->self.call & jit_call_varargs);
    /* Read the double r1 points at, then step r1 past it. */
    ldr_d(r0, r1);
    addi(r1, r1, sizeof(jit_float64_t));
#endif
}
1037 #endif