/*
 * Copyright (C) 2012-2023  Free Software Foundation, Inc.
 *
 * This file is part of GNU lightning.
 *
 * GNU lightning is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU lightning is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * Authors:
 *	Paulo Cesar Pereira de Andrade
 */
/*
 * Register numbers for XMM6 through XMM15.
 * The stray numeric token that preceded each `#` in this copy has been
 * removed: a preprocessing directive must be the first token on its line.
 */
# define _XMM6_REGNO 6
# define _XMM7_REGNO 7
# define _XMM8_REGNO 8
# define _XMM9_REGNO 9
# define _XMM10_REGNO 10
# define _XMM11_REGNO 11
# define _XMM12_REGNO 12
# define _XMM13_REGNO 13
# define _XMM14_REGNO 14
# define _XMM15_REGNO 15
/*
 * Opcode constants passed as the `c`/`code` argument of the sser/ssexr/
 * sselxr/ssexrx emitters.  The values correspond to entries in the x86
 * two-byte opcode map (see the Intel SDM); how the emitters frame them is
 * not fully visible in this part of the file.
 * The stray numeric token that preceded each `#` in this copy has been
 * removed: a preprocessing directive must be the first token on its line.
 */
/* Moves and conversions. */
#define X86_SSE_MOV 0x10	/* used by movssmr/movsdmr (loads) */
#define X86_SSE_MOV1 0x11	/* used by movssrm/movsdrm (stores) */
#define X86_SSE_MOVLP 0x12
#define X86_SSE_MOVHP 0x16
#define X86_SSE_MOVA 0x28
#define X86_SSE_CVTIS 0x2a	/* used by sse_extr_f/sse_extr_d */
#define X86_SSE_CVTTSI 0x2c	/* used by sse_truncr_* */
#define X86_SSE_CVTSI 0x2d
/* Comparisons and rounding. */
#define X86_SSE_UCOMI 0x2e	/* used by ucomissr/ucomisdr */
#define X86_SSE_COMI 0x2f
#define X86_SSE_ROUND 0x3a
/* Arithmetic and bitwise operations. */
#define X86_SSE_SQRT 0x51	/* used by sse_sqrtr_f/sse_sqrtr_d */
#define X86_SSE_RSQRT 0x52
#define X86_SSE_RCP 0x53
#define X86_SSE_AND 0x54	/* used by andpsr/andpdr */
#define X86_SSE_ANDN 0x55
#define X86_SSE_OR 0x56
#define X86_SSE_XOR 0x57	/* used by xorpsr/xorpdr */
#define X86_SSE_ADD 0x58	/* used by addssr/addsdr */
#define X86_SSE_MUL 0x59	/* used by mulssr/mulsdr */
#define X86_SSE_CVTSD 0x5a	/* used by sse_extr_f_d/sse_extr_d_f */
#define X86_SSE_CVTDT 0x5b
#define X86_SSE_SUB 0x5c	/* used by subssr/subsdr */
#define X86_SSE_MIN 0x5d
#define X86_SSE_DIV 0x5e	/* used by divssr/divsdr */
#define X86_SSE_MAX 0x5f
/* Transfers to/from general registers and packed integer compares. */
#define X86_SSE_X2G 0x6e	/* used by movdxr/movqxr */
#define X86_SSE_EQB 0x74
#define X86_SSE_EQW 0x75
#define X86_SSE_EQD 0x76	/* used by pcmpeqlr */
#define X86_SSE_G2X 0x7e	/* used by movdrx/movqrx */
#define X86_SSE_MOV2 0xd6
/*
 * Low-level SSE instruction emitters.  Each function-like macro forwards to
 * the underscore-prefixed static function, supplying the in-scope `_jit`
 * state as the first argument.
 *   sser(c, r0, r1)      - opcode constant c plus two register operands
 *   ssexr(p, c, r0, r1)  - as sser, with an extra leading value p; callers in
 *                          this file pass 0x66, 0xf2 or 0xf3 (presumably the
 *                          instruction prefix byte - confirm in _ssexr)
 *   ssexi(c, r0, m, i)   - used by psrl/psrq/psll/pslq with m = 0x02 or 0x06
 *                          and i presumably the shift count
 * NOTE(review): every line in this copy starts with a bare number that looks
 * like line-number residue from extraction, and the gaps in that numbering
 * suggest dropped lines - confirm against the upstream file.
 */
63 # define sser(c,r0,r1) _sser(_jit,c,r0,r1)
64 static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
65 # define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
66 static void _ssexr(jit_state_t*,
67 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
68 # define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
69 static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/*
 * Two-operand instruction macros built on sser/ssexr/sselxr/ssexi.
 * Naming pattern visible below: "ss" forms pass 0xf3, "sd" forms pass 0xf2,
 * "ps" forms use sser (no leading value) and "pd" forms pass 0x66.
 */
/* Scalar add/sub/mul/div, single (ss) and double (sd). */
70 # define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
71 # define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1)
72 # define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1)
73 # define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1)
74 # define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1)
75 # define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1)
76 # define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1)
77 # define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1)
/* Bitwise AND on packed single / packed double. */
78 # define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1)
79 # define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1)
/* Truncating float/double to integer conversions (_i via ssexr, _l via sselxr). */
80 # define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
81 # define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
/*
 * NOTE(review): sse_extr_f and sse_extr_d are each defined twice below with
 * different expansions (sselxr vs ssexr) and no visible #if/#else/#endif
 * between them.  The embedded numbering skips 82, 87 and 90, so the
 * conditional lines appear to be missing from this copy - confirm upstream.
 */
83 # define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
84 # define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
85 # define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
86 # define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
88 # define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
89 # define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
/* Conversions between single and double (both use X86_SSE_CVTSD). */
91 # define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
92 # define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
/* Unordered compares and bitwise XOR. */
93 # define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)
94 # define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
95 # define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
96 # define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
/*
 * Transfers using X86_SSE_X2G / X86_SSE_G2X: the "d" forms go through ssexr,
 * the "q" forms through sselxr.
 */
97 # define movdxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
98 # define movdrx(r0,r1) ssexr(0x66, X86_SSE_G2X,r0,r1)
99 # define movqxr(r0,r1) sselxr(0x66, X86_SSE_X2G,r0,r1)
100 # define movqrx(r0,r1) sselxr(0x66, X86_SSE_G2X,r0,r1)
/* Packed 32-bit equality compare. */
101 # define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
/*
 * Immediate shifts via ssexi: 0x72 for the "l" forms, 0x73 for the "q" forms;
 * third argument 0x02 for the psr* macros and 0x06 for the psl* macros.
 */
102 # define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
103 # define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
104 # define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
105 # define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
/*
 * sselxr: under __X64 && !__X64_32 it forwards to a dedicated _sselxr
 * emitter; the last line below maps it onto plain ssexr instead.
 * NOTE(review): no `# else` / `# endif` is visible for this `# if`, and the
 * `_sselxr(` prototype line shows no return type.  The embedded numbering
 * skips 108, 110 and 112, so those lines appear to be missing from this
 * copy - confirm against the upstream file.
 */
106 # if __X64 && !__X64_32
107 # define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
109 _sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
111 # define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1)
/*
 * ssexrx: emitter taking a leading value p, an opcode constant c, four
 * addressing arguments (md, rb, ri, ms) and a register rd.  In _ssexrx the
 * visible calls are rex(0, 0, rd, ri, rb) and rx(rd, md, rb, ri, ms);
 * presumably md/rb/ri/ms are displacement, base, index and scale - confirm.
 * The *mr macros use X86_SSE_MOV and are what sse_ldr_f/sse_ldr_d expand to;
 * the *rm macros use X86_SSE_MOV1, take the register first, and are what
 * sse_str_f/sse_str_d expand to.
 * NOTE(review): the `_ssexrx(` prototype shows no return type on its visible
 * lines (embedded numbering skips 118) - confirm against upstream.
 */
113 # define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
114 # define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
115 # define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
116 # define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
117 # define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
119 _ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
120 jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Arithmetic entry points.  Each macro forwards to the underscore-prefixed
 * static function with `_jit` prepended.  Suffix conventions visible here:
 *   ..r_f / ..r_d  - three register numbers (r0, r1, r2)
 *   ..i_f / ..i_d  - two registers plus a pointer to a jit_float32_t /
 *                    jit_float64_t immediate
 */
/* Addition. */
121 # define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2)
122 static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
123 # define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0)
124 static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
125 # define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2)
126 static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
127 # define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0)
128 static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Subtraction. */
129 # define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2)
130 static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
131 # define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0)
132 static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
133 # define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2)
134 static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
135 # define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0)
136 static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Reverse subtraction: the register form is subr with r1 and r2 swapped. */
137 # define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1)
138 # define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0)
139 static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
140 # define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1)
141 # define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0)
142 static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Multiplication. */
143 # define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2)
144 static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
145 # define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0)
146 static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
147 # define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2)
148 static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
149 # define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0)
150 static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Division. */
151 # define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2)
152 static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
153 # define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0)
154 static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
155 # define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2)
156 static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
157 # define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0)
158 static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Unary operations: absolute value, negation, square root (direct ssexr). */
159 # define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1)
160 static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
161 # define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1)
162 static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
163 # define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1)
164 static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
165 # define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1)
166 static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
167 # define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
168 # define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
/*
 * Four-register fma/fms/fnma/fnms entry points (presumably fused
 * multiply-add and its subtract/negated variants; the exact operand formula
 * is defined by the implementations, which are not visible here).
 */
169 # define sse_fmar_f(r0, r1, r2, r3) _sse_fmar_f(_jit, r0, r1, r2, r3)
170 static void _sse_fmar_f(jit_state_t*,
171 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
172 # define sse_fmar_d(r0, r1, r2, r3) _sse_fmar_d(_jit, r0, r1, r2, r3)
173 static void _sse_fmar_d(jit_state_t*,
174 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
175 # define sse_fmsr_f(r0, r1, r2, r3) _sse_fmsr_f(_jit, r0, r1, r2, r3)
176 static void _sse_fmsr_f(jit_state_t*,
177 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
178 # define sse_fmsr_d(r0, r1, r2, r3) _sse_fmsr_d(_jit, r0, r1, r2, r3)
179 static void _sse_fmsr_d(jit_state_t*,
180 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
181 # define sse_fnmar_f(r0, r1, r2, r3) _sse_fnmar_f(_jit, r0, r1, r2, r3)
182 static void _sse_fnmar_f(jit_state_t*,
183 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
184 # define sse_fnmar_d(r0, r1, r2, r3) _sse_fnmar_d(_jit, r0, r1, r2, r3)
185 static void _sse_fnmar_d(jit_state_t*,
186 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
187 # define sse_fnmsr_f(r0, r1, r2, r3) _sse_fnmsr_f(_jit, r0, r1, r2, r3)
188 static void _sse_fnmsr_f(jit_state_t*,
189 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
190 # define sse_fnmsr_d(r0, r1, r2, r3) _sse_fnmsr_d(_jit, r0, r1, r2, r3)
191 static void _sse_fnmsr_d(jit_state_t*,
192 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/*
 * ssecmpf / ssecmpd: both forward to _ssecmp, passing 0 (float) or 1
 * (double) as the jit_bool_t argument, followed by a condition code and
 * three registers.
 * NOTE(review): the `_ssecmp(` prototype shows no return type on its visible
 * lines (embedded numbering skips 195) - confirm against upstream.
 */
193 # define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
194 # define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
196 _ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
197 jit_int32_t, jit_int32_t, jit_int32_t);
/* Single-precision moves: register copy and immediate (by pointer) load. */
198 #define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1)
199 static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
200 #define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
201 static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
/*
 * Word <-> float transfers.  sse_movr_w_f maps to movdxr(r0, r1);
 * sse_movr_f_w maps to movdrx with its two operands swapped.
 */
202 # define sse_movr_w_f(r0,r1) movdxr(r0, r1)
203 # define sse_movr_f_w(r0,r1) movdrx(r1, r0)
204 #define sse_movi_w_f(r0, i0) _sse_movi_w_f(_jit, r0, i0)
205 static void _sse_movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
/*
 * Single-precision comparisons.  The ..i_f forms take a pointer to a
 * jit_float32_t immediate.  Register forms either expand directly to
 * ssecmpf with a condition code, or forward to a dedicated function.
 * Operand order in the direct expansions:
 *   lt -> X86_CC_A  (r1, r2)      gt -> X86_CC_A  (r2, r1)
 *   le -> X86_CC_AE (r1, r2)      ge -> X86_CC_AE (r2, r1)
 *   unlt -> X86_CC_NAE (r2, r1)   ungt -> X86_CC_NAE (r1, r2)
 *   ord -> X86_CC_NP (r2, r1)     unord -> X86_CC_P (r2, r1)
 * How _ssecmp orders the underlying compare is not visible in this part of
 * the file.
 */
206 # define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
207 static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
208 # define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
209 # define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0)
210 static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
211 # define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2)
212 # define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0)
213 static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
214 # define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2)
215 static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
216 # define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0)
217 static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
218 # define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1)
219 # define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0)
220 static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
221 # define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1)
222 # define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0)
223 static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
224 # define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2)
225 static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
/* "un" prefixed forms (presumably the unordered-or-... variants). */
226 # define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0)
227 static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
228 # define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1)
229 # define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0)
230 static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* The _sse_unler_f prototype sits after the sse_uneqi_f pair below. */
231 # define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2)
232 # define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0)
233 static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
234 static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
235 # define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2)
236 static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
237 # define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0)
238 static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
239 # define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2)
240 static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
241 # define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0)
242 static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
243 # define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2)
244 # define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0)
245 static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
246 # define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2)
247 static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
/* Ordered / unordered tests, keyed on the parity condition codes. */
248 # define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0)
249 static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
250 # define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1)
251 # define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0)
252 static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
253 # define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1)
/*
 * Single-precision loads.  sse_ldr_f expands to movssmr with first argument
 * 0, r1 as the second, _NOREG as the third and _SCL1 as the fourth, with r0
 * as the register operand.  The ldi/ldxr/ldxi forms take an address word,
 * two registers, or a register plus a word offset respectively.
 */
254 # define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0)
255 # define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0)
256 static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
257 # define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2)
258 static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
259 # define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
260 static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/*
 * unldr_x / unldi_x: presumably unaligned loads, with the trailing word
 * selecting the access size - TODO confirm against the implementation.
 */
261 # define sse_unldr_x(r0, r1, i0) _sse_unldr_x(_jit, r0, r1, i0)
262 static void _sse_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
263 # define sse_unldi_x(r0, i0, i1) _sse_unldi_x(_jit, r0, i0, i1)
264 static void _sse_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
/*
 * Single-precision stores.  sse_str_f expands to movssrm with r1 as the
 * register operand, then 0, r0, _NOREG and _SCL1 as the addressing arguments.
 */
265 # define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
266 # define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
267 static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
268 # define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2)
269 static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
270 # define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
271 static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/* unstr_x / unsti_x: presumably the unaligned store counterparts - confirm. */
272 #define sse_unstr_x(r0, r1, i0) _sse_unstr_x(_jit, r0, r1, i0)
273 static void _sse_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
274 #define sse_unsti_x(i0, r0, i1) _sse_unsti_x(_jit, i0, r0, i1)
275 static void _sse_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
/*
 * Single-precision conditional branches.  Every macro forwards to the
 * underscore-prefixed function with `_jit` prepended; i0 is a jit_word_t
 * (presumably the branch target - confirm), followed by the operands.
 * The ..r_f forms compare two registers and are declared as returning
 * jit_word_t; the ..i_f forms take a register and a pointer to a
 * jit_float32_t immediate.
 * NOTE(review): each ..i_f prototype below starts directly at the function
 * name with no visible return type; the embedded numbering skips one line
 * before each of them (279, 284, 289, ...), so a `static jit_word_t` line
 * appears to be missing from this copy - confirm against upstream.
 */
276 # define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
277 static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
278 # define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
280 _sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
281 # define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1)
282 static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
283 # define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1)
285 _sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
286 # define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1)
287 static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
288 # define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1)
290 _sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
291 # define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1)
292 static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
293 # define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1)
295 _sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
296 # define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1)
297 static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
298 # define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1)
300 _sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
301 # define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1)
302 static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
303 # define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1)
305 _sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
306 # define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1)
307 static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
308 # define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1)
310 _sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
311 # define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1)
312 static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
313 # define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1)
315 _sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
316 # define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1)
317 static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
318 # define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1)
320 _sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
321 # define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1)
322 static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
323 # define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1)
325 _sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
326 # define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1)
327 static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
328 # define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1)
330 _sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
331 # define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1)
332 static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
333 # define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1)
335 _sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
336 # define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1)
337 static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
338 # define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1)
340 _sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
341 # define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1)
342 static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
343 # define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1)
345 _sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/* Double-precision moves: register copy and immediate (by pointer) load. */
346 #define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1)
347 static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
348 #define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
349 static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
/*
 * Integer <-> double transfers.  Under __X32 || __X64_32 the "ww" forms take
 * three operands (presumably a double plus a pair of words); the "w" forms
 * further down use a single word and map onto movqxr / movqrx (the latter
 * with its operands swapped).
 * NOTE(review): no `# else` / `# endif` is visible for this `# if`; the
 * embedded numbering skips 357 and 362, so those lines appear to be missing
 * from this copy - confirm against upstream.
 */
350 # if __X32 || __X64_32
351 # define sse_movr_ww_d(r0, r1, r2) _sse_movr_ww_d(_jit, r0, r1, r2)
352 static void _sse_movr_ww_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
353 # define sse_movr_d_ww(r0, r1, r2) _sse_movr_d_ww(_jit, r0, r1, r2)
354 static void _sse_movr_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
355 # define sse_movi_ww_d(r0, i0, i1) _sse_movi_ww_d(_jit, r0, i0, i1)
356 static void _sse_movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
358 # define sse_movr_w_d(r0, r1) movqxr(r0, r1)
359 # define sse_movr_d_w(r0, r1) movqrx(r1, r0)
360 # define sse_movi_w_d(r0, i0) _sse_movi_w_d(_jit, r0, i0)
361 static void _sse_movi_w_d(jit_state_t*, jit_int32_t, jit_word_t);
/*
 * Double-precision comparisons.  The ..i_d forms take a pointer to a
 * jit_float64_t immediate.  Register forms either expand directly to
 * ssecmpd with a condition code, or forward to a dedicated function.
 * Operand order in the direct expansions:
 *   lt -> X86_CC_A  (r1, r2)      gt -> X86_CC_A  (r2, r1)
 *   le -> X86_CC_AE (r1, r2)      ge -> X86_CC_AE (r2, r1)
 *   unlt -> X86_CC_NAE (r2, r1)   ungt -> X86_CC_NAE (r1, r2)
 *   ord -> X86_CC_NP (r2, r1)     unord -> X86_CC_P (r2, r1)
 * How _ssecmp orders the underlying compare is not visible in this part of
 * the file.
 */
363 # define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
364 # define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
365 static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
366 # define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2)
367 # define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0)
368 static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
369 # define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2)
370 static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
371 # define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0)
372 static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
373 # define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1)
374 # define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0)
375 static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
376 # define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1)
377 # define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0)
378 static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
379 # define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2)
380 static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
381 # define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0)
382 static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* "un" prefixed forms (presumably the unordered-or-... variants). */
383 # define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1)
384 # define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0)
385 static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
386 # define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2)
387 static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
388 # define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0)
389 static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
390 # define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2)
391 static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
392 # define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0)
393 static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
394 # define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2)
395 static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
396 # define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0)
397 static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
398 # define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2)
399 # define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0)
400 static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
401 # define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2)
402 static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
403 # define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0)
404 static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Ordered / unordered tests, keyed on the parity condition codes. */
405 # define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1)
406 # define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0)
407 static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
408 # define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1)
409 # define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0)
410 static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/*
 * Double-precision loads.  sse_ldr_d expands to movsdmr with first argument
 * 0, r1 as the second, _NOREG as the third and _SCL1 as the fourth, with r0
 * as the register operand.
 */
411 # define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0)
412 # define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0)
413 static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
414 # define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2)
415 static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
416 # define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0)
417 static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/*
 * The sse_bltr_d branch macro is defined here, ahead of the store macros;
 * its _sse_bltr_d prototype appears after the _sse_stxi_d prototype.
 */
418 # define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1)
/*
 * Double-precision stores.  sse_str_d expands to movsdrm with r1 as the
 * register operand, then 0, r0, _NOREG and _SCL1 as the addressing arguments.
 */
419 # define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1)
420 # define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0)
421 static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
422 # define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2)
423 static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
424 # define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1)
425 static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Double-precision conditional branches.  Every macro forwards to the
 * underscore-prefixed function with `_jit` prepended; i0 is a jit_word_t
 * (presumably the branch target - confirm), followed by the operands.
 * The ..r_d forms compare two registers and are declared as returning
 * jit_word_t; the ..i_d forms take a register and a pointer to a
 * jit_float64_t immediate.  The macro matching the first prototype below
 * (sse_bltr_d) is defined earlier in the file, ahead of sse_str_d.
 * NOTE(review): each ..i_d prototype below starts directly at the function
 * name with no visible return type; the embedded numbering skips one line
 * before each of them (428, 433, 438, ...), so a `static jit_word_t` line
 * appears to be missing from this copy - confirm against upstream.
 */
426 static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
427 # define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1)
429 _sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
430 # define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1)
431 static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
432 # define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1)
434 _sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
435 # define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1)
436 static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
437 # define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1)
439 _sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
440 # define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1)
441 static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
442 # define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1)
444 _sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
445 # define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1)
446 static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
447 # define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1)
449 _sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
450 # define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1)
451 static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
452 # define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1)
454 _sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
455 # define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1)
456 static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
457 # define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1)
459 _sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
460 # define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1)
461 static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
462 # define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1)
464 _sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
465 # define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1)
466 static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
467 # define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1)
469 _sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
470 # define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1)
471 static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
472 # define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1)
474 _sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
475 # define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1)
476 static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
477 # define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1)
479 _sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
480 # define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1)
481 static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
482 # define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1)
484 _sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
485 # define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1)
486 static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
487 # define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1)
489 _sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
490 # define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1)
491 static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
492 # define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1)
494 _sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
/*
 * Generator macros for the immediate-operand helpers.
 *
 * fpr_opi(name, type, size) produces _sse_<name>i_<type>(_jit, r0, r1, i0)
 * where i0 points to a jit_float<size>_t.  The visible body obtains a
 * temporary register with jit_get_reg(jit_class_fpr|jit_class_xpr), asserts
 * jit_sse_reg_p() on it, loads the immediate with sse_movi_<type>, calls the
 * register form sse_<name>r_<type>(r0, r1, temp) and releases the temporary.
 *
 * fpr_bopi(name, type, size) produces _sse_b<name>i_<type>(_jit, i0, r0, i1)
 * in the same way, additionally requesting jit_class_nospill for the
 * temporary and storing the result of sse_b<name>r_<type>() in `w`.
 *
 * fopi/fbopi and dopi/dbopi instantiate these for (f, 32) and (d, 64).
 *
 * NOTE(review): the lines carrying the return type, the braces, the
 * declaration of `w` and its return are not visible in this copy (the
 * embedded numbering skips them), and the last visible line of each macro
 * body still ends in a continuation backslash - confirm against upstream.
 * No comments are placed between the lines below because of those
 * continuations.
 */
498 # define fpr_opi(name, type, size) \
500 _sse_##name##i_##type(jit_state_t *_jit, \
501 jit_int32_t r0, jit_int32_t r1, \
502 jit_float##size##_t *i0) \
504 jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr); \
505 assert(jit_sse_reg_p(reg)); \
506 sse_movi_##type(rn(reg), i0); \
507 sse_##name##r_##type(r0, r1, rn(reg)); \
508 jit_unget_reg(reg); \
510 # define fpr_bopi(name, type, size) \
512 _sse_b##name##i_##type(jit_state_t *_jit, \
513 jit_word_t i0, jit_int32_t r0, \
514 jit_float##size##_t *i1) \
517 jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \
518 jit_class_nospill); \
519 assert(jit_sse_reg_p(reg)); \
520 sse_movi_##type(rn(reg), i1); \
521 w = sse_b##name##r_##type(i0, r0, rn(reg)); \
522 jit_unget_reg(reg); \
525 # define fopi(name) fpr_opi(name, f, 32)
526 # define fbopi(name) fpr_bopi(name, f, 32)
527 # define dopi(name) fpr_opi(name, d, 64)
528 # define dbopi(name) fpr_bopi(name, d, 64)
/*
 * Definitions of the low-level emitters.
 * NOTE(review): only fragments are visible in this copy - return types,
 * braces and some statements are absent (the embedded numbering has gaps
 * inside every body).  The comments below describe only the visible lines.
 */
/* _sser: rex() with r0 and r1, then mrm(0x03, ...) on the low bits of both. */
530 _sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
532 rex(0, 0, r0, 0, r1);
535 mrm(0x03, r7(r0), r7(r1));
/* _ssexr: same visible rex()/mrm() pair as _sser; the use of p is not visible. */
539 _ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
540 jit_int32_t r0, jit_int32_t r1)
543 rex(0, 0, r0, 0, r1);
546 mrm(0x03, r7(r0), r7(r1));
/* _ssexi: m goes in the first mrm() operand slot, r0 in the second. */
550 _ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
551 jit_int32_t m, jit_int32_t i)
557 mrm(0x03, r7(m), r7(r0));
/*
 * _sselxr: differs from _ssexr in the visible lines only by passing 1 as the
 * second rex() argument (presumably the 64-bit operand-size flag - confirm).
 */
563 _sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
564 jit_int32_t r0, jit_int32_t r1)
567 rex(0, 1, r0, 0, r1);
570 mrm(0x03, r7(r0), r7(r1));
/* _ssexrx: rex() over rd, ri, rb, then rx() encodes rd with md/rb/ri/ms. */
575 _ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
576 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
579 rex(0, 0, rd, ri, rb);
582 rx(rd, md, rb, ri, ms);
/*
 * Definitions of the arithmetic helpers.
 * NOTE(review): only fragments are visible in this copy - return types,
 * braces, conditions and most statements are absent (the embedded numbering
 * has gaps inside every body).  The comments below describe only the
 * visible lines.
 */
/* Add: only the signatures are visible. */
586 _sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
601 _sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Subtract: one visible path takes a temporary register and copies r0 into it. */
616 _sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
622 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
623 sse_movr_f(rn(reg), r0);
637 _sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
643 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
644 sse_movr_d(rn(reg), r0);
/* Multiply: only the signatures are visible. */
662 _sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
677 _sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Divide: same visible temporary-and-copy lines as subtract. */
692 _sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
698 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
699 sse_movr_f(rn(reg), r0);
713 _sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
719 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
720 sse_movr_d(rn(reg), r0);
/*
 * Absolute value: a temporary register is compared with itself using
 * pcmpeqlr (presumably to build an all-ones mask that later, non-visible
 * lines shift and AND with the operand - confirm).
 */
734 _sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
738 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
739 pcmpeqlr(rn(reg), rn(reg));
752 _sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
756 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
757 pcmpeqlr(rn(reg), rn(reg));
/*
 * Negate (float): 0x80000000 is loaded into a general register; one visible
 * path moves it into a temporary XMM register and XORs it into r0, the other
 * moves it straight into r0.
 */
770 _sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
772 jit_int32_t freg, ireg;
773 ireg = jit_get_reg(jit_class_gpr);
774 imovi(rn(ireg), 0x80000000);
776 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
777 movdxr(rn(freg), rn(ireg));
778 xorpsr(r0, rn(freg));
782 movdxr(r0, rn(ireg));
/*
 * Negate (double): same visible shape as the float version, using xorpdr.
 * NOTE(review): 0x80000000 only covers bit 31; no shift into bit 63 is
 * visible between the movdxr and xorpdr lines, but the embedded numbering
 * skips 797 there, so a shift line is presumably missing from this copy -
 * confirm against upstream.
 */
789 _sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
791 jit_int32_t freg, ireg;
792 ireg = jit_get_reg(jit_class_gpr);
793 imovi(rn(ireg), 0x80000000);
795 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
796 movdxr(rn(freg), rn(ireg));
798 xorpdr(r0, rn(freg));
802 movdxr(r0, rn(ireg));
809 /* r1 = (r1 * r3) + r2 */
810 #define vfmadd132ss(r1, r2, r3) _vfmadd132sx(_jit, 0, r1, r2, r3)
811 #define vfmadd132sd(r1, r2, r3) _vfmadd132sx(_jit, 1, r1, r2, r3)
813 _vfmadd132sx(jit_state_t *_jit, jit_bool_t dbl,
814 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
817 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
819 mrm(0x03, r7(r1), r7(r3));
822 /* r1 = (r1 * r3) - r2 */
823 #define vfmsub132ss(r1, r2, r3) _vfmsub132sx(_jit, 0, r1, r2, r3)
824 #define vfmsub132sd(r1, r2, r3) _vfmsub132sx(_jit, 1, r1, r2, r3)
826 _vfmsub132sx(jit_state_t *_jit, jit_bool_t dbl,
827 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
830 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
832 mrm(0x03, r7(r1), r7(r3));
835 /* r1 = (r1 * r2) + r3 */
836 #define vfmadd213ss(r1, r2, r3) _vfmadd213sx(_jit, 0, r1, r2, r3)
837 #define vfmadd213sd(r1, r2, r3) _vfmadd213sx(_jit, 1, r1, r2, r3)
839 _vfmadd213sx(jit_state_t *_jit, jit_bool_t dbl,
840 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
843 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
845 mrm(0x03, r7(r1), r7(r3));
848 /* r1 = (r1 * r2) - r3 */
849 #define vfmsub213ss(r1, r2, r3) _vfmsub213sx(_jit, 0, r1, r2, r3)
850 #define vfmsub213sd(r1, r2, r3) _vfmsub213sx(_jit, 1, r1, r2, r3)
852 _vfmsub213sx(jit_state_t *_jit, jit_bool_t dbl,
853 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
856 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
858 mrm(0x03, r7(r1), r7(r3));
861 /* r1 = (r2 * r3) + r1 */
862 #define vfmadd231ss(r1, r2, r3) _vfmadd231sx(_jit, 0, r1, r2, r3)
863 #define vfmadd231sd(r1, r2, r3) _vfmadd231sx(_jit, 1, r1, r2, r3)
865 _vfmadd231sx(jit_state_t *_jit, jit_bool_t dbl,
866 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
869 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
871 mrm(0x03, r7(r1), r7(r3));
874 /* r1 = (r2 * r3) - r1 */
875 #define vfmsub231ss(r1, r2, r3) _vfmsub231sx(_jit, 0, r1, r2, r3)
876 #define vfmsub231sd(r1, r2, r3) _vfmsub231sx(_jit, 1, r1, r2, r3)
878 _vfmsub231sx(jit_state_t *_jit, jit_bool_t dbl,
879 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
882 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
884 mrm(0x03, r7(r1), r7(r3));
888 _sse_fmar_f(jit_state_t *_jit,
889 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
893 if (r0 != r2 && r0 != r3) {
895 vfmadd213ss(r0, r2, r3);
898 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
899 sse_movr_f(rn(t0), r1);
900 vfmadd213ss(rn(t0), r2, r3);
901 sse_movr_f(r0, rn(t0));
907 sse_mulr_f(r0, r1, r2);
908 sse_addr_f(r0, r0, r3);
911 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
912 sse_mulr_f(rn(t0), r1, r2);
913 sse_addr_f(r0, rn(t0), r3);
920 _sse_fmar_d(jit_state_t *_jit,
921 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
925 if (r0 != r2 && r0 != r3) {
927 vfmadd213sd(r0, r2, r3);
930 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
931 sse_movr_d(rn(t0), r1);
932 vfmadd213sd(rn(t0), r2, r3);
933 sse_movr_d(r0, rn(t0));
939 sse_mulr_d(r0, r1, r2);
940 sse_addr_d(r0, r0, r3);
943 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
944 sse_mulr_d(rn(t0), r1, r2);
945 sse_addr_d(r0, rn(t0), r3);
952 _sse_fmsr_f(jit_state_t *_jit,
953 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
957 if (r0 != r2 && r0 != r3) {
959 vfmsub213ss(r0, r2, r3);
962 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
963 sse_movr_f(rn(t0), r1);
964 vfmsub213ss(rn(t0), r2, r3);
965 sse_movr_f(r0, rn(t0));
971 sse_mulr_f(r0, r1, r2);
972 sse_subr_f(r0, r0, r3);
975 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
976 sse_mulr_f(rn(t0), r1, r2);
977 sse_subr_f(r0, rn(t0), r3);
984 _sse_fmsr_d(jit_state_t *_jit,
985 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
989 if (r0 != r2 && r0 != r3) {
991 vfmsub213sd(r0, r2, r3);
994 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
995 sse_movr_d(rn(t0), r1);
996 vfmsub213sd(rn(t0), r2, r3);
997 sse_movr_d(r0, rn(t0));
1003 sse_mulr_d(r0, r1, r2);
1004 sse_subr_d(r0, r0, r3);
1007 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1008 sse_mulr_d(rn(t0), r1, r2);
1009 sse_subr_d(r0, rn(t0), r3);
1016 _sse_fnmar_f(jit_state_t *_jit,
1017 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1021 if (r0 != r2 && r0 != r3) {
1023 vfmsub213ss(r0, r2, r3);
1026 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1027 sse_negr_f(rn(t0), r1);
1028 vfmsub213ss(rn(t0), r2, r3);
1029 sse_movr_f(r0, rn(t0));
1034 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1035 sse_negr_f(rn(t0), r1);
1036 sse_mulr_f(rn(t0), rn(t0), r2);
1037 sse_subr_f(r0, rn(t0), r3);
1043 _sse_fnmar_d(jit_state_t *_jit,
1044 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1048 if (r0 != r2 && r0 != r3) {
1050 vfmsub213sd(r0, r2, r3);
1053 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1054 sse_negr_d(rn(t0), r1);
1055 vfmsub213sd(rn(t0), r2, r3);
1056 sse_movr_d(r0, rn(t0));
1061 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1062 sse_negr_d(rn(t0), r1);
1063 sse_mulr_d(rn(t0), rn(t0), r2);
1064 sse_subr_d(r0, rn(t0), r3);
1070 _sse_fnmsr_f(jit_state_t *_jit,
1071 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1075 if (r0 != r2 && r0 != r3) {
1077 vfmadd213ss(r0, r2, r3);
1080 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1081 sse_negr_f(rn(t0), r1);
1082 vfmadd213ss(rn(t0), r2, r3);
1083 sse_movr_f(r0, rn(t0));
1088 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1089 sse_negr_f(rn(t0), r1);
1090 sse_mulr_f(rn(t0), rn(t0), r2);
1091 sse_addr_f(r0, rn(t0), r3);
1097 _sse_fnmsr_d(jit_state_t *_jit,
1098 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1102 if (r0 != r2 && r0 != r3) {
1104 vfmadd213sd(r0, r2, r3);
1107 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1108 sse_negr_d(rn(t0), r1);
1109 vfmadd213sd(rn(t0), r2, r3);
1110 sse_movr_d(r0, rn(t0));
1115 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1116 sse_negr_d(rn(t0), r1);
1117 sse_mulr_d(rn(t0), rn(t0), r2);
1118 sse_addr_d(r0, rn(t0), r3);
1124 _ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
1125 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1129 if ((rc = reg8_p(r0)))
1146 _sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1149 ssexr(0xf3, X86_SSE_MOV, r0, r1);
1153 _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
1163 if (data.f == 0.0 && !(data.i & 0x80000000))
1166 ldi = !_jitc->no_data;
/* if loading the constant would require allocating a register for the offset, just use an immediate */
1169 # if CAN_RIP_ADDRESS
1171 jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1172 ldi = can_sign_extend_int_p(rel);
1173 if (!ldi && address_p(i0))
1177 if (ldi && !address_p(i0))
1182 sse_ldi_f(r0, (jit_word_t)i0);
1184 reg = jit_get_reg(jit_class_gpr);
1185 movi(rn(reg), data.i);
1186 movdxr(r0, rn(reg));
1193 _sse_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1196 reg = jit_get_reg(jit_class_gpr);
1198 movdxr(r0, rn(reg));
1206 _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1211 if ((rc = reg8_p(r0)))
1215 movr(r0, _RAX_REGNO);
1221 patch_at(jp_code, _jit->pc.w);
1231 _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1236 if ((rc = reg8_p(r0)))
1240 movr(r0, _RAX_REGNO);
1246 patch_at(jp_code, _jit->pc.w);
1255 _sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1260 ssecmpf(X86_CC_NA, r0, r2, r1);
1266 _sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1271 ssecmpf(X86_CC_E, r0, r1, r2);
1277 _sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1282 ssecmpf(X86_CC_NA, r0, r1, r2);
1289 _sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1294 ssecmpf(X86_CC_NE, r0, r1, r2);
1302 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1306 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1307 if (can_sign_extend_int_p(rel))
1308 movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
1312 movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
1314 reg = jit_get_reg(jit_class_gpr);
1316 sse_ldr_f(r0, rn(reg));
1322 _sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1326 reg = jit_get_reg(jit_class_gpr);
1327 addr(rn(reg), r1, r2);
1328 sse_ldr_f(r0, rn(reg));
1331 movssmr(0, r1, r2, _SCL1, r0);
1336 _sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1339 if (can_sign_extend_int_p(i0))
1340 movssmr(i0, r1, _NOREG, _SCL1, r0);
1342 reg = jit_get_reg(jit_class_gpr);
1344 addi(rn(reg), r1, i0);
1345 sse_ldr_f(r0, rn(reg));
1348 sse_ldxr_f(r0, r1, rn(reg));
1355 _sse_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1357 assert(i0 == 4 || i0 == 8);
1365 _sse_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
1367 assert(i1 == 4 || i1 == 8);
1375 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1379 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1380 if (can_sign_extend_int_p(rel))
1381 movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
1385 movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
1387 reg = jit_get_reg(jit_class_gpr);
1389 sse_str_f(rn(reg), r0);
1395 _sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1399 reg = jit_get_reg(jit_class_gpr);
1400 addr(rn(reg), r0, r1);
1401 sse_str_f(rn(reg), r2);
1404 movssrm(r2, 0, r0, r1, _SCL1);
1409 _sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1412 if (can_sign_extend_int_p(i0))
1413 movssrm(r1, i0, r0, _NOREG, _SCL1);
1415 reg = jit_get_reg(jit_class_gpr);
1417 addi(rn(reg), r0, i0);
1418 sse_str_f(rn(reg), r1);
1421 sse_stxr_f(rn(reg), r0, r1);
1428 _sse_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1430 assert(i0 == 4 || i0 == 8);
1438 _sse_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
1440 assert(i1 == 4 || i1 == 8);
1448 _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1456 _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1464 _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1471 patch_at(jp_code, _jit->pc.w);
1477 _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1485 _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1493 _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1501 patch_at(jp_code, _jit->pc.w);
1503 patch_at(jz_code, _jit->pc.w);
1509 _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1517 _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1531 _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1545 _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1559 _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1567 _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1575 _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1583 _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1594 _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1599 if ((rc = reg8_p(r0)))
1603 movr(r0, _RAX_REGNO);
1609 patch_at(jp_code, _jit->pc.w);
1619 _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1624 if ((rc = reg8_p(r0)))
1628 movr(r0, _RAX_REGNO);
1634 patch_at(jp_code, _jit->pc.w);
1643 _sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1648 ssecmpd(X86_CC_NA, r0, r2, r1);
1654 _sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1659 ssecmpd(X86_CC_E, r0, r1, r2);
1665 _sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1670 ssecmpd(X86_CC_NA, r0, r1, r2);
1677 _sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1682 ssecmpd(X86_CC_NE, r0, r1, r2);
1690 _sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1693 ssexr(0xf2, X86_SSE_MOV, r0, r1);
1697 _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
1708 if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
1711 ldi = !_jitc->no_data;
/* if loading the constant would require allocating a register for the offset, just use an immediate */
1714 # if CAN_RIP_ADDRESS
1716 jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1717 ldi = can_sign_extend_int_p(rel);
1718 if (!ldi && address_p(i0))
1722 if (ldi && !address_p(i0))
1727 sse_ldi_d(r0, (jit_word_t)i0);
1729 reg = jit_get_reg(jit_class_gpr);
1730 #if __X64 && !__X64_32
1731 movi(rn(reg), data.w);
1732 movqxr(r0, rn(reg));
1736 movi(rn(reg), data.ii[0]);
1737 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1738 movi(rn(reg), data.ii[1]);
1739 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1741 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1747 #if __X32 || __X64_32
1749 _sse_movr_ww_d(jit_state_t *_jit,
1750 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1753 stxi_i(CVT_OFFSET, _RBP_REGNO, r1);
1754 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, r2);
1755 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1759 _sse_movr_d_ww(jit_state_t *_jit,
1760 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1763 sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r2);
1764 ldxi_i(r0, _RBP_REGNO, CVT_OFFSET);
1765 ldxi_i(r1, _RBP_REGNO, CVT_OFFSET + 4);
1769 _sse_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
1773 reg = jit_get_reg(jit_class_gpr);
1775 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1777 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1778 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1783 _sse_movi_w_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1786 reg = jit_get_reg(jit_class_gpr);
1788 movqxr(r0, rn(reg));
1794 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1798 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1799 if (can_sign_extend_int_p(rel))
1800 movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
1804 movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
1806 reg = jit_get_reg(jit_class_gpr);
1808 sse_ldr_d(r0, rn(reg));
1814 _sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1818 reg = jit_get_reg(jit_class_gpr);
1819 addr(rn(reg), r1, r2);
1820 sse_ldr_d(r0, rn(reg));
1823 movsdmr(0, r1, r2, _SCL1, r0);
1828 _sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1831 if (can_sign_extend_int_p(i0))
1832 movsdmr(i0, r1, _NOREG, _SCL1, r0);
1834 reg = jit_get_reg(jit_class_gpr);
1836 addi(rn(reg), r1, i0);
1837 sse_ldr_d(r0, rn(reg));
1840 sse_ldxr_d(r0, r1, rn(reg));
1847 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1851 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1852 if (can_sign_extend_int_p(rel))
1853 movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
1857 movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
1859 reg = jit_get_reg(jit_class_gpr);
1861 sse_str_d(rn(reg), r0);
1867 _sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1871 reg = jit_get_reg(jit_class_gpr);
1872 addr(rn(reg), r0, r1);
1873 sse_str_d(rn(reg), r2);
1876 movsdrm(r2, 0, r0, r1, _SCL1);
1881 _sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1884 if (can_sign_extend_int_p(i0))
1885 movsdrm(r1, i0, r0, _NOREG, _SCL1);
1887 reg = jit_get_reg(jit_class_gpr);
1889 addi(rn(reg), r0, i0);
1890 sse_str_d(rn(reg), r1);
1893 sse_stxr_f(rn(reg), r0, r1);
1900 _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1908 _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1916 _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1923 patch_at(jp_code, _jit->pc.w);
1929 _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1937 _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1945 _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1953 patch_at(jp_code, _jit->pc.w);
1955 patch_at(jz_code, _jit->pc.w);
1961 _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1969 _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1983 _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1997 _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2011 _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2019 _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2027 _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2035 _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)