2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4 * This file is part of GNU lightning.
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
17 * Paulo Cesar Pereira de Andrade
/* Numeric register indices for XMM6 through XMM15; each macro expands to
 * the number that appears in its name. */
21 # define _XMM6_REGNO 6
22 # define _XMM7_REGNO 7
23 # define _XMM8_REGNO 8
24 # define _XMM9_REGNO 9
25 # define _XMM10_REGNO 10
26 # define _XMM11_REGNO 11
27 # define _XMM12_REGNO 12
28 # define _XMM13_REGNO 13
29 # define _XMM14_REGNO 14
30 # define _XMM15_REGNO 15
/* Numeric codes for the SSE operations used in this file.  The instruction
 * macros further down pass one of these as the `c` (code) argument of
 * sser/ssexr/sselxr/ssexrx.
 * NOTE(review): presumably the opcode byte of the instruction -- the lines
 * that actually emit it are not visible in this excerpt; confirm. */
31 #define X86_SSE_MOV 0x10
32 #define X86_SSE_MOV1 0x11
33 #define X86_SSE_MOVLP 0x12
34 #define X86_SSE_MOVHP 0x16
35 #define X86_SSE_MOVA 0x28
36 #define X86_SSE_CVTIS 0x2a
37 #define X86_SSE_CVTTSI 0x2c
38 #define X86_SSE_CVTSI 0x2d
39 #define X86_SSE_UCOMI 0x2e
40 #define X86_SSE_COMI 0x2f
41 #define X86_SSE_ROUND 0x3a
42 #define X86_SSE_SQRT 0x51
43 #define X86_SSE_RSQRT 0x52
44 #define X86_SSE_RCP 0x53
45 #define X86_SSE_AND 0x54
46 #define X86_SSE_ANDN 0x55
47 #define X86_SSE_OR 0x56
48 #define X86_SSE_XOR 0x57
49 #define X86_SSE_ADD 0x58
50 #define X86_SSE_MUL 0x59
51 #define X86_SSE_CVTSD 0x5a
52 #define X86_SSE_CVTDT 0x5b
53 #define X86_SSE_SUB 0x5c
54 #define X86_SSE_MIN 0x5d
55 #define X86_SSE_DIV 0x5e
56 #define X86_SSE_MAX 0x5f
57 #define X86_SSE_X2G 0x6e
58 #define X86_SSE_EQB 0x74
59 #define X86_SSE_EQW 0x75
60 #define X86_SSE_EQD 0x76
61 #define X86_SSE_G2X 0x7e
62 #define X86_SSE_MOV2 0xd6
/* Low-level emitter entry points.  Each macro forwards to the static
 * function of the same name with a leading underscore, supplying the
 * implicit `_jit` state as first argument.
 *   sser(c, r0, r1)      - code plus two registers
 *   ssexr(p, c, r0, r1)  - as sser, with an extra leading value p
 *                          (callers in this file pass 0xf3, 0xf2 or 0x66)
 *   ssexi(c, r0, m, i)   - code, one register, a value m and an immediate i */
63 # define sser(c,r0,r1) _sser(_jit,c,r0,r1)
64 static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
65 # define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
66 static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
67 # define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
68 static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/* Register-register instruction macros built on the emitters above.
 * Scalar arithmetic: the *ss forms pass 0xf3 and the *sd forms pass 0xf2
 * as the p argument of ssexr. */
69 # define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
70 # define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1)
71 # define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1)
72 # define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1)
73 # define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1)
74 # define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1)
75 # define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1)
76 # define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1)
/* The ps form goes through sser (no p argument); the pd form passes 0x66. */
77 # define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1)
78 # define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1)
/* Conversions.  The *_l variants and the first sse_extr_f/sse_extr_d pair
 * use sselxr; the second sse_extr_f/sse_extr_d pair uses ssexr.
 * NOTE(review): sse_extr_f and sse_extr_d are defined twice here; the
 * listing skips lines around them, so presumably a preprocessor conditional
 * that selects one pair is not visible -- confirm against the full file. */
79 # define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
80 # define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
82 # define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
83 # define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
84 # define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
85 # define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
87 # define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
88 # define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
/* Both float<->double conversions use X86_SSE_CVTSD; they differ only in
 * the p value (0xf3 for f_d, 0xf2 for d_f). */
90 # define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
91 # define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
/* Compare and xor: single-precision forms via sser, double-precision forms
 * via ssexr with 0x66. */
92 # define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)
93 # define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
94 # define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
95 # define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
96 # define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
97 # define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
/* Shift-by-immediate forms: ssexi(code, register, m, immediate) with code
 * 0x72 (psrl/psll) or 0x73 (psrq/pslq) and m 0x02 (psr*) or 0x06 (psl*). */
98 # define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
99 # define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
100 # define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
101 # define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
/* Same code as movdlxr (X86_SSE_X2G with 0x66) but routed through sselxr. */
102 # define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1)
/* sselxr: when __X64 is set and __X64_32 is not, it forwards to the
 * dedicated _sselxr emitter; the second definition makes it an alias of
 * ssexr.
 * NOTE(review): the return-type line of the prototype and the #else/#endif
 * lines separating the two definitions are not visible in this excerpt. */
103 # if __X64 && !__X64_32
104 # define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
106 _sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
108 # define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1)
/* ssexrx(p, c, md, rb, ri, ms, rd): emitter taking four values md/rb/ri/ms
 * plus a register rd.
 * NOTE(review): md/rb/ri/ms presumably mean displacement, base, index and
 * scale of a memory operand -- inferred from the names and the _SCL1/_SCL8
 * arguments used by callers; confirm.
 * The *mr macros use X86_SSE_MOV and take the register last; the *rm macros
 * use X86_SSE_MOV1 and take the register (rs) first, forwarding it as the
 * final ssexrx argument.  ss forms pass 0xf3, sd forms pass 0xf2. */
110 # define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
111 # define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
112 # define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
113 # define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
114 # define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
116 _ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
117 jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
/* Arithmetic entry points.  Naming: <op>r = three-register form,
 * <op>i = two registers plus a pointer to a constant (jit_float32_t* for
 * the _f variants, jit_float64_t* for the _d variants).  Every macro
 * forwards to the underscore-prefixed static function with `_jit` added. */
118 # define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2)
119 static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
120 # define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0)
121 static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
122 # define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2)
123 static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
124 # define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0)
125 static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
126 # define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2)
127 static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
128 # define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0)
129 static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
130 # define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2)
131 static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
132 # define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0)
133 static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Reverse subtract, register form: sse_subr_* with r1 and r2 swapped. */
134 # define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1)
135 # define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0)
136 static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
137 # define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1)
138 # define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0)
139 static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
140 # define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2)
141 static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
142 # define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0)
143 static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
144 # define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2)
145 static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
146 # define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0)
147 static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
148 # define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2)
149 static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
150 # define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0)
151 static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
152 # define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2)
153 static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
154 # define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0)
155 static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Unary operations take a destination and a source register. */
156 # define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1)
157 static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
158 # define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1)
159 static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
160 # define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1)
161 static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
162 # define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1)
163 static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
/* Square root maps directly onto a single ssexr call (no helper function). */
164 # define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
165 # define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
/* Shared comparison helper.  ssecmpf passes 0 and ssecmpd passes 1 as the
 * jit_bool_t argument of _ssecmp; `code` is the condition code supplied by
 * the comparison macros (X86_CC_*).
 * NOTE(review): the prototype's return-type line is not visible here. */
166 # define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
167 # define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
169 _ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
170 jit_int32_t, jit_int32_t, jit_int32_t);
/* Single-precision move and comparison entry points.
 * The <cmp>i forms take a pointer to a jit_float32_t constant and have a
 * dedicated static function.  Several <cmp>r forms are plain ssecmpf
 * expansions; the others (eq, ne, unle, uneq, unge, ltgt) have their own
 * static function. */
171 #define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1)
172 static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
173 #define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
174 static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
175 # define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
176 static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* lt/le pass (r1, r2) with X86_CC_A / X86_CC_AE; gt/ge below reuse the same
 * condition codes with the operands swapped to (r2, r1). */
177 # define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
178 # define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0)
179 static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
180 # define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2)
181 # define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0)
182 static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
183 # define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2)
184 static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
185 # define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0)
186 static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
187 # define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1)
188 # define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0)
189 static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
190 # define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1)
191 # define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0)
192 static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
193 # define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2)
194 static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
195 # define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0)
196 static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* unlt uses X86_CC_NAE with (r2, r1); ungt further down uses the same code
 * with (r1, r2). */
197 # define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1)
198 # define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0)
199 static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* The sse_unler_f macro and its _sse_unler_f prototype are separated by the
 * sse_uneqi_f pair; this ordering is harmless. */
200 # define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2)
201 # define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0)
202 static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
203 static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
204 # define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2)
205 static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
206 # define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0)
207 static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
208 # define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2)
209 static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
210 # define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0)
211 static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
212 # define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2)
213 # define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0)
214 static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
215 # define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2)
216 static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
217 # define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0)
218 static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* ord / unord use X86_CC_NP / X86_CC_P respectively. */
219 # define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1)
220 # define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0)
221 static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
222 # define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1)
/* Single-precision loads and stores.
 * sse_ldr_f / sse_str_f expand directly to movssmr / movssrm with a zero
 * first address value, one address register, _NOREG and _SCL1.  Note the
 * argument order: loads take (destination, address register), stores take
 * (address register, source).
 * The *i forms take a jit_word_t, the *xr forms two registers, the *xi
 * forms a register plus a jit_word_t. */
223 # define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0)
224 # define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0)
225 static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
226 # define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2)
227 static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
228 # define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
229 static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
230 # define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
231 # define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
232 static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
233 # define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2)
234 static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
235 # define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
236 static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/* Single-precision branch entry points.  Each takes a jit_word_t i0
 * followed by a register and either a second register (b<cmp>r) or a
 * pointer to a jit_float32_t constant (b<cmp>i).
 * The register forms are declared `static jit_word_t`.  The immediate
 * forms' declarations are split across lines, and the line carrying their
 * return type is not visible in this excerpt.
 * NOTE(review): i0 is presumably the branch target and the return value
 * the patch location of the emitted jump -- confirm. */
237 # define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
238 static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
239 # define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
241 _sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
242 # define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1)
243 static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
244 # define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1)
246 _sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
247 # define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1)
248 static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
249 # define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1)
251 _sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
252 # define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1)
253 static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
254 # define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1)
256 _sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
257 # define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1)
258 static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
259 # define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1)
261 _sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
262 # define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1)
263 static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
264 # define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1)
266 _sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
267 # define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1)
268 static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
269 # define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1)
271 _sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
272 # define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1)
273 static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
274 # define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1)
276 _sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
277 # define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1)
278 static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
279 # define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1)
281 _sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
282 # define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1)
283 static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
284 # define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1)
286 _sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
287 # define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1)
288 static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
289 # define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1)
291 _sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
292 # define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1)
293 static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
294 # define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1)
296 _sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
297 # define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1)
298 static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
299 # define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1)
301 _sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
302 # define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1)
303 static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
304 # define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1)
306 _sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/* Double-precision move and comparison entry points.  Same layout as the
 * single-precision set: <cmp>i forms take a jit_float64_t* constant, and the
 * plain-macro <cmp>r forms expand to ssecmpd with the same condition codes
 * and operand order as their ssecmpf counterparts. */
307 #define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1)
308 static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
309 #define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
310 static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
/* lt/le pass (r1, r2); ge/gt reuse X86_CC_AE / X86_CC_A with (r2, r1). */
311 # define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
312 # define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
313 static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
314 # define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2)
315 # define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0)
316 static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
317 # define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2)
318 static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
319 # define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0)
320 static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
321 # define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1)
322 # define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0)
323 static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
324 # define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1)
325 # define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0)
326 static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
327 # define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2)
328 static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
329 # define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0)
330 static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* unlt uses X86_CC_NAE with (r2, r1); ungt uses it with (r1, r2). */
331 # define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1)
332 # define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0)
333 static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
334 # define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2)
335 static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
336 # define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0)
337 static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
338 # define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2)
339 static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
340 # define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0)
341 static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
342 # define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2)
343 static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
344 # define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0)
345 static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
346 # define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2)
347 # define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0)
348 static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
349 # define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2)
350 static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
351 # define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0)
352 static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* ord / unord use X86_CC_NP / X86_CC_P respectively. */
353 # define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1)
354 # define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0)
355 static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
356 # define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1)
357 # define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0)
358 static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Double-precision loads and stores, mirroring the single-precision set but
 * built on movsdmr / movsdrm.  Loads take (destination, address register);
 * stores take (address register, source).
 * The sse_bltr_d branch wrapper macro is defined in the middle of this
 * group; it is unrelated to the load/store macros around it. */
359 # define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0)
360 # define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0)
361 static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
362 # define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2)
363 static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
364 # define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0)
365 static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
366 # define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1)
367 # define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1)
368 # define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0)
369 static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
370 # define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2)
371 static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
372 # define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1)
373 static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/* Double-precision branch entry points.  Each takes a jit_word_t i0
 * followed by a register and either a second register (b<cmp>r) or a
 * pointer to a jit_float64_t constant (b<cmp>i).
 * The first line is the prototype behind the sse_bltr_d wrapper macro,
 * which is defined earlier in the file, among the double-precision
 * load/store macros.
 * The register forms are declared `static jit_word_t`.  The immediate
 * forms' declarations are split across lines, and the line carrying their
 * return type is not visible in this excerpt. */
374 static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
375 # define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1)
377 _sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
378 # define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1)
379 static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
380 # define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1)
382 _sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
383 # define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1)
384 static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
385 # define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1)
387 _sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
388 # define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1)
389 static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
390 # define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1)
392 _sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
393 # define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1)
394 static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
395 # define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1)
397 _sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
398 # define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1)
399 static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
400 # define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1)
402 _sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
403 # define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1)
404 static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
405 # define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1)
407 _sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
408 # define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1)
409 static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
410 # define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1)
412 _sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
413 # define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1)
414 static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
415 # define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1)
417 _sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
418 # define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1)
419 static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
420 # define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1)
422 _sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
423 # define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1)
424 static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
425 # define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1)
427 _sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
428 # define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1)
429 static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
430 # define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1)
432 _sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
433 # define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1)
434 static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
435 # define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1)
437 _sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
438 # define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1)
439 static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
440 # define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1)
442 _sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
/* Generator macros for the "immediate" variants declared above.
 *
 * fpr_opi(name, type, size) produces _sse_<name>i_<type>(_jit, r0, r1, i0):
 * it takes a scratch register with jit_get_reg(jit_class_fpr|jit_class_xpr),
 * asserts jit_sse_reg_p on it, loads the constant *i0 into it with
 * sse_movi_<type>, runs the register form sse_<name>r_<type>(r0, r1, scratch)
 * and releases the scratch register.
 *
 * fpr_bopi(name, type, size) does the same for the branch forms
 * _sse_b<name>i_<type>(_jit, i0, r0, i1): the scratch register is also
 * requested with jit_class_nospill, and the result of
 * sse_b<name>r_<type>(i0, r0, scratch) is stored in w.
 * NOTE(review): the declaration of w and the return statement are not
 * visible in this excerpt; presumably w is returned -- confirm.
 *
 * fopi/fbopi instantiate these for (f, 32) and dopi/dbopi for (d, 64). */
446 # define fpr_opi(name, type, size) \
448 _sse_##name##i_##type(jit_state_t *_jit, \
449 jit_int32_t r0, jit_int32_t r1, \
450 jit_float##size##_t *i0) \
452 jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr); \
453 assert(jit_sse_reg_p(reg)); \
454 sse_movi_##type(rn(reg), i0); \
455 sse_##name##r_##type(r0, r1, rn(reg)); \
456 jit_unget_reg(reg); \
458 # define fpr_bopi(name, type, size) \
460 _sse_b##name##i_##type(jit_state_t *_jit, \
461 jit_word_t i0, jit_int32_t r0, \
462 jit_float##size##_t *i1) \
465 jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \
466 jit_class_nospill); \
467 assert(jit_sse_reg_p(reg)); \
468 sse_movi_##type(rn(reg), i1); \
469 w = sse_b##name##r_##type(i0, r0, rn(reg)); \
470 jit_unget_reg(reg); \
473 # define fopi(name) fpr_opi(name, f, 32)
474 # define fbopi(name) fpr_bopi(name, f, 32)
475 # define dopi(name) fpr_opi(name, d, 64)
476 # define dbopi(name) fpr_bopi(name, d, 64)
/* Emitter bodies.  NOTE(review): only some lines of each function are
 * visible in this excerpt (return types, braces and the statements between
 * the rex() and mrm()/rx() calls are missing), so the comments below
 * describe the visible calls only. */

/* _sser: calls rex(0, 0, r0, 0, r1), later mrm(0x03, r7(r0), r7(r1)). */
478 _sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
480 rex(0, 0, r0, 0, r1);
483 mrm(0x03, r7(r0), r7(r1));
/* _ssexr: same visible rex()/mrm() calls as _sser, with the additional
 * parameter p (its use is on lines not shown here). */
487 _ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
488 jit_int32_t r0, jit_int32_t r1)
491 rex(0, 0, r0, 0, r1);
494 mrm(0x03, r7(r0), r7(r1));
/* _ssexi: in the mrm() call, m takes the position the two-register emitters
 * give to r0, and r0 takes the position they give to r1. */
498 _ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
499 jit_int32_t m, jit_int32_t i)
505 mrm(0x03, r7(m), r7(r0));
/* _sselxr: differs from _ssexr in the visible lines only by passing 1
 * instead of 0 as the second rex() argument. */
511 _sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
512 jit_int32_t r0, jit_int32_t r1)
515 rex(0, 1, r0, 0, r1);
518 mrm(0x03, r7(r0), r7(r1));
/* _ssexrx: passes rd, ri and rb to rex(), then hands rd together with
 * md/rb/ri/ms to rx(). */
523 _ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
524 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
527 rex(0, 0, rd, ri, rb);
530 rx(rd, md, rb, ri, ms);
/* Three-register arithmetic helpers.  NOTE(review): most body lines are
 * missing from this excerpt.  For add and mul only the signature line is
 * visible. */
534 _sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
549 _sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* sub: the visible path acquires a scratch SSE register and copies r0 into
 * it.  The condition guarding this path and the statements that follow are
 * not visible. */
564 _sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
570 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
571 sse_movr_f(rn(reg), r0);
585 _sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
591 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
592 sse_movr_d(rn(reg), r0);
610 _sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
625 _sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* div: same visible pattern as sub (scratch register receives a copy of r0). */
640 _sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
646 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
647 sse_movr_f(rn(reg), r0);
661 _sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
667 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
668 sse_movr_d(rn(reg), r0);
/* Absolute value helpers.  The visible lines acquire a scratch SSE register
 * and run pcmpeqlr on it with itself as both operands.
 * NOTE(review): presumably this fills the register with all-one bits to
 * build a sign-clearing mask; the shift and and steps that would confirm
 * this are not visible -- verify against the full file. */
682 _sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
686 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
687 pcmpeqlr(rn(reg), rn(reg));
700 _sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
704 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
705 pcmpeqlr(rn(reg), rn(reg));
/* Negation helpers.  Both load the constant 0x80000000 into a
 * general-purpose scratch register (ireg) with imovi.  Two paths are then
 * visible: one moves ireg into a scratch SSE register (freg) with movdlxr
 * and xors freg into r0; the other moves ireg straight into r0.
 * NOTE(review): the condition selecting between the two paths, the
 * remaining statements of each path and the register releases are not
 * visible in this excerpt. */
718 _sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
720 jit_int32_t freg, ireg;
721 ireg = jit_get_reg(jit_class_gpr);
722 imovi(rn(ireg), 0x80000000);
724 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
725 movdlxr(rn(freg), rn(ireg));
726 xorpsr(r0, rn(freg));
730 movdlxr(r0, rn(ireg));
/* Double-precision variant: same visible steps, using xorpdr for the xor.
 * A line is missing between the movdlxr and the xorpdr. */
737 _sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
739 jit_int32_t freg, ireg;
740 ireg = jit_get_reg(jit_class_gpr);
741 imovi(rn(ireg), 0x80000000);
743 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
744 movdlxr(rn(freg), rn(ireg));
746 xorpdr(r0, rn(freg));
750 movdlxr(r0, rn(ireg));
/* _ssecmp: shared body behind ssecmpf/ssecmpd.  The only visible statement
 * records in rc whether r0 satisfies reg8_p().
 * NOTE(review): the rest of the body is not visible in this excerpt. */
758 _ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
759 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
763 if ((rc = reg8_p(r0)))
/* _sse_movr_f: register-to-register move emitted as ssexr(0xf3, X86_SSE_MOV).
 * NOTE(review): a line is missing before the ssexr call; presumably a guard,
 * confirm against the full file. */
780 _sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
783 ssexr(0xf3, X86_SSE_MOV, r0, r1);
/* _sse_movi_f: load the float constant pointed to by i0 into r0.
 * NOTE(review): declarations, braces and preprocessor lines are missing from
 * this excerpt; the comments describe the visible statements only. */
787 _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
/* Special case: value compares equal to 0.0 and bit 0x80000000 of its
 * integer view is clear (the action taken is on a line not shown). */
797 if (data.f == 0.0 && !(data.i & 0x80000000))
/* ldi starts out true unless _jitc->no_data is set. */
800 ldi = !_jitc->no_data;
802 /* if will allocate a register for offset, just use immediate */
/* rel is the distance from (_jit->pc.w + 8, plus 1 when r0 has bit 8 set)
 * to the constant; ldi is kept only if rel passes can_sign_extend_int_p. */
805 jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
806 ldi = can_sign_extend_int_p(rel);
807 if (!ldi && address_p(i0))
811 if (ldi && !address_p(i0))
/* Load through sse_ldi_f using the constant's address ... */
816 sse_ldi_f(r0, (jit_word_t)i0);
/* ... or move the integer view data.i through a general-purpose scratch
 * register and transfer it to r0 with movdlxr. */
818 reg = jit_get_reg(jit_class_gpr);
819 movi(rn(reg), data.i);
820 movdlxr(r0, rn(reg));
/* Single-precision comparison helpers that need more than one ssecmpf call.
 * NOTE(review): most body lines are missing from this excerpt. */

/* eq / ne: rc records whether r0 satisfies reg8_p(); one visible statement
 * is movr(r0, _RAX_REGNO), and a jump recorded in jp_code is patched to the
 * current _jit->pc.w. */
830 _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
835 if ((rc = reg8_p(r0)))
839 movr(r0, _RAX_REGNO);
845 patch_at(jp_code, _jit->pc.w);
855 _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
860 if ((rc = reg8_p(r0)))
864 movr(r0, _RAX_REGNO);
870 patch_at(jp_code, _jit->pc.w);
/* unle: ssecmpf with X86_CC_NA and operands (r2, r1). */
879 _sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
884 ssecmpf(X86_CC_NA, r0, r2, r1);
/* uneq: ssecmpf with X86_CC_E and operands (r1, r2). */
890 _sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
895 ssecmpf(X86_CC_E, r0, r1, r2);
/* unge: ssecmpf with X86_CC_NA and operands (r1, r2). */
901 _sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
906 ssecmpf(X86_CC_NA, r0, r1, r2);
/* ltgt: ssecmpf with X86_CC_NE and operands (r1, r2). */
913 _sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
918 ssecmpf(X86_CC_NE, r0, r1, r2);
926 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
930 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
931 if (can_sign_extend_int_p(rel))
932 movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
936 movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
938 reg = jit_get_reg(jit_class_gpr);
940 sse_ldr_f(r0, rn(reg));
946 _sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
950 reg = jit_get_reg(jit_class_gpr);
951 addr(rn(reg), r1, r2);
952 sse_ldr_f(r0, rn(reg));
955 movssmr(0, r1, r2, _SCL1, r0);
/* _sse_ldxi_f: load a float32 into r0 from r1 plus the constant offset i0.
 * NOTE(review): the else/#if structure around the fallback is elided. */
960 _sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
/* Fast path: the offset passes can_sign_extend_int_p() and is used directly
 * as the displacement. */
963 if (can_sign_extend_int_p(i0))
964 movssmr(i0, r1, _NOREG, _SCL1, r0);
/* Fallback: needs a scratch GPR. */
966 reg = jit_get_reg(jit_class_gpr);
/* Variant A: compute r1 + i0 in the scratch register and load through it. */
968 addi(rn(reg), r1, i0);
969 sse_ldr_f(r0, rn(reg));
/* Variant B: indexed load with r1 and the scratch register (the line that
 * fills the scratch register is elided). */
972 sse_ldxr_f(r0, r1, rn(reg));
/* _sse_sti_f: store the float32 in SSE register r0 to the address i0.
 * NOTE(review): the conditionals selecting between the paths below are only
 * partly visible in this excerpt. */
979 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
/* Displacement from pc + 8 (+1 when bit 3 of r0 is set) to i0; presumably
 * relative to the end of the instruction to emit -- confirm. */
983 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
/* Path 1: the displacement passes can_sign_extend_int_p(); the store is
 * issued with no base/index and _SCL8 (presumably the pc-relative form). */
984 if (can_sign_extend_int_p(rel))
985 movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
/* Path 2: store using i0 directly as the displacement. */
989 movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
/* Path 3: go through a scratch GPR (the line that fills it is elided) and
 * store via the register form. */
991 reg = jit_get_reg(jit_class_gpr);
993 sse_str_f(rn(reg), r0);
/* _sse_stxr_f: store the float32 in r2 to the address formed by r0 and r1.
 * Two alternative sequences are visible; what selects between them is
 * elided in this excerpt. */
999 _sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Alternative A: add r0 and r1 into a scratch GPR, then store through it. */
1003 reg = jit_get_reg(jit_class_gpr);
1004 addr(rn(reg), r0, r1);
1005 sse_str_f(rn(reg), r2);
/* Alternative B: single store with displacement 0 and both r0 and r1 passed
 * as address registers, scale _SCL1. */
1008 movssrm(r2, 0, r0, r1, _SCL1);
/* _sse_stxi_f: store the float32 in r1 to r0 plus the constant offset i0.
 * NOTE(review): the else/#if structure around the fallback is elided. */
1013 _sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* Fast path: the offset passes can_sign_extend_int_p() and is used directly
 * as the displacement. */
1016 if (can_sign_extend_int_p(i0))
1017 movssrm(r1, i0, r0, _NOREG, _SCL1);
/* Fallback: needs a scratch GPR. */
1019 reg = jit_get_reg(jit_class_gpr);
/* Variant A: compute r0 + i0 in the scratch register and store through it. */
1021 addi(rn(reg), r0, i0);
1022 sse_str_f(rn(reg), r1);
/* Variant B: indexed store with the scratch register and r0 (the line that
 * fills the scratch register is elided). */
1025 sse_stxr_f(rn(reg), r0, r1);
1032 _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1040 _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* _sse_beqr_f: float32 conditional branch taking a target i0 and registers
 * r0, r1 (branch-if-equal, judging by the name; body mostly elided). */
1048 _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* Bind the pending jp_code jump to the current pc. */
1055 patch_at(jp_code, _jit->pc.w);
1061 _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1069 _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* _sse_bner_f: float32 conditional branch taking a target i0 and registers
 * r0, r1 (branch-if-not-equal, judging by the name; body mostly elided). */
1077 _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* Two pending jumps are bound to the pc, jp_code first and jz_code later;
 * code elided between them may advance the pc in between. */
1085 patch_at(jp_code, _jit->pc.w);
1087 patch_at(jz_code, _jit->pc.w);
1093 _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1101 _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1115 _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1129 _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1143 _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1151 _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1159 _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1167 _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* _sse_eqr_d: float64 comparison of r1 and r2 with the result placed in r0
 * (equality, judging by the name; most of the body is elided here). */
1178 _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* rc remembers whether r0 satisfies reg8_p(). */
1183 if ((rc = reg8_p(r0)))
/* Copy involving RAX and r0; presumably RAX is saved in r0 so it can serve
 * as the working register when r0 fails reg8_p() -- confirm. */
1187 movr(r0, _RAX_REGNO);
/* Bind the pending jp_code jump to the current pc. */
1193 patch_at(jp_code, _jit->pc.w);
/* _sse_ner_d: float64 comparison of r1 and r2 with the result placed in r0
 * (inequality, judging by the name; most of the body is elided here). */
1203 _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* rc remembers whether r0 satisfies reg8_p(). */
1208 if ((rc = reg8_p(r0)))
/* Copy involving RAX and r0; presumably RAX is saved in r0 so it can serve
 * as the working register when r0 fails reg8_p() -- confirm. */
1212 movr(r0, _RAX_REGNO);
/* Bind the pending jp_code jump to the current pc. */
1218 patch_at(jp_code, _jit->pc.w);
/* _sse_unler_d: float64 compare writing r0 from r1 and r2. Uses the same
 * condition code as _sse_unger_d but passes r2 before r1. Any lines before
 * the compare are elided in this excerpt. */
1227 _sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1232 ssecmpd(X86_CC_NA, r0, r2, r1);
/* _sse_uneqr_d: float64 compare writing r0 from r1 and r2 using condition
 * code X86_CC_E. Any lines before the compare are elided in this excerpt. */
1238 _sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1243 ssecmpd(X86_CC_E, r0, r1, r2);
/* _sse_unger_d: float64 compare writing r0 from r1 and r2 using condition
 * code X86_CC_NA, operands in (r1, r2) order. Any lines before the compare
 * are elided in this excerpt. */
1249 _sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1254 ssecmpd(X86_CC_NA, r0, r1, r2);
/* _sse_ltgtr_d: float64 compare writing r0 from r1 and r2 using condition
 * code X86_CC_NE. Any lines before the compare are elided in this excerpt. */
1261 _sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1266 ssecmpd(X86_CC_NE, r0, r1, r2);
/* _sse_movr_d: register-to-register float64 move between r0 and r1, emitted
 * as the X86_SSE_MOV opcode with the 0xf2 prefix. Any guard preceding the
 * emit is elided in this excerpt. */
1274 _sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1277 ssexr(0xf2, X86_SSE_MOV, r0, r1);
/* _sse_movi_d: put the float64 constant referenced by i0 into SSE register r0.
 * NOTE(review): this excerpt omits the lines between the numbered ones, so
 * declarations, braces, #else/#endif and some branches are not visible. */
1281 _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
/* Special case: value compares equal to zero and bit 31 of ii[1] is clear;
 * ii[1] is treated as the word holding the sign bit (+0.0, not -0.0). */
1292 if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
/* Start by allowing a load from memory only when no_data is not set. */
1295 ldi = !_jitc->no_data;
1297 /* if a register would have to be allocated for the offset, just use an immediate */
1298 # if CAN_RIP_ADDRESS
/* Displacement from pc + 8 (+1 when bit 3 of r0 is set) to the constant;
 * presumably measured from the end of the instruction to emit -- confirm. */
1300 jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1301 ldi = can_sign_extend_int_p(rel);
/* The two checks below reconcile ldi with address_p(i0); their bodies are
 * not visible in this excerpt. */
1302 if (!ldi && address_p(i0))
1306 if (ldi && !address_p(i0))
/* Memory path: load the constant from its address. */
1311 sse_ldi_d(r0, (jit_word_t)i0);
/* Immediate path: needs a scratch GPR. */
1313 reg = jit_get_reg(jit_class_gpr);
1314 #if __X64 && !__X64_32
/* Word-sized variant: the whole pattern (data.w) goes through the scratch
 * register straight into r0. */
1315 movi(rn(reg), data.w);
1316 movdqxr(r0, rn(reg));
/* Split variant: write the two 32-bit halves to CVT_OFFSET and
 * CVT_OFFSET + 4 relative to RBP, then load the 64-bit value from there. */
1320 movi(rn(reg), data.ii[0]);
1321 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1322 movi(rn(reg), data.ii[1]);
1323 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1325 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
/* _sse_ldi_d: load a float64 into SSE register r0 from the address i0.
 * NOTE(review): the conditionals selecting between the paths below are only
 * partly visible in this excerpt. */
1332 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
/* Displacement from pc + 8 (+1 when bit 3 of r0 is set) to i0; presumably
 * relative to the end of the instruction to emit -- confirm. */
1336 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
/* Path 1: the displacement passes can_sign_extend_int_p(); the load is
 * issued with no base/index and _SCL8 (presumably the pc-relative form). */
1337 if (can_sign_extend_int_p(rel))
1338 movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
/* Path 2: load using i0 directly as the displacement. */
1342 movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
/* Path 3: go through a scratch GPR (the line that fills it is elided) and
 * load via the register form. */
1344 reg = jit_get_reg(jit_class_gpr);
1346 sse_ldr_d(r0, rn(reg));
/* _sse_ldxr_d: load a float64 into r0 from the address formed by r1 and r2.
 * Two alternative sequences are visible; what selects between them is
 * elided in this excerpt. */
1352 _sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Alternative A: add r1 and r2 into a scratch GPR, then load through it. */
1356 reg = jit_get_reg(jit_class_gpr);
1357 addr(rn(reg), r1, r2);
1358 sse_ldr_d(r0, rn(reg));
/* Alternative B: single load with displacement 0 and both r1 and r2 passed
 * as address registers, scale _SCL1. */
1361 movsdmr(0, r1, r2, _SCL1, r0);
/* _sse_ldxi_d: load a float64 into r0 from r1 plus the constant offset i0.
 * NOTE(review): the else/#if structure around the fallback is elided. */
1366 _sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
/* Fast path: the offset passes can_sign_extend_int_p() and is used directly
 * as the displacement. */
1369 if (can_sign_extend_int_p(i0))
1370 movsdmr(i0, r1, _NOREG, _SCL1, r0);
/* Fallback: needs a scratch GPR. */
1372 reg = jit_get_reg(jit_class_gpr);
/* Variant A: compute r1 + i0 in the scratch register and load through it. */
1374 addi(rn(reg), r1, i0);
1375 sse_ldr_d(r0, rn(reg));
/* Variant B: indexed load with r1 and the scratch register (the line that
 * fills the scratch register is elided). */
1378 sse_ldxr_d(r0, r1, rn(reg));
/* _sse_sti_d: store the float64 in SSE register r0 to the address i0.
 * NOTE(review): the conditionals selecting between the paths below are only
 * partly visible in this excerpt. */
1385 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
/* Displacement from pc + 8 (+1 when bit 3 of r0 is set) to i0; presumably
 * relative to the end of the instruction to emit -- confirm. */
1389 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
/* Path 1: the displacement passes can_sign_extend_int_p(); the store is
 * issued with no base/index and _SCL8 (presumably the pc-relative form). */
1390 if (can_sign_extend_int_p(rel))
1391 movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
/* Path 2: store using i0 directly as the displacement. */
1395 movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
/* Path 3: go through a scratch GPR (the line that fills it is elided) and
 * store via the register form. */
1397 reg = jit_get_reg(jit_class_gpr);
1399 sse_str_d(rn(reg), r0);
/* _sse_stxr_d: store the float64 in r2 to the address formed by r0 and r1.
 * Two alternative sequences are visible; what selects between them is
 * elided in this excerpt. */
1405 _sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
/* Alternative A: add r0 and r1 into a scratch GPR, then store through it. */
1409 reg = jit_get_reg(jit_class_gpr);
1410 addr(rn(reg), r0, r1);
1411 sse_str_d(rn(reg), r2);
/* Alternative B: single store with displacement 0 and both r0 and r1 passed
 * as address registers, scale _SCL1. */
1414 movsdrm(r2, 0, r0, r1, _SCL1);
/* _sse_stxi_d: store the float64 in r1 to r0 plus the constant offset i0.
 * NOTE(review): the else/#if structure around the fallback is elided in
 * this excerpt; only the visible lines are reproduced. */
1419 _sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* Fast path: the offset passes can_sign_extend_int_p() and is used directly
 * as the displacement. */
1422 if (can_sign_extend_int_p(i0))
1423 movsdrm(r1, i0, r0, _NOREG, _SCL1);
/* Fallback: needs a scratch GPR. */
1425 reg = jit_get_reg(jit_class_gpr);
/* Variant A: compute r0 + i0 in the scratch register and store through it. */
1427 addi(rn(reg), r0, i0);
1428 sse_str_d(rn(reg), r1);
/* Variant B: indexed store with the scratch register and r0. This must be
 * the float64 store (sse_stxr_d); the float32 sse_stxr_f would write only
 * half of the value. */
1431 sse_stxr_d(rn(reg), r0, r1);
1438 _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1446 _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* _sse_beqr_d: float64 conditional branch taking a target i0 and registers
 * r0, r1 (branch-if-equal, judging by the name; body mostly elided). */
1454 _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* Bind the pending jp_code jump to the current pc. */
1461 patch_at(jp_code, _jit->pc.w);
1467 _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1475 _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* _sse_bner_d: float64 conditional branch taking a target i0 and registers
 * r0, r1 (branch-if-not-equal, judging by the name; body mostly elided). */
1483 _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
/* Two pending jumps are bound to the pc, jp_code first and jz_code later;
 * code elided between them may advance the pc in between. */
1491 patch_at(jp_code, _jit->pc.w);
1493 patch_at(jz_code, _jit->pc.w);
1499 _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1507 _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1521 _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1535 _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1549 _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1557 _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1565 _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1573 _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)