2 * Copyright (C) 2012-2022 Free Software Foundation, Inc.
4 * This file is part of GNU lightning.
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
17 * Paulo Cesar Pereira de Andrade
/*
 * sse_address_p(i0): true when i0 may be used directly as the address of a
 * memory operand.  It guards the direct-address paths in _sse_ldi_f() and
 * _sse_sti_f() and the constant-load decision in _sse_movi_f().
 * Three alternative definitions are listed.
 * NOTE(review): the preprocessor conditionals that pick one of them are not
 * visible in this view; presumably one per target ABI -- confirm.
 */
/* Variant 1: every address is accepted. */
22 # define sse_address_p(i0) 1
/* Variant 2: accepted when the address is non-negative as a jit_word_t. */
25 # define sse_address_p(i0) ((jit_word_t)(i0) >= 0)
/* Variant 3: defers to can_sign_extend_int_p(). */
27 # define sse_address_p(i0) can_sign_extend_int_p(i0)
/* Register numbers for XMM6 through XMM15; each value equals the digit(s)
 * in the macro name. */
30 # define _XMM6_REGNO 6
31 # define _XMM7_REGNO 7
32 # define _XMM8_REGNO 8
33 # define _XMM9_REGNO 9
34 # define _XMM10_REGNO 10
35 # define _XMM11_REGNO 11
36 # define _XMM12_REGNO 12
37 # define _XMM13_REGNO 13
38 # define _XMM14_REGNO 14
39 # define _XMM15_REGNO 15
/*
 * Opcode constants.  They are passed as the `c` / `code` argument of the
 * sser(), ssexr(), sselxr() and ssexrx() emitters by the instruction macros
 * further down; the same constant is combined with different prefix values
 * there (e.g. X86_SSE_ADD is used by both addssr and addsdr).
 * NOTE(review): several constants (e.g. ROUND, RSQRT, RCP, MIN, MAX, EQB,
 * EQW, G2X, MOV2, MOVLP, MOVHP, MOVA, COMI, CVTSI, CVTDT, ANDN, OR) have no
 * user in the visible part of the file.
 */
/* Used by movssmr / movsdmr. */
40 #define X86_SSE_MOV 0x10
/* Used by movssrm / movsdrm. */
41 #define X86_SSE_MOV1 0x11
42 #define X86_SSE_MOVLP 0x12
43 #define X86_SSE_MOVHP 0x16
44 #define X86_SSE_MOVA 0x28
/* Used by sse_extr_f / sse_extr_d. */
45 #define X86_SSE_CVTIS 0x2a
/* Used by the sse_truncr_* macros. */
46 #define X86_SSE_CVTTSI 0x2c
47 #define X86_SSE_CVTSI 0x2d
/* Used by ucomissr / ucomisdr. */
48 #define X86_SSE_UCOMI 0x2e
49 #define X86_SSE_COMI 0x2f
50 #define X86_SSE_ROUND 0x3a
/* Used by sse_sqrtr_f / sse_sqrtr_d. */
51 #define X86_SSE_SQRT 0x51
52 #define X86_SSE_RSQRT 0x52
53 #define X86_SSE_RCP 0x53
54 #define X86_SSE_AND 0x54
55 #define X86_SSE_ANDN 0x55
56 #define X86_SSE_OR 0x56
57 #define X86_SSE_XOR 0x57
58 #define X86_SSE_ADD 0x58
59 #define X86_SSE_MUL 0x59
/* Used by sse_extr_f_d / sse_extr_d_f. */
60 #define X86_SSE_CVTSD 0x5a
61 #define X86_SSE_CVTDT 0x5b
62 #define X86_SSE_SUB 0x5c
63 #define X86_SSE_MIN 0x5d
64 #define X86_SSE_DIV 0x5e
65 #define X86_SSE_MAX 0x5f
/* Used by movdlxr / movdqxr. */
66 #define X86_SSE_X2G 0x6e
67 #define X86_SSE_EQB 0x74
68 #define X86_SSE_EQW 0x75
/* Used by pcmpeqlr. */
69 #define X86_SSE_EQD 0x76
70 #define X86_SSE_G2X 0x7e
71 #define X86_SSE_MOV2 0xd6
/*
 * Low-level emitters.  Each macro forwards to the static function of the
 * same name with a leading underscore, passing the ambient `_jit` state as
 * the first argument.
 *   sser(c, r0, r1)      - opcode c with two register operands.
 *   ssexr(p, c, r0, r1)  - as sser() with an extra leading value p; the
 *                          instruction macros pass 0x66, 0xf2 or 0xf3.
 *   ssexi(c, r0, m, i)   - opcode c, register r0, plus m and i; the shift
 *                          macros (psrl/psrq/psll/pslq) pass 0x02 or 0x06
 *                          as m and their i0 argument as i.
 * NOTE(review): p is presumably the mandatory instruction prefix byte and i
 * an 8-bit immediate -- confirm against the function bodies.
 */
72 # define sser(c,r0,r1) _sser(_jit,c,r0,r1)
73 static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
74 # define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
75 static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
76 # define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
77 static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/*
 * Two-operand instruction macros built on sser()/ssexr()/sselxr()/ssexi().
 */
/* Scalar arithmetic: the *ss* forms pass 0xf3, the *sd* forms 0xf2. */
78 # define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
79 # define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1)
80 # define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1)
81 # define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1)
82 # define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1)
83 # define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1)
84 # define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1)
85 # define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1)
/* Bitwise AND: the *ps* form uses sser() (no prefix argument), *pd* 0x66. */
86 # define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1)
87 # define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1)
/* Truncating conversions (X86_SSE_CVTTSI) emitted through ssexr(). */
88 # define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
89 # define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
/*
 * sselxr()-based truncations and sse_extr_f/sse_extr_d.
 * NOTE(review): sse_extr_f and sse_extr_d are defined twice (here through
 * sselxr(), below through ssexr()); the conditional that selects one pair is
 * not visible in this view.
 */
91 # define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
92 # define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
93 # define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
94 # define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
/* Alternative sse_extr_f/sse_extr_d definitions using ssexr(). */
96 # define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
97 # define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
/* Both use X86_SSE_CVTSD and differ only in the prefix value. */
99 # define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
100 # define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
/* Compare (X86_SSE_UCOMI): single via sser(), double with 0x66. */
101 # define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)
102 # define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
/* Bitwise XOR: single via sser(), double with 0x66. */
103 # define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
104 # define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
/* X86_SSE_X2G through ssexr(); callers in this file pass an SSE register as
 * r0 and a general-purpose register as r1. */
105 # define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
106 # define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
/* Shifts through ssexi(): opcode 0x72 for the *l forms and 0x73 for the *q
 * forms; m is 0x02 for psr* and 0x06 for psl*. */
107 # define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
108 # define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
109 # define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
110 # define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
/* Same opcode and prefix as movdlxr, but emitted through sselxr(). */
111 # define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1)
/*
 * sselxr(p, c, r0, r1): when __X64 && !__X64_32 it forwards to the dedicated
 * _sselxr() emitter; the second definition simply aliases ssexr().
 * In the visible function bodies _sselxr() passes 1 as the second rex()
 * argument where _ssexr() passes 0.
 * NOTE(review): the second definition is presumably the #else branch of the
 * conditional below; the #else/#endif lines are not visible in this view.
 */
112 # if __X64 && !__X64_32
113 # define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
115 _sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
117 # define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1)
/*
 * ssexrx(p, c, md, rb, ri, ms, rd): emitter for instructions with a memory
 * operand; forwards to _ssexrx() with `_jit` prepended.
 *   movssmr / movsdmr use X86_SSE_MOV with prefix 0xf3 / 0xf2.
 *   movssrm / movsdrm use X86_SSE_MOV1 and pass their rs argument in the rd
 *   position.
 * NOTE(review): md, rb, ri and ms are presumably displacement, base register,
 * index register and scale; callers in this file pass _NOREG for an absent
 * register and _SCL1 as ms -- confirm against the rx() helper.
 */
119 # define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
120 # define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
121 # define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
122 # define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
123 # define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
125 _ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
126 jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Arithmetic operations.  Every macro forwards to the static function of the
 * same name with a leading underscore and `_jit` prepended.
 *   *r_f / *r_d forms take three register numbers (r0, r1, r2).
 *   *i_f / *i_d forms take two register numbers and a pointer to a
 *   jit_float32_t / jit_float64_t constant.
 * NOTE(review): r0 is presumably the destination register -- confirm.
 */
127 # define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2)
128 static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
129 # define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0)
130 static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
131 # define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2)
132 static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
133 # define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0)
134 static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
135 # define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2)
136 static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
137 # define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0)
138 static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
139 # define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2)
140 static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
141 # define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0)
142 static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Reverse subtraction: the register forms are sse_subr_* with the last two
 * operands swapped; the immediate forms have their own functions. */
143 # define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1)
144 # define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0)
145 static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
146 # define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1)
147 # define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0)
148 static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
149 # define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2)
150 static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
151 # define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0)
152 static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
153 # define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2)
154 static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
155 # define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0)
156 static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
157 # define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2)
158 static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
159 # define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0)
160 static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
161 # define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2)
162 static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
163 # define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0)
164 static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Unary operations on two register numbers (r0, r1). */
165 # define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1)
166 static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
167 # define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1)
168 static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
169 # define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1)
170 static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
171 # define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1)
172 static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
/* Square root is a single instruction: X86_SSE_SQRT with 0xf3 / 0xf2. */
173 # define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
174 # define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
/* Comparison helper shared by both precisions: ssecmpf passes 0 and ssecmpd
 * passes 1 as the jit_bool_t argument of _ssecmp(); `code` is an X86_CC_*
 * condition value. */
175 # define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
176 # define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
178 _ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
179 jit_int32_t, jit_int32_t, jit_int32_t);
/* Single-precision register move and constant load (the constant is passed
 * by pointer). */
180 #define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1)
181 static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
182 #define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
183 static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
/*
 * Single-precision comparisons.  The *r_f forms compare two registers; those
 * that map onto one condition code are plain ssecmpf() macros, the others
 * have dedicated functions.  The *i_f forms take a pointer to a
 * jit_float32_t constant.
 * Several pairs share a condition code and differ only in operand order:
 * ltr/gtr both use X86_CC_A, ler/ger both use X86_CC_AE, and unltr/ungtr
 * both use X86_CC_NAE, with r1 and r2 swapped.
 * NOTE(review): r0 presumably receives the boolean result -- confirm.
 */
184 # define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
185 static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
186 # define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
187 # define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0)
188 static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
189 # define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2)
190 # define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0)
191 static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
192 # define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2)
193 static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
194 # define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0)
195 static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
196 # define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1)
197 # define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0)
198 static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
199 # define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1)
200 # define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0)
201 static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
202 # define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2)
203 static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
/* "un" variants (NOTE(review): presumably "unordered or <relation>"). */
204 # define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0)
205 static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
206 # define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1)
207 # define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0)
208 static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* The prototype of _sse_unler_f follows the sse_uneqi_f pair below. */
209 # define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2)
210 # define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0)
211 static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
212 static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
213 # define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2)
214 static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
215 # define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0)
216 static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
217 # define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2)
218 static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
219 # define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0)
220 static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
221 # define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2)
222 # define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0)
223 static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
224 # define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2)
225 static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
/* Ordered / unordered tests use X86_CC_NP / X86_CC_P respectively. */
226 # define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0)
227 static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
228 # define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1)
229 # define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0)
230 static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
231 # define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1)
/*
 * Single-precision loads and stores.
 *   sse_ldr_f(r0, r1)       - movssmr with 0 as md, r1 as rb and no index.
 *   sse_ldi_f(r0, i0)       - i0 is a jit_word_t address.
 *   sse_ldxr_f / sse_ldxi_f - r1 combined with a register r2 or a
 *                             jit_word_t i0.
 *   sse_str_f(r0, r1)       - movssrm of register r1 with r0 as mb.
 *   sse_sti_f / sse_stxr_f / sse_stxi_f - store counterparts; note the
 *                             address arguments come first and the value
 *                             register last.
 */
232 # define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0)
233 # define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0)
234 static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
235 # define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2)
236 static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
237 # define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
238 static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
239 # define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
240 # define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
241 static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
242 # define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2)
243 static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
244 # define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
245 static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Single-precision compare-and-branch emitters.  Each sse_b<cond>r_f takes a
 * jit_word_t i0 and two register numbers; each sse_b<cond>i_f takes i0, one
 * register number and a pointer to a jit_float32_t constant.  All return a
 * jit_word_t; the definitions visible in this file return _jit->pc.w.
 * NOTE(review): i0 is presumably the branch target and the return value the
 * position used for later patching -- confirm against callers.
 * NOTE(review): the return-type lines of the two-line *i_f prototypes are
 * not visible in this view.
 */
246 # define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
247 static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
248 # define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
250 _sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
251 # define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1)
252 static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
253 # define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1)
255 _sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
256 # define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1)
257 static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
258 # define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1)
260 _sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
261 # define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1)
262 static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
263 # define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1)
265 _sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
266 # define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1)
267 static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
268 # define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1)
270 _sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
271 # define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1)
272 static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
273 # define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1)
275 _sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
276 # define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1)
277 static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
278 # define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1)
280 _sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
281 # define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1)
282 static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
283 # define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1)
285 _sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
286 # define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1)
287 static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
288 # define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1)
290 _sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
291 # define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1)
292 static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
293 # define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1)
295 _sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
296 # define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1)
297 static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
298 # define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1)
300 _sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
301 # define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1)
302 static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
303 # define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1)
305 _sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
306 # define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1)
307 static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
308 # define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1)
310 _sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
311 # define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1)
312 static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
313 # define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1)
315 _sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/* Double-precision register move and constant load (the constant is passed
 * by pointer). */
316 #define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1)
317 static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
318 #define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
319 static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
/*
 * Double-precision comparisons, mirroring the single-precision set but built
 * on ssecmpd().  The *r_d forms compare two registers; the *i_d forms take a
 * pointer to a jit_float64_t constant.
 * As in the float set, ltr/gtr share X86_CC_A, ler/ger share X86_CC_AE and
 * unltr/ungtr share X86_CC_NAE, with r1 and r2 swapped.
 * NOTE(review): r0 presumably receives the boolean result -- confirm.
 */
320 # define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
321 # define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
322 static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
323 # define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2)
324 # define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0)
325 static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
326 # define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2)
327 static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
328 # define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0)
329 static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
330 # define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1)
331 # define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0)
332 static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
333 # define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1)
334 # define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0)
335 static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
336 # define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2)
337 static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
338 # define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0)
339 static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* "un" variants (NOTE(review): presumably "unordered or <relation>"). */
340 # define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1)
341 # define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0)
342 static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
343 # define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2)
344 static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
345 # define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0)
346 static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
347 # define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2)
348 static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
349 # define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0)
350 static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
351 # define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2)
352 static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
353 # define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0)
354 static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
355 # define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2)
356 # define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0)
357 static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
358 # define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2)
359 static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
360 # define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0)
361 static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Ordered / unordered tests use X86_CC_NP / X86_CC_P respectively. */
362 # define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1)
363 # define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0)
364 static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
365 # define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1)
366 # define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0)
367 static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/*
 * Double-precision loads and stores, mirroring the single-precision set but
 * built on movsdmr / movsdrm.
 *   sse_ldr_d(r0, r1) - movsdmr with 0 as md, r1 as rb and no index.
 *   sse_str_d(r0, r1) - movsdrm of register r1 with r0 as mb.
 *   The *i forms take a jit_word_t; the *x* forms take an extra register or
 *   jit_word_t.  For stores the address arguments come first and the value
 *   register last.
 */
368 # define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0)
369 # define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0)
370 static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
371 # define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2)
372 static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
373 # define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0)
374 static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/* Branch macro placed among the load/store entries; the prototype of
 * _sse_bltr_d appears after the store declarations. */
375 # define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1)
376 # define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1)
377 # define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0)
378 static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
379 # define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2)
380 static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
381 # define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1)
382 static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Double-precision compare-and-branch emitters.  Each sse_b<cond>r_d takes a
 * jit_word_t i0 and two register numbers; each sse_b<cond>i_d takes i0, one
 * register number and a pointer to a jit_float64_t constant.  All return a
 * jit_word_t.  The first prototype below belongs to the sse_bltr_d macro,
 * which is defined earlier among the load/store declarations.
 * NOTE(review): i0 is presumably the branch target -- confirm against
 * callers.
 * NOTE(review): the return-type lines of the two-line *i_d prototypes are
 * not visible in this view.
 */
383 static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
384 # define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1)
386 _sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
387 # define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1)
388 static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
389 # define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1)
391 _sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
392 # define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1)
393 static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
394 # define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1)
396 _sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
397 # define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1)
398 static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
399 # define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1)
401 _sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
402 # define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1)
403 static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
404 # define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1)
406 _sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
407 # define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1)
408 static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
409 # define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1)
411 _sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
412 # define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1)
413 static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
414 # define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1)
416 _sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
417 # define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1)
418 static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
419 # define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1)
421 _sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
422 # define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1)
423 static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
424 # define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1)
426 _sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
427 # define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1)
428 static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
429 # define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1)
431 _sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
432 # define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1)
433 static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
434 # define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1)
436 _sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
437 # define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1)
438 static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
439 # define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1)
441 _sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
442 # define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1)
443 static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
444 # define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1)
446 _sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
447 # define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1)
448 static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
449 # define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1)
451 _sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
/*
 * Generator macros for the immediate-operand variants.
 *
 * fpr_opi(name, type, size) defines _sse_<name>i_<type>(_jit, r0, r1, i0),
 * where i0 points to a jit_float<size>_t.  The generated body obtains a
 * temporary register with jit_get_reg(jit_class_fpr|jit_class_xpr), asserts
 * it is an SSE register, loads *i0 into it with sse_movi_<type>, invokes the
 * register form sse_<name>r_<type>(r0, r1, temp) and releases the register.
 *
 * fpr_bopi(name, type, size) defines _sse_b<name>i_<type>(_jit, i0, r0, i1)
 * the same way for branches: the temporary is additionally requested with
 * jit_class_nospill, and the result of sse_b<name>r_<type>(i0, r0, temp) is
 * stored in `word`.
 * NOTE(review): `word` is presumably the generated function's return value;
 * the return statement is not visible in this view.
 *
 * fopi/fbopi instantiate these for type f with size 32, dopi/dbopi for
 * type d with size 64.
 */
455 # define fpr_opi(name, type, size) \
457 _sse_##name##i_##type(jit_state_t *_jit, \
458 jit_int32_t r0, jit_int32_t r1, \
459 jit_float##size##_t *i0) \
461 jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr); \
462 assert(jit_sse_reg_p(reg)); \
463 sse_movi_##type(rn(reg), i0); \
464 sse_##name##r_##type(r0, r1, rn(reg)); \
465 jit_unget_reg(reg); \
467 # define fpr_bopi(name, type, size) \
469 _sse_b##name##i_##type(jit_state_t *_jit, \
470 jit_word_t i0, jit_int32_t r0, \
471 jit_float##size##_t *i1) \
474 jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \
475 jit_class_nospill); \
476 assert(jit_sse_reg_p(reg)); \
477 sse_movi_##type(rn(reg), i1); \
478 word = sse_b##name##r_##type(i0, r0, rn(reg)); \
479 jit_unget_reg(reg); \
482 # define fopi(name) fpr_opi(name, f, 32)
483 # define fbopi(name) fpr_bopi(name, f, 32)
484 # define dopi(name) fpr_opi(name, d, 64)
485 # define dbopi(name) fpr_bopi(name, d, 64)
487 _sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
489 rex(0, 0, r0, 0, r1);
492 mrm(0x03, r7(r0), r7(r1));
496 _ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
497 jit_int32_t r0, jit_int32_t r1)
500 rex(0, 0, r0, 0, r1);
503 mrm(0x03, r7(r0), r7(r1));
507 _ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
508 jit_int32_t m, jit_int32_t i)
514 mrm(0x03, r7(m), r7(r0));
520 _sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
521 jit_int32_t r0, jit_int32_t r1)
524 rex(0, 1, r0, 0, r1);
527 mrm(0x03, r7(r0), r7(r1));
532 _ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
533 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
536 rex(0, 0, rd, ri, rb);
539 rx(rd, md, rb, ri, ms);
543 _sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
558 _sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
573 _sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
579 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
580 sse_movr_f(rn(reg), r0);
594 _sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
600 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
601 sse_movr_d(rn(reg), r0);
619 _sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
634 _sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
649 _sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
655 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
656 sse_movr_f(rn(reg), r0);
670 _sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
676 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
677 sse_movr_d(rn(reg), r0);
691 _sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
695 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
696 pcmpeqlr(rn(reg), rn(reg));
709 _sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
713 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
714 pcmpeqlr(rn(reg), rn(reg));
727 _sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
729 jit_int32_t freg, ireg;
730 ireg = jit_get_reg(jit_class_gpr);
731 imovi(rn(ireg), 0x80000000);
733 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
734 movdlxr(rn(freg), rn(ireg));
735 xorpsr(r0, rn(freg));
739 movdlxr(r0, rn(ireg));
746 _sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
748 jit_int32_t freg, ireg;
749 ireg = jit_get_reg(jit_class_gpr);
750 imovi(rn(ireg), 0x80000000);
752 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
753 movdlxr(rn(freg), rn(ireg));
755 xorpdr(r0, rn(freg));
759 movdlxr(r0, rn(ireg));
767 _ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
768 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
772 if ((rc = reg8_p(r0)))
789 _sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
792 ssexr(0xf3, X86_SSE_MOV, r0, r1);
796 _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
806 if (data.f == 0.0 && !(data.i & 0x80000000))
809 ldi = !_jitc->no_data;
811 /* if a register would have to be allocated for the offset, just use an immediate */
812 if (ldi && !sse_address_p(i0))
816 sse_ldi_f(r0, (jit_word_t)i0);
818 reg = jit_get_reg(jit_class_gpr);
819 movi(rn(reg), data.i);
820 movdlxr(r0, rn(reg));
830 _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
835 if ((rc = reg8_p(r0)))
839 movr(r0, _RAX_REGNO);
844 jp_code = _jit->pc.w;
846 patch_rel_char(jp_code, _jit->pc.w);
856 _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
861 if ((rc = reg8_p(r0)))
865 movr(r0, _RAX_REGNO);
870 jp_code = _jit->pc.w;
872 patch_rel_char(jp_code, _jit->pc.w);
881 _sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
886 ssecmpf(X86_CC_NA, r0, r2, r1);
892 _sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
897 ssecmpf(X86_CC_E, r0, r1, r2);
903 _sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
908 ssecmpf(X86_CC_NA, r0, r1, r2);
915 _sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
920 ssecmpf(X86_CC_NE, r0, r1, r2);
928 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
931 if (sse_address_p(i0))
932 movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
934 reg = jit_get_reg(jit_class_gpr);
936 sse_ldr_f(r0, rn(reg));
942 _sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
946 reg = jit_get_reg(jit_class_gpr);
947 addr(rn(reg), r1, r2);
948 sse_ldr_f(r0, rn(reg));
951 movssmr(0, r1, r2, _SCL1, r0);
/* Loads a float32 into r0 from base register r1 plus constant offset i0.
 * If can_sign_extend_int_p(i0) holds, the offset is encoded directly as
 * the movssmr() displacement.  Otherwise a scratch GPR is used; two
 * fallback forms are visible (addi + sse_ldr_f, or sse_ldxr_f with the
 * scratch register as index).
 * NOTE(review): the lines choosing between the two fallbacks and loading
 * i0 into the scratch register are not visible in this view. */
956 _sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
959 if (can_sign_extend_int_p(i0))
960 movssmr(i0, r1, _NOREG, _SCL1, r0);
962 reg = jit_get_reg(jit_class_gpr);
964 addi(rn(reg), r1, i0);
965 sse_ldr_f(r0, rn(reg));
968 sse_ldxr_f(r0, r1, rn(reg));
/* Stores the float32 in r0 to the absolute address i0.
 * When sse_address_p(i0) holds, the address is encoded directly in
 * movssrm() with no base and no index register.  Otherwise a scratch GPR
 * is obtained and the store goes through sse_str_f().
 * NOTE(review): the line that puts i0 into the scratch register and the
 * release of that register are not visible in this view. */
975 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
978 if (sse_address_p(i0))
979 movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
981 reg = jit_get_reg(jit_class_gpr);
983 sse_str_f(rn(reg), r0);
/* Stores the float32 in r2 to the address formed by r0 and r1.
 * Two alternatives are visible:
 *   - add r0 and r1 into a scratch GPR, then store through sse_str_f();
 *   - a single movssrm() with base r0, index r1, scale _SCL1 and a zero
 *     displacement.
 * NOTE(review): what selects between them (presumably a preprocessor
 * conditional) is on lines not visible in this view. */
989 _sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
993 reg = jit_get_reg(jit_class_gpr);
994 addr(rn(reg), r0, r1);
995 sse_str_f(rn(reg), r2);
998 movssrm(r2, 0, r0, r1, _SCL1);
/* Stores the float32 in r1 to base register r0 plus constant offset i0.
 * If can_sign_extend_int_p(i0) holds, the offset is encoded directly as
 * the movssrm() displacement.  Otherwise a scratch GPR is used; two
 * fallback forms are visible (addi + sse_str_f, or sse_stxr_f with the
 * scratch register taking part in the address).
 * NOTE(review): the lines choosing between the two fallbacks and loading
 * i0 into the scratch register are not visible in this view. */
1003 _sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1006 if (can_sign_extend_int_p(i0))
1007 movssrm(r1, i0, r0, _NOREG, _SCL1);
1009 reg = jit_get_reg(jit_class_gpr);
1011 addi(rn(reg), r0, i0);
1012 sse_str_f(rn(reg), r1);
1015 sse_stxr_f(rn(reg), r0, r1);
/* Float32 conditional branch; by its name the "less than" variant, with
 * i0 presumably the branch target (TODO confirm).  The compare and jump
 * emission are on lines not visible here.  Returns _jit->pc.w as it
 * stands at the end of the function. */
1022 _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1026 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "less than or equal"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1031 _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1035 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "equal" variant, with i0
 * presumably the branch target (TODO confirm).
 * jp_code records a code position which is later fixed up with
 * patch_rel_char() to the position current at patch time -- presumably a
 * short jump that skips the branch for unordered operands; the emitting
 * lines are not visible here.  Returns _jit->pc.w at the end. */
1040 _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1045 jp_code = _jit->pc.w;
1047 patch_rel_char(jp_code, _jit->pc.w);
1048 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "greater than or equal"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1053 _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1057 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "greater than" variant,
 * with i0 presumably the branch target (TODO confirm).  The compare and
 * jump emission are on lines not visible here.  Returns _jit->pc.w as it
 * stands at the end of the function. */
1062 _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1066 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "not equal" variant, with
 * i0 presumably the branch target (TODO confirm).
 * Two code positions are recorded (jp_code, then jz_code) and each is
 * later fixed up with patch_rel_char() to the position current at the
 * time of its patch call; jp_code is patched first, jz_code afterwards.
 * NOTE(review): the jumps being patched and the code emitted between the
 * two patches are on lines not visible here.  Returns _jit->pc.w. */
1071 _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1077 jp_code = _jit->pc.w;
1079 jz_code = _jit->pc.w;
1080 patch_rel_char(jp_code, _jit->pc.w);
1082 patch_rel_char(jz_code, _jit->pc.w);
1083 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "unordered or less than"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1088 _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1092 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "unordered or less-equal"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1097 _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1105 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "unordered or equal"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1110 _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1118 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "unordered or
 * greater-equal" variant, with i0 presumably the branch target (TODO
 * confirm).  The compare and jump emission are on lines not visible here.
 * Returns _jit->pc.w as it stands at the end of the function. */
1123 _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1131 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "unordered or greater
 * than" variant, with i0 presumably the branch target (TODO confirm).
 * The compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1136 _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1140 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "less than or greater
 * than" variant, with i0 presumably the branch target (TODO confirm).
 * The compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1145 _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1149 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "ordered" variant (neither
 * operand NaN), with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1154 _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1158 return (_jit->pc.w);
/* Float32 conditional branch; by its name the "unordered" variant (at
 * least one operand NaN), with i0 presumably the branch target (TODO
 * confirm).  The compare and jump emission are on lines not visible here.
 * Returns _jit->pc.w as it stands at the end of the function. */
1163 _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1167 return (_jit->pc.w);
/* Float64 "equal" comparison of r1 and r2 with the result going to r0
 * (per the function name; the compare itself is on lines not shown).
 * rc records whether r0 satisfies reg8_p().  The visible movr() copies
 * _RAX_REGNO into r0 -- presumably the path taken when r0 cannot be used
 * directly as a byte register; TODO confirm against the elided lines.
 * jp_code remembers a code position that is later fixed up with
 * patch_rel_char() to point at the then-current position. */
1175 _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1180 if ((rc = reg8_p(r0)))
1184 movr(r0, _RAX_REGNO);
1189 jp_code = _jit->pc.w;
1191 patch_rel_char(jp_code, _jit->pc.w);
/* Float64 "not equal" comparison of r1 and r2 with the result going to r0
 * (per the function name; the compare itself is on lines not shown).
 * Visible skeleton: rc = reg8_p(r0), a copy from _RAX_REGNO into r0, and
 * a recorded code position (jp_code) that is patched with
 * patch_rel_char() to the position current at patch time.
 * NOTE(review): what is emitted between recording and patching is not
 * visible in this view. */
1201 _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1206 if ((rc = reg8_p(r0)))
1210 movr(r0, _RAX_REGNO);
1215 jp_code = _jit->pc.w;
1217 patch_rel_char(jp_code, _jit->pc.w);
/* Float64 "unordered or less-equal" (per the name).  Delegates to
 * ssecmpd() with condition X86_CC_NA; note the source operands are passed
 * swapped (r2, r1) relative to the parameter order.
 * NOTE(review): other lines of the body are not visible in this view. */
1226 _sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1231 ssecmpd(X86_CC_NA, r0, r2, r1);
/* Float64 "unordered or equal" (per the name).  Delegates to ssecmpd()
 * with condition X86_CC_E and operands in parameter order (r1, r2).
 * NOTE(review): other lines of the body are not visible in this view. */
1237 _sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1242 ssecmpd(X86_CC_E, r0, r1, r2);
/* Float64 "unordered or greater-equal" (per the name).  Delegates to
 * ssecmpd() with condition X86_CC_NA and operands in parameter order
 * (r1, r2) -- the mirror of the "unordered or less-equal" variant, which
 * passes them swapped.
 * NOTE(review): other lines of the body are not visible in this view. */
1248 _sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1253 ssecmpd(X86_CC_NA, r0, r1, r2);
/* Float64 "less than or greater than" (per the name).  Delegates to
 * ssecmpd() with condition X86_CC_NE and operands in parameter order.
 * NOTE(review): other lines of the body are not visible in this view. */
1260 _sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1265 ssecmpd(X86_CC_NE, r0, r1, r2);
/* Register-to-register move for the float64 (_d) family: emits the
 * 0xf2-prefixed X86_SSE_MOV opcode through ssexr() with operands r0, r1.
 * NOTE(review): only part of the body is visible in this view; any guard
 * around the emit (e.g. skipping r0 == r1) cannot be confirmed from here. */
1273 _sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1276 ssexr(0xf2, X86_SSE_MOV, r0, r1);
/* Materialises the float64 constant referenced by i0 in SSE register r0.
 * The visible lines show these strategies:
 *   - a test for positive zero (value compares equal to 0.0 and bit
 *     0x80000000 of data.ii[1] is clear); the action taken is on a line
 *     not shown;
 *   - a load straight from the constant's address via sse_ldi_d();
 *   - an immediate fallback through a scratch GPR: under
 *     `__X64 && !__X64_32` the whole word is moved with movdqxr();
 *     otherwise the two 32-bit halves are written to the frame at
 *     CVT_OFFSET / CVT_OFFSET + 4 (relative to _RBP_REGNO) and the double
 *     is reloaded from there.
 * NOTE(review): `data` is presumably a union filled from *i0 on a line
 * that is not visible here -- confirm. */
1280 _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
1291 if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
/* Loading from memory is considered only when no_data is not set. */
1294 ldi = !_jitc->no_data;
1296 /* if the load would need a register to hold the address, just use an immediate */
1297 if (ldi && !sse_address_p(i0))
1301 sse_ldi_d(r0, (jit_word_t)i0);
1303 reg = jit_get_reg(jit_class_gpr);
1304 #if __X64 && !__X64_32
1305 movi(rn(reg), data.w);
1306 movdqxr(r0, rn(reg));
/* Alternative path: spill both halves to the frame, then reload as a double. */
1309 movi(rn(reg), data.ii[0]);
1310 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1311 movi(rn(reg), data.ii[1]);
1312 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1314 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
/* Loads a float64 into r0 from the absolute address i0.
 * When sse_address_p(i0) holds, the address is encoded directly in
 * movsdmr() with no base and no index register.  Otherwise a scratch GPR
 * is obtained and the load goes through sse_ldr_d().
 * NOTE(review): the line that puts i0 into the scratch register and the
 * release of that register are not visible in this view. */
1321 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1324 if (sse_address_p(i0))
1325 movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
1327 reg = jit_get_reg(jit_class_gpr);
1329 sse_ldr_d(r0, rn(reg));
/* Loads a float64 into r0 from the address formed by r1 and r2.
 * Two alternatives are visible:
 *   - add r1 and r2 into a scratch GPR, then load through sse_ldr_d();
 *   - a single movsdmr() with base r1, index r2, scale _SCL1 and a zero
 *     displacement.
 * NOTE(review): what selects between them (presumably a preprocessor
 * conditional) is on lines not visible in this view. */
1335 _sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1339 reg = jit_get_reg(jit_class_gpr);
1340 addr(rn(reg), r1, r2);
1341 sse_ldr_d(r0, rn(reg));
1344 movsdmr(0, r1, r2, _SCL1, r0);
/* Loads a float64 into r0 from base register r1 plus constant offset i0.
 * If can_sign_extend_int_p(i0) holds, the offset is encoded directly as
 * the movsdmr() displacement.  Otherwise a scratch GPR is used; two
 * fallback forms are visible (addi + sse_ldr_d, or sse_ldxr_d with the
 * scratch register as index).
 * NOTE(review): the lines choosing between the two fallbacks and loading
 * i0 into the scratch register are not visible in this view. */
1349 _sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1352 if (can_sign_extend_int_p(i0))
1353 movsdmr(i0, r1, _NOREG, _SCL1, r0);
1355 reg = jit_get_reg(jit_class_gpr);
1357 addi(rn(reg), r1, i0);
1358 sse_ldr_d(r0, rn(reg));
1361 sse_ldxr_d(r0, r1, rn(reg));
/* Stores the float64 in r0 to the absolute address i0.
 * When sse_address_p(i0) holds, the address is encoded directly in
 * movsdrm() with no base and no index register.  Otherwise a scratch GPR
 * is obtained and the store goes through sse_str_d().
 * NOTE(review): the line that puts i0 into the scratch register and the
 * release of that register are not visible in this view. */
1368 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1371 if (sse_address_p(i0))
1372 movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
1374 reg = jit_get_reg(jit_class_gpr);
1376 sse_str_d(rn(reg), r0);
/* Stores the float64 in r2 to the address formed by r0 and r1.
 * Two alternatives are visible:
 *   - add r0 and r1 into a scratch GPR, then store through sse_str_d();
 *   - a single movsdrm() with base r0, index r1, scale _SCL1 and a zero
 *     displacement.
 * NOTE(review): what selects between them (presumably a preprocessor
 * conditional) is on lines not visible in this view. */
1382 _sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1386 reg = jit_get_reg(jit_class_gpr);
1387 addr(rn(reg), r0, r1);
1388 sse_str_d(rn(reg), r2);
1391 movsdrm(r2, 0, r0, r1, _SCL1);
/* Stores the float64 in r1 to base register r0 plus constant offset i0.
 * If can_sign_extend_int_p(i0) holds, the offset is encoded directly as
 * the movsdrm() displacement.  Otherwise a scratch GPR is used; two
 * fallback forms are visible (addi + sse_str_d, or an indexed store with
 * the scratch register taking part in the address).
 * NOTE(review): the lines choosing between the two fallbacks and loading
 * i0 into the scratch register are not visible in this view. */
1396 _sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1399 if (can_sign_extend_int_p(i0))
1400 movsdrm(r1, i0, r0, _NOREG, _SCL1);
1402 reg = jit_get_reg(jit_class_gpr);
1404 addi(rn(reg), r0, i0);
1405 sse_str_d(rn(reg), r1);
/* Must be the float64 indexed store: the float32 helper (sse_stxr_f)
 * would write only a single-precision value for a double operand. */
1408 sse_stxr_d(rn(reg), r0, r1);
/* Float64 conditional branch; by its name the "less than" variant, with
 * i0 presumably the branch target (TODO confirm).  The compare and jump
 * emission are on lines not visible here.  Returns _jit->pc.w as it
 * stands at the end of the function. */
1415 _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1419 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "less than or equal"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1424 _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1428 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "equal" variant, with i0
 * presumably the branch target (TODO confirm).
 * jp_code records a code position which is later fixed up with
 * patch_rel_char() to the position current at patch time -- presumably a
 * short jump that skips the branch for unordered operands; the emitting
 * lines are not visible here.  Returns _jit->pc.w at the end. */
1433 _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1438 jp_code = _jit->pc.w;
1440 patch_rel_char(jp_code, _jit->pc.w);
1441 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "greater than or equal"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1446 _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1450 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "greater than" variant,
 * with i0 presumably the branch target (TODO confirm).  The compare and
 * jump emission are on lines not visible here.  Returns _jit->pc.w as it
 * stands at the end of the function. */
1455 _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1459 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "not equal" variant, with
 * i0 presumably the branch target (TODO confirm).
 * Two code positions are recorded (jp_code, then jz_code) and each is
 * later fixed up with patch_rel_char() to the position current at the
 * time of its patch call; jp_code is patched first, jz_code afterwards.
 * NOTE(review): the jumps being patched and the code emitted between the
 * two patches are on lines not visible here.  Returns _jit->pc.w. */
1464 _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1470 jp_code = _jit->pc.w;
1472 jz_code = _jit->pc.w;
1473 patch_rel_char(jp_code, _jit->pc.w);
1475 patch_rel_char(jz_code, _jit->pc.w);
1476 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "unordered or less than"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1481 _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1485 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "unordered or less-equal"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1490 _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1498 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "unordered or equal"
 * variant, with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1503 _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1511 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "unordered or
 * greater-equal" variant, with i0 presumably the branch target (TODO
 * confirm).  The compare and jump emission are on lines not visible here.
 * Returns _jit->pc.w as it stands at the end of the function. */
1516 _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1524 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "unordered or greater
 * than" variant, with i0 presumably the branch target (TODO confirm).
 * The compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1529 _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1533 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "less than or greater
 * than" variant, with i0 presumably the branch target (TODO confirm).
 * The compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1538 _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1542 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "ordered" variant (neither
 * operand NaN), with i0 presumably the branch target (TODO confirm).  The
 * compare and jump emission are on lines not visible here.  Returns
 * _jit->pc.w as it stands at the end of the function. */
1547 _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1551 return (_jit->pc.w);
/* Float64 conditional branch; by its name the "unordered" variant (at
 * least one operand NaN), with i0 presumably the branch target (TODO
 * confirm).  The compare and jump emission are on lines not visible here.
 * Returns _jit->pc.w as it stands at the end of the function. */
1556 _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1560 return (_jit->pc.w);