Merge pull request #750 from pcercuei/lightrec-update-20230826
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86-sse.c
CommitLineData
4a71579b 1/*
79bfeef6 2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4a71579b
PC
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20#if PROTO
4a71579b
PC
/* Register numbers for the SSE registers XMM6 through XMM15. */
21# define _XMM6_REGNO 6
22# define _XMM7_REGNO 7
23# define _XMM8_REGNO 8
24# define _XMM9_REGNO 9
25# define _XMM10_REGNO 10
26# define _XMM11_REGNO 11
27# define _XMM12_REGNO 12
28# define _XMM13_REGNO 13
29# define _XMM14_REGNO 14
30# define _XMM15_REGNO 15
/*
 * SSE opcode byte values.  They are passed as the opcode argument of the
 * emitters declared below (sser, ssexr, sselxr, ssexrx), which write the
 * value right after a 0x0f byte.
 */
31#define X86_SSE_MOV 0x10
32#define X86_SSE_MOV1 0x11
33#define X86_SSE_MOVLP 0x12
34#define X86_SSE_MOVHP 0x16
35#define X86_SSE_MOVA 0x28
36#define X86_SSE_CVTIS 0x2a
37#define X86_SSE_CVTTSI 0x2c
38#define X86_SSE_CVTSI 0x2d
39#define X86_SSE_UCOMI 0x2e
40#define X86_SSE_COMI 0x2f
41#define X86_SSE_ROUND 0x3a
42#define X86_SSE_SQRT 0x51
43#define X86_SSE_RSQRT 0x52
44#define X86_SSE_RCP 0x53
45#define X86_SSE_AND 0x54
46#define X86_SSE_ANDN 0x55
47#define X86_SSE_OR 0x56
48#define X86_SSE_XOR 0x57
49#define X86_SSE_ADD 0x58
50#define X86_SSE_MUL 0x59
51#define X86_SSE_CVTSD 0x5a
52#define X86_SSE_CVTDT 0x5b
53#define X86_SSE_SUB 0x5c
54#define X86_SSE_MIN 0x5d
55#define X86_SSE_DIV 0x5e
56#define X86_SSE_MAX 0x5f
57#define X86_SSE_X2G 0x6e
58#define X86_SSE_EQB 0x74
59#define X86_SSE_EQW 0x75
60#define X86_SSE_EQD 0x76
61#define X86_SSE_G2X 0x7e
62#define X86_SSE_MOV2 0xd6
/*
 * Low level register-to-register emitters.
 *   sser(c, r0, r1)      - opcode c, no prefix byte.
 *   ssexr(p, c, r0, r1)  - same, with prefix byte p written first.
 *   ssexi(c, r0, m, i)   - 0x66 prefix, opcode c, m in the reg field of the
 *                          mod/rm byte, followed by the immediate byte i.
 * Every wrapper supplies the implicit _jit argument.
 */
63# define sser(c,r0,r1) _sser(_jit,c,r0,r1)
64static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
65# define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
ba86ff93
PC
66static void _ssexr(jit_state_t*,
67 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
68# define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
69static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/*
 * Scalar arithmetic: the "ss" (single precision) forms use prefix 0xf3 and
 * the "sd" (double precision) forms use prefix 0xf2.
 */
70# define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
71# define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1)
72# define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1)
73# define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1)
74# define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1)
75# define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1)
76# define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1)
77# define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1)
/* The "ps" forms take no prefix byte; the "pd" forms take prefix 0x66. */
78# define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1)
79# define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1)
/* Truncating float/double to integer conversions. */
80# define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
81# define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
/*
 * On x86-64 the long conversions and the integer to float/double
 * conversions go through sselxr(); otherwise ssexr() is used.
 */
82# if __X64
83# define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
84# define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
85# define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
86# define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
87# else
88# define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
89# define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
90# endif
/* Conversions between float and double. */
91# define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
92# define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
93# define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)
94# define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
95# define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
96# define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
ba86ff93
PC
/* Moves using the X2G / G2X opcodes; the "q" forms go through sselxr(). */
97# define movdxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
98# define movdrx(r0,r1) ssexr(0x66, X86_SSE_G2X,r0,r1)
99# define movqxr(r0,r1) sselxr(0x66, X86_SSE_X2G,r0,r1)
100# define movqrx(r0,r1) sselxr(0x66, X86_SSE_G2X,r0,r1)
4a71579b
PC
101# define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
/* Shifts by immediate: opcode 0x72 or 0x73, with 0x02 or 0x06 as "m". */
102# define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
103# define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
104# define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
105# define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
4a71579b
PC
/*
 * sselxr() has its own emitter only when __X64 is set and __X64_32 is not;
 * in every other configuration it is an alias of ssexr().
 */
106# if __X64 && !__X64_32
107# define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
108static void
109_sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
110# else
111# define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1)
112# endif
/*
 * Memory operand emitter.  movssmr/movsdmr (opcode X86_SSE_MOV) are what
 * sse_ldr_f/sse_ldr_d expand to, and movssrm/movsdrm (opcode X86_SSE_MOV1)
 * are what sse_str_f/sse_str_d expand to.
 */
113# define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
114# define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
115# define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
116# define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
117# define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
118static void
119_ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
120 jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Arithmetic entry points.  The "r" variants take three register numbers;
 * the "i" variants take a pointer to the floating point constant as their
 * last argument.  The _f suffix works on jit_float32_t and _d on
 * jit_float64_t.
 */
121# define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2)
122static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
123# define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0)
124static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
125# define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2)
126static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
127# define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0)
128static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
129# define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2)
130static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
131# define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0)
132static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
133# define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2)
134static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
135# define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0)
136static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Reverse subtract is subtract with the two source registers swapped. */
137# define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1)
138# define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0)
139static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
140# define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1)
141# define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0)
142static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
143# define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2)
144static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
145# define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0)
146static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
147# define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2)
148static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
149# define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0)
150static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
151# define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2)
152static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
153# define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0)
154static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
155# define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2)
156static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
157# define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0)
158static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Two-register operations: absolute value and negation. */
159# define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1)
160static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
161# define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1)
162static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
163# define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1)
164static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
165# define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1)
166static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
/* Square root expands directly to a single ssexr() emission. */
167# define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
168# define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
ba86ff93
PC
/* Four-register operations (fmar, fmsr, fnmar, fnmsr). */
169# define sse_fmar_f(r0, r1, r2, r3) _sse_fmar_f(_jit, r0, r1, r2, r3)
170static void _sse_fmar_f(jit_state_t*,
171 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
172# define sse_fmar_d(r0, r1, r2, r3) _sse_fmar_d(_jit, r0, r1, r2, r3)
173static void _sse_fmar_d(jit_state_t*,
174 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
175# define sse_fmsr_f(r0, r1, r2, r3) _sse_fmsr_f(_jit, r0, r1, r2, r3)
176static void _sse_fmsr_f(jit_state_t*,
177 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
178# define sse_fmsr_d(r0, r1, r2, r3) _sse_fmsr_d(_jit, r0, r1, r2, r3)
179static void _sse_fmsr_d(jit_state_t*,
180 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
181# define sse_fnmar_f(r0, r1, r2, r3) _sse_fnmar_f(_jit, r0, r1, r2, r3)
182static void _sse_fnmar_f(jit_state_t*,
183 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
184# define sse_fnmar_d(r0, r1, r2, r3) _sse_fnmar_d(_jit, r0, r1, r2, r3)
185static void _sse_fnmar_d(jit_state_t*,
186 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
187# define sse_fnmsr_f(r0, r1, r2, r3) _sse_fnmsr_f(_jit, r0, r1, r2, r3)
188static void _sse_fnmsr_f(jit_state_t*,
189 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
190# define sse_fnmsr_d(r0, r1, r2, r3) _sse_fnmsr_d(_jit, r0, r1, r2, r3)
191static void _sse_fnmsr_d(jit_state_t*,
192 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
4a71579b
PC
/*
 * ssecmpf() and ssecmpd() share _ssecmp(); its jit_bool_t argument is 0 for
 * the float form and 1 for the double form.
 */
193# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
194# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
195static void
196_ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
197 jit_int32_t, jit_int32_t, jit_int32_t);
/* Float register move and load of a constant passed by pointer. */
198#define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1)
199static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
200#define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
201static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
ba86ff93
PC
/* Note that sse_movr_f_w() swaps its arguments before calling movdrx(). */
202# define sse_movr_w_f(r0,r1) movdxr(r0, r1)
203# define sse_movr_f_w(r0,r1) movdrx(r1, r0)
204#define sse_movi_w_f(r0, i0) _sse_movi_w_f(_jit, r0, i0)
205static void _sse_movi_w_f(jit_state_t*, jit_int32_t, jit_word_t);
4a71579b
PC
/*
 * Float comparisons.  Register forms that can be expressed as a condition
 * code plus an operand order expand to ssecmpf() directly (some with r1 and
 * r2 swapped); the remaining register forms and all immediate forms are
 * functions.
 */
206# define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
207static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
208# define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
209# define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0)
210static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
211# define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2)
212# define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0)
213static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
214# define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2)
215static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
216# define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0)
217static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
218# define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1)
219# define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0)
220static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
221# define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1)
222# define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0)
223static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
224# define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2)
225static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
226# define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0)
227static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
228# define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1)
229# define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0)
230static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* The prototype for sse_unler_f() follows the sse_uneqi_f() pair below. */
231# define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2)
232# define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0)
233static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
234static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
235# define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2)
236static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
237# define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0)
238static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
239# define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2)
240static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
241# define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0)
242static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
243# define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2)
244# define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0)
245static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
246# define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2)
247static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
248# define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0)
249static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
250# define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1)
251# define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0)
252static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
253# define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1)
/*
 * Float loads.  sse_ldr_f() is a movssmr() with displacement 0, base r1,
 * _NOREG as index and _SCL1 as scale.
 */
254# define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0)
255# define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0)
256static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
257# define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2)
258static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
259# define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
260static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
ba86ff93
PC
261# define sse_unldr_x(r0, r1, i0) _sse_unldr_x(_jit, r0, r1, i0)
262static void _sse_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
263# define sse_unldi_x(r0, i0, i1) _sse_unldi_x(_jit, r0, i0, i1)
264static void _sse_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
4a71579b
PC
/*
 * Float stores.  sse_str_f() is a movssrm() of r1 with displacement 0,
 * base r0, _NOREG as index and _SCL1 as scale.
 */
265# define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
266# define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
267static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
268# define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2)
269static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
270# define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
271static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
ba86ff93
PC
272#define sse_unstr_x(r0, r1, i0) _sse_unstr_x(_jit, r0, r1, i0)
273static void _sse_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
274#define sse_unsti_x(i0, r0, i1) _sse_unsti_x(_jit, i0, r0, i1)
275static void _sse_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
4a71579b
PC
/*
 * Conditional branches on float operands.  Every function returns a
 * jit_word_t and takes a jit_word_t i0 first (presumably the branch target;
 * confirm against the definitions).  The "r" forms compare two registers,
 * the "i" forms compare a register with a constant passed by pointer.
 */
276# define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
277static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
278# define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
279static jit_word_t
280_sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
281# define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1)
282static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
283# define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1)
284static jit_word_t
285_sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
286# define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1)
287static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
288# define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1)
289static jit_word_t
290_sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
291# define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1)
292static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
293# define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1)
294static jit_word_t
295_sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
296# define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1)
297static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
298# define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1)
299static jit_word_t
300_sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
301# define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1)
302static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
303# define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1)
304static jit_word_t
305_sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/* Branches whose names carry the "un" prefix, plus ltgt, ord and unord. */
306# define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1)
307static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
308# define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1)
309static jit_word_t
310_sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
311# define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1)
312static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
313# define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1)
314static jit_word_t
315_sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
316# define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1)
317static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
318# define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1)
319static jit_word_t
320_sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
321# define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1)
322static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
323# define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1)
324static jit_word_t
325_sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
326# define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1)
327static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
328# define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1)
329static jit_word_t
330_sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
331# define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1)
332static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
333# define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1)
334static jit_word_t
335_sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
336# define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1)
337static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
338# define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1)
339static jit_word_t
340_sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
341# define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1)
342static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
343# define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1)
344static jit_word_t
345_sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/* Double register move and load of a constant passed by pointer. */
346#define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1)
347static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
348#define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
349static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
ba86ff93
PC
/*
 * With __X32 or __X64_32 set, a double is moved as a pair of words ("ww");
 * otherwise a single word is used and the register forms expand to
 * movqxr()/movqrx().
 */
350# if __X32 || __X64_32
351# define sse_movr_ww_d(r0, r1, r2) _sse_movr_ww_d(_jit, r0, r1, r2)
352static void _sse_movr_ww_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
353# define sse_movr_d_ww(r0, r1, r2) _sse_movr_d_ww(_jit, r0, r1, r2)
354static void _sse_movr_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
355# define sse_movi_ww_d(r0, i0, i1) _sse_movi_ww_d(_jit, r0, i0, i1)
356static void _sse_movi_ww_d(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
357# else
358# define sse_movr_w_d(r0, r1) movqxr(r0, r1)
359# define sse_movr_d_w(r0, r1) movqrx(r1, r0)
360# define sse_movi_w_d(r0, i0) _sse_movi_w_d(_jit, r0, i0)
361static void _sse_movi_w_d(jit_state_t*, jit_int32_t, jit_word_t);
362# endif
4a71579b
PC
/*
 * Double comparisons, laid out like the float ones: register forms that are
 * a condition code plus an operand order expand to ssecmpd() directly (some
 * with r1 and r2 swapped); the remaining forms are functions.
 */
363# define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
364# define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
365static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
366# define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2)
367# define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0)
368static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
369# define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2)
370static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
371# define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0)
372static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
373# define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1)
374# define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0)
375static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
376# define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1)
377# define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0)
378static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
379# define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2)
380static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
381# define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0)
382static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
383# define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1)
384# define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0)
385static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
386# define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2)
387static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
388# define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0)
389static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
390# define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2)
391static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
392# define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0)
393static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
394# define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2)
395static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
396# define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0)
397static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
398# define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2)
399# define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0)
400static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
401# define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2)
402static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
403# define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0)
404static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
405# define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1)
406# define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0)
407static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
408# define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1)
409# define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0)
410static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/*
 * Double loads.  sse_ldr_d() is a movsdmr() with displacement 0, base r1,
 * _NOREG as index and _SCL1 as scale.
 */
411# define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0)
412# define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0)
413static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
414# define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2)
415static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
416# define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0)
417static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/* The prototype for sse_bltr_d() follows the double store declarations. */
418# define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1)
/*
 * Double stores.  sse_str_d() is a movsdrm() of r1 with displacement 0,
 * base r0, _NOREG as index and _SCL1 as scale.
 */
419# define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1)
420# define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0)
421static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
422# define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2)
423static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
424# define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1)
425static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Conditional branches on double operands.  Every function returns a
 * jit_word_t and takes a jit_word_t first (presumably the branch target;
 * confirm against the definitions).  The "r" forms compare two registers,
 * the "i" forms compare a register with a constant passed by pointer.
 * The first prototype belongs to the sse_bltr_d() macro defined earlier.
 */
426static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
427# define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1)
428static jit_word_t
429_sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
430# define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1)
431static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
432# define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1)
433static jit_word_t
434_sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
435# define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1)
436static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
437# define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1)
438static jit_word_t
439_sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
440# define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1)
441static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
442# define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1)
443static jit_word_t
444_sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
445# define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1)
446static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
447# define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1)
448static jit_word_t
449_sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
450# define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1)
451static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
452# define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1)
453static jit_word_t
454_sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
/* Branches whose names carry the "un" prefix, plus ltgt, ord and unord. */
455# define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1)
456static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
457# define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1)
458static jit_word_t
459_sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
460# define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1)
461static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
462# define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1)
463static jit_word_t
464_sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
465# define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1)
466static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
467# define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1)
468static jit_word_t
469_sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
470# define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1)
471static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
472# define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1)
473static jit_word_t
474_sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
475# define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1)
476static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
477# define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1)
478static jit_word_t
479_sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
480# define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1)
481static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
482# define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1)
483static jit_word_t
484_sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
485# define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1)
486static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
487# define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1)
488static jit_word_t
489_sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
490# define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1)
491static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
492# define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1)
493static jit_word_t
494_sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
495#endif
496
497#if CODE
/*
 * fpr_opi(name, type, size) expands to the definition of
 * _sse_<name>i_<type>(): the "register OP constant" form of an operation.
 *
 * The generated function grabs a scratch register of class
 * jit_class_fpr|jit_class_xpr, asserts that it is an SSE register, loads the
 * constant pointed to by i0 into it with sse_movi_<type>(), runs the
 * register form sse_<name>r_<type>(r0, r1, scratch) and then releases the
 * scratch register.
 */
# define fpr_opi(name, type, size)					\
static void								\
_sse_##name##i_##type(jit_state_t *_jit,				\
		      jit_int32_t r0, jit_int32_t r1,			\
		      jit_float##size##_t *i0)				\
{									\
    jit_int32_t		scratch;					\
    scratch = jit_get_reg(jit_class_fpr|jit_class_xpr);			\
    assert(jit_sse_reg_p(scratch));					\
    sse_movi_##type(rn(scratch), i0);					\
    sse_##name##r_##type(r0, r1, rn(scratch));				\
    jit_unget_reg(scratch);						\
}
/*
 * fpr_bopi(name, type, size) expands to the definition of
 * _sse_b<name>i_<type>(): the "branch if register OP constant" form.
 *
 * The generated function grabs a scratch register of class
 * jit_class_fpr|jit_class_xpr|jit_class_nospill, asserts that it is an SSE
 * register, loads the constant pointed to by i1 into it, calls the register
 * form sse_b<name>r_<type>(i0, r0, scratch), releases the scratch register
 * and returns the value produced by the register form.
 */
# define fpr_bopi(name, type, size)					\
static jit_word_t							\
_sse_b##name##i_##type(jit_state_t *_jit,				\
		       jit_word_t i0, jit_int32_t r0,			\
		       jit_float##size##_t *i1)				\
{									\
    jit_word_t		result;						\
    jit_int32_t		scratch;					\
    scratch = jit_get_reg(jit_class_fpr|jit_class_xpr|			\
			  jit_class_nospill);				\
    assert(jit_sse_reg_p(scratch));					\
    sse_movi_##type(rn(scratch), i1);					\
    result = sse_b##name##r_##type(i0, r0, rn(scratch));		\
    jit_unget_reg(scratch);						\
    return (result);							\
}
/*
 * Shorthands that bind the two templates above to the float (f, 32) and
 * double (d, 64) variants.
 */
525# define fopi(name) fpr_opi(name, f, 32)
526# define fbopi(name) fpr_bopi(name, f, 32)
527# define dopi(name) fpr_opi(name, d, 64)
528# define dbopi(name) fpr_bopi(name, d, 64)
/*
 * Emit a two-register SSE instruction that takes no prefix byte.
 *
 * c is the opcode byte written after 0x0f; r0 goes into the reg field and
 * r1 into the r/m field of a mod/rm byte with mod 0x03.  rex() is called
 * first with both registers (presumably emitting a REX prefix when one is
 * needed; confirm against its definition).
 */
529static void
530_sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
531{
532 rex(0, 0, r0, 0, r1);
533 ic(0x0f);
534 ic(c);
535 mrm(0x03, r7(r0), r7(r1));
536}
537
538static void
539_ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
540 jit_int32_t r0, jit_int32_t r1)
541{
542 ic(p);
543 rex(0, 0, r0, 0, r1);
544 ic(0x0f);
545 ic(c);
546 mrm(0x03, r7(r0), r7(r1));
547}
548
/*
 * Emit a 0x66-prefixed SSE instruction that takes a register and an
 * immediate byte.
 *
 * c is the opcode byte written after 0x0f.  m is placed in the reg field of
 * the mod/rm byte (the psrl/psrq/psll/pslq macros pass 0x02 or 0x06 there),
 * r0 in the r/m field, and i is written last as a single byte.
 */
549static void
550_ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
551 jit_int32_t m, jit_int32_t i)
552{
553 ic(0x66);
554 rex(0, 0, 0, 0, r0);
555 ic(0x0f);
556 ic(c);
557 mrm(0x03, r7(m), r7(r0));
558 ic(i);
559}
560
/*
 * Emit a two-register SSE instruction preceded by the prefix byte p, like
 * _ssexr(), but passing 1 instead of 0 as the second rex() argument
 * (presumably selecting REX.W, i.e. a 64-bit integer operand; confirm
 * against the rex() definition).
 *
 * The guard matches the one around the prototype and the sselxr() macro in
 * the PROTO section.  When __X64_32 is set, sselxr() is an alias of ssexr(),
 * so compiling this function there would only leave an unused static
 * definition behind.
 */
#if __X64 && !__X64_32
static void
_sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
	jit_int32_t r0, jit_int32_t r1)
{
    ic(p);
    rex(0, 1, r0, 0, r1);
    ic(0x0f);
    ic(c);
    mrm(0x03, r7(r0), r7(r1));
}
#endif
573
/*
 * Emit a prefixed SSE instruction with one register operand and one memory
 * operand.
 *
 * px is the prefix byte and code the opcode byte written after 0x0f.  rd is
 * the register operand; md, rb, ri and ms are forwarded unchanged to rx()
 * (named after displacement, base, index and scale; see the movssmr and
 * movssrm macros for how callers fill them in).
 */
574static void
575_ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
576 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
577{
578 ic(px);
579 rex(0, 0, rd, ri, rb);
580 ic(0x0f);
581 ic(code);
582 rx(rd, md, rb, ri, ms);
583}
584
585static void
586_sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
587{
588 if (r0 == r1)
589 addssr(r0, r2);
590 else if (r0 == r2)
591 addssr(r0, r1);
592 else {
593 sse_movr_f(r0, r1);
594 addssr(r0, r2);
595 }
596}
597
598fopi(add)
599
600static void
601_sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
602{
603 if (r0 == r1)
604 addsdr(r0, r2);
605 else if (r0 == r2)
606 addsdr(r0, r1);
607 else {
608 sse_movr_d(r0, r1);
609 addsdr(r0, r2);
610 }
611}
612
613dopi(add)
614
615static void
616_sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
617{
618 jit_int32_t reg;
619 if (r0 == r1)
620 subssr(r0, r2);
621 else if (r0 == r2) {
622 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
623 sse_movr_f(rn(reg), r0);
624 sse_movr_f(r0, r1);
625 subssr(r0, rn(reg));
626 jit_unget_reg(reg);
627 }
628 else {
629 sse_movr_f(r0, r1);
630 subssr(r0, r2);
631 }
632}
633
634fopi(sub)
635
636static void
637_sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
638{
639 jit_int32_t reg;
640 if (r0 == r1)
641 subsdr(r0, r2);
642 else if (r0 == r2) {
643 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
644 sse_movr_d(rn(reg), r0);
645 sse_movr_d(r0, r1);
646 subsdr(r0, rn(reg));
647 jit_unget_reg(reg);
648 }
649 else {
650 sse_movr_d(r0, r1);
651 subsdr(r0, r2);
652 }
653}
654
655dopi(sub)
656
657fopi(rsb)
658
659dopi(rsb)
660
661static void
662_sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
663{
664 if (r0 == r1)
665 mulssr(r0, r2);
666 else if (r0 == r2)
667 mulssr(r0, r1);
668 else {
669 sse_movr_f(r0, r1);
670 mulssr(r0, r2);
671 }
672}
673
674fopi(mul)
675
676static void
677_sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
678{
679 if (r0 == r1)
680 mulsdr(r0, r2);
681 else if (r0 == r2)
682 mulsdr(r0, r1);
683 else {
684 sse_movr_d(r0, r1);
685 mulsdr(r0, r2);
686 }
687}
688
689dopi(mul)
690
691static void
692_sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
693{
694 jit_int32_t reg;
695 if (r0 == r1)
696 divssr(r0, r2);
697 else if (r0 == r2) {
698 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
699 sse_movr_f(rn(reg), r0);
700 sse_movr_f(r0, r1);
701 divssr(r0, rn(reg));
702 jit_unget_reg(reg);
703 }
704 else {
705 sse_movr_f(r0, r1);
706 divssr(r0, r2);
707 }
708}
709
710fopi(div)
711
712static void
713_sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
714{
715 jit_int32_t reg;
716 if (r0 == r1)
717 divsdr(r0, r2);
718 else if (r0 == r2) {
719 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
720 sse_movr_d(rn(reg), r0);
721 sse_movr_d(r0, r1);
722 divsdr(r0, rn(reg));
723 jit_unget_reg(reg);
724 }
725 else {
726 sse_movr_d(r0, r1);
727 divsdr(r0, r2);
728 }
729}
730
731dopi(div)
732
733static void
734_sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
735{
736 jit_int32_t reg;
737 if (r0 == r1) {
738 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
739 pcmpeqlr(rn(reg), rn(reg));
740 psrl(rn(reg), 1);
741 andpsr(r0, rn(reg));
742 jit_unget_reg(reg);
743 }
744 else {
745 pcmpeqlr(r0, r0);
746 psrl(r0, 1);
747 andpsr(r0, r1);
748 }
749}
750
751static void
752_sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
753{
754 jit_int32_t reg;
755 if (r0 == r1) {
756 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
757 pcmpeqlr(rn(reg), rn(reg));
758 psrq(rn(reg), 1);
759 andpdr(r0, rn(reg));
760 jit_unget_reg(reg);
761 }
762 else {
763 pcmpeqlr(r0, r0);
764 psrq(r0, 1);
765 andpdr(r0, r1);
766 }
767}
768
769static void
770_sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
771{
772 jit_int32_t freg, ireg;
773 ireg = jit_get_reg(jit_class_gpr);
774 imovi(rn(ireg), 0x80000000);
775 if (r0 == r1) {
776 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
ba86ff93 777 movdxr(rn(freg), rn(ireg));
4a71579b
PC
778 xorpsr(r0, rn(freg));
779 jit_unget_reg(freg);
780 }
781 else {
ba86ff93 782 movdxr(r0, rn(ireg));
4a71579b
PC
783 xorpsr(r0, r1);
784 }
785 jit_unget_reg(ireg);
786}
787
788static void
789_sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
790{
791 jit_int32_t freg, ireg;
792 ireg = jit_get_reg(jit_class_gpr);
793 imovi(rn(ireg), 0x80000000);
794 if (r0 == r1) {
795 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
ba86ff93 796 movdxr(rn(freg), rn(ireg));
4a71579b
PC
797 pslq(rn(freg), 32);
798 xorpdr(r0, rn(freg));
799 jit_unget_reg(freg);
800 }
801 else {
ba86ff93 802 movdxr(r0, rn(ireg));
4a71579b
PC
803 pslq(r0, 32);
804 xorpdr(r0, r1);
805 }
806 jit_unget_reg(ireg);
807}
808
ba86ff93
PC
809/* r1 = (r1 * r3) + r2 */
810#define vfmadd132ss(r1, r2, r3) _vfmadd132sx(_jit, 0, r1, r2, r3)
811#define vfmadd132sd(r1, r2, r3) _vfmadd132sx(_jit, 1, r1, r2, r3)
812static void
813_vfmadd132sx(jit_state_t *_jit, jit_bool_t dbl,
814 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
815{
816 /* VFMADD132SD */
817 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
818 ic(0x99);
819 mrm(0x03, r7(r1), r7(r3));
820}
821
822/* r1 = (r1 * r3) - r2 */
823#define vfmsub132ss(r1, r2, r3) _vfmsub132sx(_jit, 0, r1, r2, r3)
824#define vfmsub132sd(r1, r2, r3) _vfmsub132sx(_jit, 1, r1, r2, r3)
825static void
826_vfmsub132sx(jit_state_t *_jit, jit_bool_t dbl,
827 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
828{
829 /* VFMSUB132SD */
830 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
831 ic(0x9b);
832 mrm(0x03, r7(r1), r7(r3));
833}
834
835/* r1 = (r1 * r2) + r3 */
836#define vfmadd213ss(r1, r2, r3) _vfmadd213sx(_jit, 0, r1, r2, r3)
837#define vfmadd213sd(r1, r2, r3) _vfmadd213sx(_jit, 1, r1, r2, r3)
838static void
839_vfmadd213sx(jit_state_t *_jit, jit_bool_t dbl,
840 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
841{
842 /* VFMADD132SD */
843 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
844 ic(0xa9);
845 mrm(0x03, r7(r1), r7(r3));
846}
847
848/* r1 = (r1 * r2) - r3 */
849#define vfmsub213ss(r1, r2, r3) _vfmsub213sx(_jit, 0, r1, r2, r3)
850#define vfmsub213sd(r1, r2, r3) _vfmsub213sx(_jit, 1, r1, r2, r3)
851static void
852_vfmsub213sx(jit_state_t *_jit, jit_bool_t dbl,
853 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
854{
855 /* VFMSUB132SD */
856 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
857 ic(0xab);
858 mrm(0x03, r7(r1), r7(r3));
859}
860
861/* r1 = (r2 * r3) + r1 */
862#define vfmadd231ss(r1, r2, r3) _vfmadd231sx(_jit, 0, r1, r2, r3)
863#define vfmadd231sd(r1, r2, r3) _vfmadd231sx(_jit, 1, r1, r2, r3)
864static void
865_vfmadd231sx(jit_state_t *_jit, jit_bool_t dbl,
866 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
867{
868 /* VFMADD231SD */
869 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
870 ic(0xb9);
871 mrm(0x03, r7(r1), r7(r3));
872}
873
874/* r1 = (r2 * r3) - r1 */
875#define vfmsub231ss(r1, r2, r3) _vfmsub231sx(_jit, 0, r1, r2, r3)
876#define vfmsub231sd(r1, r2, r3) _vfmsub231sx(_jit, 1, r1, r2, r3)
877static void
878_vfmsub231sx(jit_state_t *_jit, jit_bool_t dbl,
879 jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
880{
881 /* VFMSUB231SD */
882 vex(r1, _NOREG, r3, 2, !!dbl, r2, 0, 1);
883 ic(0xbb);
884 mrm(0x03, r7(r1), r7(r3));
885}
886
887static void
888_sse_fmar_f(jit_state_t *_jit,
889 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
890{
891 jit_int32_t t0;
892 if (jit_cpu.fma) {
893 if (r0 != r2 && r0 != r3) {
894 sse_movr_f(r0, r1);
895 vfmadd213ss(r0, r2, r3);
896 }
897 else {
898 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
899 sse_movr_f(rn(t0), r1);
900 vfmadd213ss(rn(t0), r2, r3);
901 sse_movr_f(r0, rn(t0));
902 jit_unget_reg(t0);
903 }
904 }
905 else {
906 if (r0 != r3) {
907 sse_mulr_f(r0, r1, r2);
908 sse_addr_f(r0, r0, r3);
909 }
910 else {
911 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
912 sse_mulr_f(rn(t0), r1, r2);
913 sse_addr_f(r0, rn(t0), r3);
914 jit_unget_reg(t0);
915 }
916 }
917}
918
919static void
920_sse_fmar_d(jit_state_t *_jit,
921 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
922{
923 jit_int32_t t0;
924 if (jit_cpu.fma) {
925 if (r0 != r2 && r0 != r3) {
926 sse_movr_d(r0, r1);
927 vfmadd213sd(r0, r2, r3);
928 }
929 else {
930 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
931 sse_movr_d(rn(t0), r1);
932 vfmadd213sd(rn(t0), r2, r3);
933 sse_movr_d(r0, rn(t0));
934 jit_unget_reg(t0);
935 }
936 }
937 else {
938 if (r0 != r3) {
939 sse_mulr_d(r0, r1, r2);
940 sse_addr_d(r0, r0, r3);
941 }
942 else {
943 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
944 sse_mulr_d(rn(t0), r1, r2);
945 sse_addr_d(r0, rn(t0), r3);
946 jit_unget_reg(t0);
947 }
948 }
949}
950
951static void
952_sse_fmsr_f(jit_state_t *_jit,
953 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
954{
955 jit_int32_t t0;
956 if (jit_cpu.fma) {
957 if (r0 != r2 && r0 != r3) {
958 sse_movr_f(r0, r1);
959 vfmsub213ss(r0, r2, r3);
960 }
961 else {
962 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
963 sse_movr_f(rn(t0), r1);
964 vfmsub213ss(rn(t0), r2, r3);
965 sse_movr_f(r0, rn(t0));
966 jit_unget_reg(t0);
967 }
968 }
969 else {
970 if (r0 != r3) {
971 sse_mulr_f(r0, r1, r2);
972 sse_subr_f(r0, r0, r3);
973 }
974 else {
975 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
976 sse_mulr_f(rn(t0), r1, r2);
977 sse_subr_f(r0, rn(t0), r3);
978 jit_unget_reg(t0);
979 }
980 }
981}
982
983static void
984_sse_fmsr_d(jit_state_t *_jit,
985 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
986{
987 jit_int32_t t0;
988 if (jit_cpu.fma) {
989 if (r0 != r2 && r0 != r3) {
990 sse_movr_d(r0, r1);
991 vfmsub213sd(r0, r2, r3);
992 }
993 else {
994 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
995 sse_movr_d(rn(t0), r1);
996 vfmsub213sd(rn(t0), r2, r3);
997 sse_movr_d(r0, rn(t0));
998 jit_unget_reg(t0);
999 }
1000 }
1001 else {
1002 if (r0 != r3) {
1003 sse_mulr_d(r0, r1, r2);
1004 sse_subr_d(r0, r0, r3);
1005 }
1006 else {
1007 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1008 sse_mulr_d(rn(t0), r1, r2);
1009 sse_subr_d(r0, rn(t0), r3);
1010 jit_unget_reg(t0);
1011 }
1012 }
1013}
1014
1015static void
1016_sse_fnmar_f(jit_state_t *_jit,
1017 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1018{
1019 jit_int32_t t0;
1020 if (jit_cpu.fma) {
1021 if (r0 != r2 && r0 != r3) {
1022 sse_negr_f(r0, r1);
1023 vfmsub213ss(r0, r2, r3);
1024 }
1025 else {
1026 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1027 sse_negr_f(rn(t0), r1);
1028 vfmsub213ss(rn(t0), r2, r3);
1029 sse_movr_f(r0, rn(t0));
1030 jit_unget_reg(t0);
1031 }
1032 }
1033 else {
1034 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1035 sse_negr_f(rn(t0), r1);
1036 sse_mulr_f(rn(t0), rn(t0), r2);
1037 sse_subr_f(r0, rn(t0), r3);
1038 jit_unget_reg(t0);
1039 }
1040}
1041
1042static void
1043_sse_fnmar_d(jit_state_t *_jit,
1044 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1045{
1046 jit_int32_t t0;
1047 if (jit_cpu.fma) {
1048 if (r0 != r2 && r0 != r3) {
1049 sse_negr_d(r0, r1);
1050 vfmsub213sd(r0, r2, r3);
1051 }
1052 else {
1053 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1054 sse_negr_d(rn(t0), r1);
1055 vfmsub213sd(rn(t0), r2, r3);
1056 sse_movr_d(r0, rn(t0));
1057 jit_unget_reg(t0);
1058 }
1059 }
1060 else {
1061 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1062 sse_negr_d(rn(t0), r1);
1063 sse_mulr_d(rn(t0), rn(t0), r2);
1064 sse_subr_d(r0, rn(t0), r3);
1065 jit_unget_reg(t0);
1066 }
1067}
1068
1069static void
1070_sse_fnmsr_f(jit_state_t *_jit,
1071 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1072{
1073 jit_int32_t t0;
1074 if (jit_cpu.fma) {
1075 if (r0 != r2 && r0 != r3) {
1076 sse_negr_f(r0, r1);
1077 vfmadd213ss(r0, r2, r3);
1078 }
1079 else {
1080 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1081 sse_negr_f(rn(t0), r1);
1082 vfmadd213ss(rn(t0), r2, r3);
1083 sse_movr_f(r0, rn(t0));
1084 jit_unget_reg(t0);
1085 }
1086 }
1087 else {
1088 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1089 sse_negr_f(rn(t0), r1);
1090 sse_mulr_f(rn(t0), rn(t0), r2);
1091 sse_addr_f(r0, rn(t0), r3);
1092 jit_unget_reg(t0);
1093 }
1094}
1095
1096static void
1097_sse_fnmsr_d(jit_state_t *_jit,
1098 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
1099{
1100 jit_int32_t t0;
1101 if (jit_cpu.fma) {
1102 if (r0 != r2 && r0 != r3) {
1103 sse_negr_d(r0, r1);
1104 vfmadd213sd(r0, r2, r3);
1105 }
1106 else {
1107 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1108 sse_negr_d(rn(t0), r1);
1109 vfmadd213sd(rn(t0), r2, r3);
1110 sse_movr_d(r0, rn(t0));
1111 jit_unget_reg(t0);
1112 }
1113 }
1114 else {
1115 t0 = jit_get_reg(jit_class_fpr|jit_class_xpr);
1116 sse_negr_d(rn(t0), r1);
1117 sse_mulr_d(rn(t0), rn(t0), r2);
1118 sse_addr_d(r0, rn(t0), r3);
1119 jit_unget_reg(t0);
1120 }
1121}
1122
4a71579b
PC
1123static void
1124_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
1125 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1126{
1127 jit_bool_t rc;
1128 jit_int32_t reg;
1129 if ((rc = reg8_p(r0)))
1130 reg = r0;
1131 else {
1132 reg = _RAX_REGNO;
1133 movr(r0, reg);
1134 }
1135 ixorr(reg, reg);
1136 if (d)
1137 ucomisdr(r2, r1);
1138 else
1139 ucomissr(r2, r1);
1140 cc(code, reg);
1141 if (!rc)
1142 xchgr(r0, reg);
1143}
1144
1145static void
1146_sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1147{
1148 if (r0 != r1)
1149 ssexr(0xf3, X86_SSE_MOV, r0, r1);
1150}
1151
1152static void
1153_sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
1154{
1155 union {
1156 jit_int32_t i;
1157 jit_float32_t f;
1158 } data;
1159 jit_int32_t reg;
1160 jit_bool_t ldi;
1161
1162 data.f = *i0;
1163 if (data.f == 0.0 && !(data.i & 0x80000000))
1164 xorpsr(r0, r0);
1165 else {
1166 ldi = !_jitc->no_data;
1167#if __X64
1168 /* if will allocate a register for offset, just use immediate */
79bfeef6
PC
1169# if CAN_RIP_ADDRESS
1170 if (ldi) {
1171 jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1172 ldi = can_sign_extend_int_p(rel);
1173 if (!ldi && address_p(i0))
1174 ldi = 1;
1175 }
1176# else
1177 if (ldi && !address_p(i0))
4a71579b 1178 ldi = 0;
79bfeef6 1179# endif
4a71579b
PC
1180#endif
1181 if (ldi)
1182 sse_ldi_f(r0, (jit_word_t)i0);
1183 else {
1184 reg = jit_get_reg(jit_class_gpr);
1185 movi(rn(reg), data.i);
ba86ff93 1186 movdxr(r0, rn(reg));
4a71579b
PC
1187 jit_unget_reg(reg);
1188 }
1189 }
1190}
1191
ba86ff93
PC
1192static void
1193_sse_movi_w_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1194{
1195 jit_int32_t reg;
1196 reg = jit_get_reg(jit_class_gpr);
1197 movi(rn(reg), i0);
1198 movdxr(r0, rn(reg));
1199 jit_unget_reg(reg);
1200}
1201
4a71579b
PC
1202fopi(lt)
1203fopi(le)
1204
1205static void
1206_sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1207{
1208 jit_bool_t rc;
1209 jit_int32_t reg;
1210 jit_word_t jp_code;
1211 if ((rc = reg8_p(r0)))
1212 reg = r0;
1213 else {
1214 reg = _RAX_REGNO;
1215 movr(r0, _RAX_REGNO);
1216 }
1217 ixorr(reg, reg);
1218 ucomissr(r2, r1);
79bfeef6 1219 jp_code = jpes(0);
4a71579b 1220 cc(X86_CC_E, reg);
79bfeef6 1221 patch_at(jp_code, _jit->pc.w);
4a71579b
PC
1222 if (!rc)
1223 xchgr(r0, reg);
1224}
1225
1226fopi(eq)
1227fopi(ge)
1228fopi(gt)
1229
1230static void
1231_sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1232{
1233 jit_bool_t rc;
1234 jit_int32_t reg;
1235 jit_word_t jp_code;
1236 if ((rc = reg8_p(r0)))
1237 reg = r0;
1238 else {
1239 reg = _RAX_REGNO;
1240 movr(r0, _RAX_REGNO);
1241 }
1242 imovi(reg, 1);
1243 ucomissr(r2, r1);
79bfeef6 1244 jp_code = jpes(0);
4a71579b 1245 cc(X86_CC_NE, reg);
79bfeef6 1246 patch_at(jp_code, _jit->pc.w);
4a71579b
PC
1247 if (!rc)
1248 xchgr(r0, reg);
1249}
1250
1251fopi(ne)
1252fopi(unlt)
1253
1254static void
1255_sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1256{
1257 if (r1 == r2)
1258 movi(r0, 1);
1259 else
1260 ssecmpf(X86_CC_NA, r0, r2, r1);
1261}
1262
1263fopi(unle)
1264
1265static void
1266_sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1267{
1268 if (r1 == r2)
1269 movi(r0, 1);
1270 else
1271 ssecmpf(X86_CC_E, r0, r1, r2);
1272}
1273
1274fopi(uneq)
1275
1276static void
1277_sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1278{
1279 if (r1 == r2)
1280 movi(r0, 1);
1281 else
1282 ssecmpf(X86_CC_NA, r0, r1, r2);
1283}
1284
1285fopi(unge)
1286fopi(ungt)
1287
1288static void
1289_sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1290{
1291 if (r1 == r2)
1292 ixorr(r0, r0);
1293 else
1294 ssecmpf(X86_CC_NE, r0, r1, r2);
1295}
1296
1297fopi(ltgt)
1298fopi(ord)
1299fopi(unord)
1300
1301static void
1302_sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1303{
1304 jit_int32_t reg;
79bfeef6
PC
1305#if CAN_RIP_ADDRESS
1306 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1307 if (can_sign_extend_int_p(rel))
1308 movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
1309 else
1310#endif
1311 if (address_p(i0))
4a71579b
PC
1312 movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
1313 else {
1314 reg = jit_get_reg(jit_class_gpr);
1315 movi(rn(reg), i0);
1316 sse_ldr_f(r0, rn(reg));
1317 jit_unget_reg(reg);
1318 }
1319}
1320
1321static void
1322_sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1323{
1324#if __X64_32
1325 jit_int32_t reg;
1326 reg = jit_get_reg(jit_class_gpr);
1327 addr(rn(reg), r1, r2);
1328 sse_ldr_f(r0, rn(reg));
1329 jit_unget_reg(reg);
1330#else
1331 movssmr(0, r1, r2, _SCL1, r0);
1332#endif
1333}
1334
1335static void
1336_sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1337{
1338 jit_int32_t reg;
1339 if (can_sign_extend_int_p(i0))
1340 movssmr(i0, r1, _NOREG, _SCL1, r0);
1341 else {
1342 reg = jit_get_reg(jit_class_gpr);
1343#if __X64_32
1344 addi(rn(reg), r1, i0);
1345 sse_ldr_f(r0, rn(reg));
1346#else
1347 movi(rn(reg), i0);
1348 sse_ldxr_f(r0, r1, rn(reg));
1349#endif
1350 jit_unget_reg(reg);
1351 }
1352}
1353
ba86ff93
PC
1354static void
1355_sse_unldr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1356{
1357 assert(i0 == 4 || i0 == 8);
1358 if (i0 == 4)
1359 sse_ldr_f(r0, r1);
1360 else
1361 sse_ldr_d(r0, r1);
1362}
1363
1364static void
1365_sse_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
1366{
1367 assert(i1 == 4 || i1 == 8);
1368 if (i1 == 4)
1369 sse_ldi_f(r0, i0);
1370 else
1371 sse_ldi_d(r0, i0);
1372}
1373
4a71579b
PC
1374static void
1375_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1376{
1377 jit_int32_t reg;
79bfeef6
PC
1378#if CAN_RIP_ADDRESS
1379 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1380 if (can_sign_extend_int_p(rel))
1381 movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
1382 else
1383#endif
1384 if (address_p(i0))
4a71579b
PC
1385 movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
1386 else {
1387 reg = jit_get_reg(jit_class_gpr);
1388 movi(rn(reg), i0);
1389 sse_str_f(rn(reg), r0);
1390 jit_unget_reg(reg);
1391 }
1392}
1393
1394static void
1395_sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1396{
1397#if __X64_32
1398 jit_int32_t reg;
1399 reg = jit_get_reg(jit_class_gpr);
1400 addr(rn(reg), r0, r1);
1401 sse_str_f(rn(reg), r2);
1402 jit_unget_reg(reg);
1403#else
1404 movssrm(r2, 0, r0, r1, _SCL1);
1405#endif
1406}
1407
1408static void
1409_sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1410{
1411 jit_int32_t reg;
1412 if (can_sign_extend_int_p(i0))
1413 movssrm(r1, i0, r0, _NOREG, _SCL1);
1414 else {
1415 reg = jit_get_reg(jit_class_gpr);
1416#if __X64_32
1417 addi(rn(reg), r0, i0);
1418 sse_str_f(rn(reg), r1);
1419#else
1420 movi(rn(reg), i0);
1421 sse_stxr_f(rn(reg), r0, r1);
1422#endif
1423 jit_unget_reg(reg);
1424 }
1425}
1426
ba86ff93
PC
1427static void
1428_sse_unstr_x(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1429{
1430 assert(i0 == 4 || i0 == 8);
1431 if (i0 == 4)
1432 sse_str_f(r0, r1);
1433 else
1434 sse_str_d(r0, r1);
1435}
1436
1437static void
1438_sse_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
1439{
1440 assert(i1 == 4 || i1 == 8);
1441 if (i1 == 4)
1442 sse_sti_f(i0, r0);
1443 else
1444 sse_sti_d(i0, r0);
1445}
1446
4a71579b
PC
1447static jit_word_t
1448_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1449{
1450 ucomissr(r1, r0);
79bfeef6 1451 return (ja(i0));
4a71579b
PC
1452}
1453fbopi(lt)
1454
1455static jit_word_t
1456_sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1457{
1458 ucomissr(r1, r0);
79bfeef6 1459 return (jae(i0));
4a71579b
PC
1460}
1461fbopi(le)
1462
1463static jit_word_t
1464_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1465{
79bfeef6 1466 jit_word_t w;
4a71579b
PC
1467 jit_word_t jp_code;
1468 ucomissr(r0, r1);
79bfeef6
PC
1469 jp_code = jps(0);
1470 w = je(i0);
1471 patch_at(jp_code, _jit->pc.w);
1472 return (w);
4a71579b
PC
1473}
1474fbopi(eq)
1475
1476static jit_word_t
1477_sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1478{
1479 ucomissr(r0, r1);
79bfeef6 1480 return (jae(i0));
4a71579b
PC
1481}
1482fbopi(ge)
1483
1484static jit_word_t
1485_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1486{
1487 ucomissr(r0, r1);
79bfeef6 1488 return (ja(i0));
4a71579b
PC
1489}
1490fbopi(gt)
1491
1492static jit_word_t
1493_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1494{
79bfeef6 1495 jit_word_t w;
4a71579b
PC
1496 jit_word_t jp_code;
1497 jit_word_t jz_code;
1498 ucomissr(r0, r1);
79bfeef6
PC
1499 jp_code = jps(0);
1500 jz_code = jzs(0);
1501 patch_at(jp_code, _jit->pc.w);
1502 w = jmpi(i0);
1503 patch_at(jz_code, _jit->pc.w);
1504 return (w);
4a71579b
PC
1505}
1506fbopi(ne)
1507
1508static jit_word_t
1509_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1510{
1511 ucomissr(r0, r1);
79bfeef6 1512 return (jnae(i0));
4a71579b
PC
1513}
1514fbopi(unlt)
1515
1516static jit_word_t
1517_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1518{
79bfeef6 1519 jit_word_t w;
4a71579b 1520 if (r0 == r1)
79bfeef6 1521 w = jmpi(i0);
4a71579b
PC
1522 else {
1523 ucomissr(r0, r1);
79bfeef6 1524 w = jna(i0);
4a71579b 1525 }
79bfeef6 1526 return (w);
4a71579b
PC
1527}
1528fbopi(unle)
1529
1530static jit_word_t
1531_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1532{
79bfeef6 1533 jit_word_t w;
4a71579b 1534 if (r0 == r1)
79bfeef6 1535 w = jmpi(i0);
4a71579b
PC
1536 else {
1537 ucomissr(r0, r1);
79bfeef6 1538 w = je(i0);
4a71579b 1539 }
79bfeef6 1540 return (w);
4a71579b
PC
1541}
1542fbopi(uneq)
1543
1544static jit_word_t
1545_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1546{
79bfeef6 1547 jit_word_t w;
4a71579b 1548 if (r0 == r1)
79bfeef6 1549 w = jmpi(i0);
4a71579b
PC
1550 else {
1551 ucomissr(r1, r0);
79bfeef6 1552 w = jna(i0);
4a71579b 1553 }
79bfeef6 1554 return (w);
4a71579b
PC
1555}
1556fbopi(unge)
1557
1558static jit_word_t
1559_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1560{
1561 ucomissr(r1, r0);
79bfeef6 1562 return (jnae(i0));
4a71579b
PC
1563}
1564fbopi(ungt)
1565
1566static jit_word_t
1567_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1568{
1569 ucomissr(r0, r1);
79bfeef6 1570 return (jne(i0));
4a71579b
PC
1571}
1572fbopi(ltgt)
1573
1574static jit_word_t
1575_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1576{
1577 ucomissr(r0, r1);
79bfeef6 1578 return (jnp(i0));
4a71579b
PC
1579}
1580fbopi(ord)
1581
1582static jit_word_t
1583_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1584{
1585 ucomissr(r0, r1);
79bfeef6 1586 return (jp(i0));
4a71579b
PC
1587}
1588fbopi(unord)
1589
1590dopi(lt)
1591dopi(le)
1592
1593static void
1594_sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1595{
1596 jit_bool_t rc;
1597 jit_int32_t reg;
1598 jit_word_t jp_code;
1599 if ((rc = reg8_p(r0)))
1600 reg = r0;
1601 else {
1602 reg = _RAX_REGNO;
1603 movr(r0, _RAX_REGNO);
1604 }
1605 ixorr(reg, reg);
1606 ucomisdr(r2, r1);
79bfeef6 1607 jp_code = jpes(0);
4a71579b 1608 cc(X86_CC_E, reg);
79bfeef6 1609 patch_at(jp_code, _jit->pc.w);
4a71579b
PC
1610 if (!rc)
1611 xchgr(r0, reg);
1612}
1613
1614dopi(eq)
1615dopi(ge)
1616dopi(gt)
1617
1618static void
1619_sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1620{
1621 jit_bool_t rc;
1622 jit_int32_t reg;
1623 jit_word_t jp_code;
1624 if ((rc = reg8_p(r0)))
1625 reg = r0;
1626 else {
1627 reg = _RAX_REGNO;
1628 movr(r0, _RAX_REGNO);
1629 }
1630 imovi(reg, 1);
1631 ucomisdr(r2, r1);
79bfeef6 1632 jp_code = jpes(0);
4a71579b 1633 cc(X86_CC_NE, reg);
79bfeef6 1634 patch_at(jp_code, _jit->pc.w);
4a71579b
PC
1635 if (!rc)
1636 xchgr(r0, reg);
1637}
1638
1639dopi(ne)
1640dopi(unlt)
1641
1642static void
1643_sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1644{
1645 if (r1 == r2)
1646 movi(r0, 1);
1647 else
1648 ssecmpd(X86_CC_NA, r0, r2, r1);
1649}
1650
1651dopi(unle)
1652
1653static void
1654_sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1655{
1656 if (r1 == r2)
1657 movi(r0, 1);
1658 else
1659 ssecmpd(X86_CC_E, r0, r1, r2);
1660}
1661
1662dopi(uneq)
1663
1664static void
1665_sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1666{
1667 if (r1 == r2)
1668 movi(r0, 1);
1669 else
1670 ssecmpd(X86_CC_NA, r0, r1, r2);
1671}
1672
1673dopi(unge)
1674dopi(ungt)
1675
1676static void
1677_sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1678{
1679 if (r1 == r2)
1680 ixorr(r0, r0);
1681 else
1682 ssecmpd(X86_CC_NE, r0, r1, r2);
1683}
1684
1685dopi(ltgt)
1686dopi(ord)
1687dopi(unord)
1688
1689static void
1690_sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1691{
1692 if (r0 != r1)
1693 ssexr(0xf2, X86_SSE_MOV, r0, r1);
1694}
1695
1696static void
1697_sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
1698{
1699 union {
1700 jit_int32_t ii[2];
1701 jit_word_t w;
1702 jit_float64_t d;
1703 } data;
1704 jit_int32_t reg;
1705 jit_bool_t ldi;
1706
1707 data.d = *i0;
1708 if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
1709 xorpdr(r0, r0);
1710 else {
1711 ldi = !_jitc->no_data;
1712#if __X64
1713 /* if will allocate a register for offset, just use immediate */
79bfeef6
PC
1714# if CAN_RIP_ADDRESS
1715 if (ldi) {
1716 jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1717 ldi = can_sign_extend_int_p(rel);
1718 if (!ldi && address_p(i0))
1719 ldi = 1;
1720 }
1721# else
1722 if (ldi && !address_p(i0))
4a71579b 1723 ldi = 0;
79bfeef6 1724# endif
4a71579b
PC
1725#endif
1726 if (ldi)
1727 sse_ldi_d(r0, (jit_word_t)i0);
1728 else {
1729 reg = jit_get_reg(jit_class_gpr);
1730#if __X64 && !__X64_32
1731 movi(rn(reg), data.w);
ba86ff93 1732 movqxr(r0, rn(reg));
4a71579b
PC
1733 jit_unget_reg(reg);
1734#else
79bfeef6 1735 CHECK_CVT_OFFSET();
4a71579b
PC
1736 movi(rn(reg), data.ii[0]);
1737 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1738 movi(rn(reg), data.ii[1]);
1739 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1740 jit_unget_reg(reg);
1741 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1742#endif
1743 }
1744 }
1745}
1746
ba86ff93
PC
1747#if __X32 || __X64_32
1748static void
1749_sse_movr_ww_d(jit_state_t *_jit,
1750 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1751{
1752 CHECK_CVT_OFFSET();
1753 stxi_i(CVT_OFFSET, _RBP_REGNO, r1);
1754 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, r2);
1755 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1756}
1757
1758static void
1759_sse_movr_d_ww(jit_state_t *_jit,
1760 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1761{
1762 CHECK_CVT_OFFSET();
1763 sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r2);
1764 ldxi_i(r0, _RBP_REGNO, CVT_OFFSET);
1765 ldxi_i(r1, _RBP_REGNO, CVT_OFFSET + 4);
1766}
1767
1768static void
1769_sse_movi_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
1770{
1771 jit_int32_t reg;
1772 CHECK_CVT_OFFSET();
1773 reg = jit_get_reg(jit_class_gpr);
1774 movi(rn(reg), i0);
1775 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1776 movi(rn(reg), i1);
1777 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1778 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1779 jit_unget_reg(reg);
1780}
1781#else
1782static void
1783_sse_movi_w_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1784{
1785 jit_int32_t reg;
1786 reg = jit_get_reg(jit_class_gpr);
1787 movi(rn(reg), i0);
1788 movqxr(r0, rn(reg));
1789 jit_unget_reg(reg);
1790}
1791#endif
1792
4a71579b
PC
1793static void
1794_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1795{
1796 jit_int32_t reg;
79bfeef6
PC
1797#if CAN_RIP_ADDRESS
1798 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1799 if (can_sign_extend_int_p(rel))
1800 movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
1801 else
1802#endif
1803 if (address_p(i0))
4a71579b
PC
1804 movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
1805 else {
1806 reg = jit_get_reg(jit_class_gpr);
1807 movi(rn(reg), i0);
1808 sse_ldr_d(r0, rn(reg));
1809 jit_unget_reg(reg);
1810 }
1811}
1812
1813static void
1814_sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1815{
1816#if __X64_32
1817 jit_int32_t reg;
1818 reg = jit_get_reg(jit_class_gpr);
1819 addr(rn(reg), r1, r2);
1820 sse_ldr_d(r0, rn(reg));
1821 jit_unget_reg(reg);
1822#else
1823 movsdmr(0, r1, r2, _SCL1, r0);
1824#endif
1825}
1826
1827static void
1828_sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1829{
1830 jit_int32_t reg;
1831 if (can_sign_extend_int_p(i0))
1832 movsdmr(i0, r1, _NOREG, _SCL1, r0);
1833 else {
1834 reg = jit_get_reg(jit_class_gpr);
1835#if __X64_32
1836 addi(rn(reg), r1, i0);
1837 sse_ldr_d(r0, rn(reg));
1838#else
1839 movi(rn(reg), i0);
1840 sse_ldxr_d(r0, r1, rn(reg));
1841#endif
1842 jit_unget_reg(reg);
1843 }
1844}
1845
1846static void
1847_sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1848{
1849 jit_int32_t reg;
79bfeef6
PC
1850#if CAN_RIP_ADDRESS
1851 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1852 if (can_sign_extend_int_p(rel))
1853 movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
1854 else
1855#endif
1856 if (address_p(i0))
4a71579b
PC
1857 movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
1858 else {
1859 reg = jit_get_reg(jit_class_gpr);
1860 movi(rn(reg), i0);
1861 sse_str_d(rn(reg), r0);
1862 jit_unget_reg(reg);
1863 }
1864}
1865
1866static void
1867_sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1868{
1869#if __X64_32
1870 jit_int32_t reg;
1871 reg = jit_get_reg(jit_class_gpr);
1872 addr(rn(reg), r0, r1);
1873 sse_str_d(rn(reg), r2);
1874 jit_unget_reg(reg);
1875#else
1876 movsdrm(r2, 0, r0, r1, _SCL1);
1877#endif
1878}
1879
1880static void
1881_sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1882{
1883 jit_int32_t reg;
1884 if (can_sign_extend_int_p(i0))
1885 movsdrm(r1, i0, r0, _NOREG, _SCL1);
1886 else {
1887 reg = jit_get_reg(jit_class_gpr);
1888#if __X64_32
1889 addi(rn(reg), r0, i0);
1890 sse_str_d(rn(reg), r1);
1891#else
1892 movi(rn(reg), i0);
1893 sse_stxr_f(rn(reg), r0, r1);
1894#endif
1895 jit_unget_reg(reg);
1896 }
1897}
1898
1899static jit_word_t
1900_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1901{
1902 ucomisdr(r1, r0);
79bfeef6 1903 return (ja(i0));
4a71579b
PC
1904}
1905dbopi(lt)
1906
1907static jit_word_t
1908_sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1909{
1910 ucomisdr(r1, r0);
79bfeef6 1911 return (jae(i0));
4a71579b
PC
1912}
1913dbopi(le)
1914
1915static jit_word_t
1916_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1917{
79bfeef6 1918 jit_word_t w;
4a71579b
PC
1919 jit_word_t jp_code;
1920 ucomisdr(r0, r1);
79bfeef6
PC
1921 jp_code = jps(0);
1922 w = je(i0);
1923 patch_at(jp_code, _jit->pc.w);
1924 return (w);
4a71579b
PC
1925}
1926dbopi(eq)
1927
1928static jit_word_t
1929_sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1930{
1931 ucomisdr(r0, r1);
79bfeef6 1932 return (jae(i0));
4a71579b
PC
1933}
1934dbopi(ge)
1935
1936static jit_word_t
1937_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1938{
1939 ucomisdr(r0, r1);
79bfeef6 1940 return (ja(i0));
4a71579b
PC
1941}
1942dbopi(gt)
1943
1944static jit_word_t
1945_sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1946{
79bfeef6 1947 jit_word_t w;
4a71579b
PC
1948 jit_word_t jp_code;
1949 jit_word_t jz_code;
1950 ucomisdr(r0, r1);
79bfeef6
PC
1951 jp_code = jps(0);
1952 jz_code = jzs(0);
1953 patch_at(jp_code, _jit->pc.w);
1954 w = jmpi(i0);
1955 patch_at(jz_code, _jit->pc.w);
1956 return (w);
4a71579b
PC
1957}
1958dbopi(ne)
1959
1960static jit_word_t
1961_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1962{
1963 ucomisdr(r0, r1);
79bfeef6 1964 return (jnae(i0));
4a71579b
PC
1965}
1966dbopi(unlt)
1967
1968static jit_word_t
1969_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1970{
79bfeef6 1971 jit_word_t w;
4a71579b 1972 if (r0 == r1)
79bfeef6 1973 w = jmpi(i0);
4a71579b
PC
1974 else {
1975 ucomisdr(r0, r1);
79bfeef6 1976 w = jna(i0);
4a71579b 1977 }
79bfeef6 1978 return (w);
4a71579b
PC
1979}
1980dbopi(unle)
1981
1982static jit_word_t
1983_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1984{
79bfeef6 1985 jit_word_t w;
4a71579b 1986 if (r0 == r1)
79bfeef6 1987 w = jmpi(i0);
4a71579b
PC
1988 else {
1989 ucomisdr(r0, r1);
79bfeef6 1990 w = je(i0);
4a71579b 1991 }
79bfeef6 1992 return (w);
4a71579b
PC
1993}
1994dbopi(uneq)
1995
1996static jit_word_t
1997_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1998{
79bfeef6 1999 jit_word_t w;
4a71579b 2000 if (r0 == r1)
79bfeef6 2001 w = jmpi(i0);
4a71579b
PC
2002 else {
2003 ucomisdr(r1, r0);
79bfeef6 2004 w = jna(i0);
4a71579b 2005 }
79bfeef6 2006 return (w);
4a71579b
PC
2007}
2008dbopi(unge)
2009
2010static jit_word_t
2011_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2012{
2013 ucomisdr(r1, r0);
79bfeef6 2014 return (jnae(i0));
4a71579b
PC
2015}
2016dbopi(ungt)
2017
2018static jit_word_t
2019_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2020{
2021 ucomisdr(r0, r1);
79bfeef6 2022 return (jne(i0));
4a71579b
PC
2023}
2024dbopi(ltgt)
2025
2026static jit_word_t
2027_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2028{
2029 ucomisdr(r0, r1);
79bfeef6 2030 return (jnp(i0));
4a71579b
PC
2031}
2032dbopi(ord)
2033
2034static jit_word_t
2035_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
2036{
2037 ucomisdr(r0, r1);
79bfeef6 2038 return (jp(i0));
4a71579b
PC
2039}
2040dbopi(unord)
2041# undef fopi
2042# undef fbopi
2043# undef bopi
2044# undef dbopi
2045# undef fpr_bopi
2046# undef fpr_opi
2047#endif