git subrepo clone (merge) https://github.com/rtissera/libchdr deps/libchdr
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86-sse.c
CommitLineData
4a71579b
PC
1/*
2 * Copyright (C) 2012-2019 Free Software Foundation, Inc.
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20#if PROTO
/*
 * sse_address_p(i0): predicate applied to an immediate address i0.
 * It is always true on __X32; on __X64_32 the value must be non-negative
 * when viewed as a jit_word_t; on every other target the decision is left
 * to can_sign_extend_int_p().
 * NOTE(review): presumably this tells whether an absolute address can be
 * encoded directly in a memory operand -- the users are outside this view.
 */
21# if __X32
22# define sse_address_p(i0) 1
23# else
24# if __X64_32
25# define sse_address_p(i0) ((jit_word_t)(i0) >= 0)
26# else
27# define sse_address_p(i0) can_sign_extend_int_p(i0)
28# endif
29# endif
/* Register numbers of XMM6 through XMM15. */
30# define _XMM6_REGNO 6
31# define _XMM7_REGNO 7
32# define _XMM8_REGNO 8
33# define _XMM9_REGNO 9
34# define _XMM10_REGNO 10
35# define _XMM11_REGNO 11
36# define _XMM12_REGNO 12
37# define _XMM13_REGNO 13
38# define _XMM14_REGNO 14
39# define _XMM15_REGNO 15
/*
 * Opcode bytes of the SSE instructions used by this backend.  The emitters
 * in this file write the 0x0f escape byte followed by the opcode value they
 * are handed; the instruction macros below additionally pass a leading
 * prefix byte (0x66, 0xf2 or 0xf3) that selects the concrete variant.
 */
40#define X86_SSE_MOV 0x10
41#define X86_SSE_MOV1 0x11
42#define X86_SSE_MOVLP 0x12
43#define X86_SSE_MOVHP 0x16
44#define X86_SSE_MOVA 0x28
45#define X86_SSE_CVTIS 0x2a
46#define X86_SSE_CVTTSI 0x2c
47#define X86_SSE_CVTSI 0x2d
48#define X86_SSE_UCOMI 0x2e
49#define X86_SSE_COMI 0x2f
50#define X86_SSE_ROUND 0x3a
51#define X86_SSE_SQRT 0x51
52#define X86_SSE_RSQRT 0x52
53#define X86_SSE_RCP 0x53
54#define X86_SSE_AND 0x54
55#define X86_SSE_ANDN 0x55
56#define X86_SSE_OR 0x56
57#define X86_SSE_XOR 0x57
58#define X86_SSE_ADD 0x58
59#define X86_SSE_MUL 0x59
60#define X86_SSE_CVTSD 0x5a
61#define X86_SSE_CVTDT 0x5b
62#define X86_SSE_SUB 0x5c
63#define X86_SSE_MIN 0x5d
64#define X86_SSE_DIV 0x5e
65#define X86_SSE_MAX 0x5f
66#define X86_SSE_X2G 0x6e
67#define X86_SSE_EQB 0x74
68#define X86_SSE_EQW 0x75
69#define X86_SSE_EQD 0x76
70#define X86_SSE_G2X 0x7e
71#define X86_SSE_MOV2 0xd6
/*
 * Low-level register-register emitters.  Each macro supplies the implicit
 * _jit state to the function of the same name:
 *   sser(c, r0, r1)      - opcode c with no prefix byte
 *   ssexr(p, c, r0, r1)  - prefix byte p, then opcode c
 *   ssexi(c, r0, m, i)   - 0x66-prefixed opcode c acting on r0, with m in the
 *                          ModRM reg field and a trailing immediate byte i
 */
72# define sser(c,r0,r1) _sser(_jit,c,r0,r1)
73static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
74# define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
75static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
76# define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
77static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/*
 * Scalar arithmetic.  The "ss" (single precision) forms use prefix 0xf3 and
 * the "sd" (double precision) forms use prefix 0xf2; the packed "ps" forms
 * take no prefix and the packed "pd" forms take 0x66.
 */
78# define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
79# define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1)
80# define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1)
81# define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1)
82# define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1)
83# define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1)
84# define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1)
85# define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1)
86# define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1)
87# define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1)
/*
 * Conversions.  truncr_*_i and (outside __X64) extr_* go through ssexr; on
 * __X64 the truncr_*_l and extr_* forms go through sselxr instead.
 */
88# define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
89# define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
90# if __X64
91# define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
92# define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
93# define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
94# define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
95# else
96# define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
97# define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
98# endif
99# define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
100# define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
/* Compare, xor, GPR-to-XMM move, packed compare and packed shifts. */
101# define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)
102# define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
103# define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
104# define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
105# define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
106# define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
/* Shifts by immediate: opcode 0x72 or 0x73, ModRM reg field 0x02 or 0x06. */
107# define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
108# define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
109# define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
110# define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
111# define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1)
/*
 * sselxr has a dedicated emitter only on 64-bit builds that are not x32
 * (__X64 && !__X64_32); everywhere else it is an alias of ssexr.
 */
112# if __X64 && !__X64_32
113# define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
114static void
115_sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
116# else
117# define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1)
118# endif
/*
 * Register/memory forms.  movs?mr uses opcode X86_SSE_MOV and movs?rm uses
 * X86_SSE_MOV1; both funnel into _ssexrx().
 * NOTE(review): (md, rb, ri, ms) look like displacement, base register,
 * index register and scale of the memory operand -- confirm against rx().
 */
119# define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
120# define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
121# define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
122# define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
123# define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
124static void
125_ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
126 jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Three-operand arithmetic wrappers.  The *r_* forms take three registers
 * (destination r0, sources r1 and r2); the *i_* forms take a pointer to the
 * floating-point constant as their last argument.  _f is single precision,
 * _d is double precision.
 */
127# define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2)
128static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
129# define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0)
130static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
131# define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2)
132static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
133# define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0)
134static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
135# define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2)
136static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
137# define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0)
138static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
139# define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2)
140static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
141# define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0)
142static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Reverse subtraction is subtraction with the two sources exchanged. */
143# define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1)
144# define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0)
145static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
146# define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1)
147# define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0)
148static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
149# define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2)
150static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
151# define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0)
152static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
153# define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2)
154static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
155# define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0)
156static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
157# define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2)
158static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
159# define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0)
160static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
161# define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2)
162static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
163# define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0)
164static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Unary operations: absolute value, negation and square root. */
165# define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1)
166static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
167# define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1)
168static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
169# define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1)
170static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
171# define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1)
172static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
173# define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
174# define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
/*
 * Shared comparison emitter.  The second argument of _ssecmp() selects the
 * precision: 0 for single (ssecmpf), 1 for double (ssecmpd).
 */
175# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
176# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
177static void
178_ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
179 jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Single-precision register move and constant load, followed by the
 * comparison family.  Comparisons that map onto a single condition code are
 * plain ssecmpf() macros.  _ssecmp() issues the compare with its last two
 * arguments reversed (ucomiss r2, r1), which is why lt/le pass (r1, r2) with
 * X86_CC_A / X86_CC_AE while gt/ge pass (r2, r1) with the same codes.  The
 * remaining comparisons and every immediate (*i_f) form have dedicated
 * emitters.
 */
180#define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1)
181static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
182#define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
183static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
184# define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
185static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
186# define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
187# define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0)
188static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
189# define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2)
190# define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0)
191static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
192# define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2)
193static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
194# define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0)
195static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
196# define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1)
197# define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0)
198static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
199# define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1)
200# define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0)
201static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
202# define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2)
203static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
/* The "un" forms use X86_CC_NAE or dedicated emitters. */
204# define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0)
205static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
206# define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1)
207# define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0)
208static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* Note: the prototype of _sse_unler_f follows the uneqi_f declarations. */
209# define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2)
210# define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0)
211static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
212static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
213# define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2)
214static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
215# define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0)
216static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
217# define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2)
218static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
219# define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0)
220static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
221# define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2)
222# define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0)
223static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
224# define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2)
225static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
/* Ordered / unordered tests use the parity condition (X86_CC_NP / X86_CC_P). */
226# define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0)
227static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
228# define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1)
229# define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0)
230static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
231# define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1)
/*
 * Single-precision loads and stores.  sse_ldr_f / sse_str_f expand directly
 * to movss with displacement 0, no index register (_NOREG) and scale _SCL1.
 * The immediate-address (ldi/sti) and indexed (ldxr/ldxi/stxr/stxi) forms
 * have their own emitters.
 */
232# define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0)
233# define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0)
234static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
235# define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2)
236static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
237# define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
238static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
239# define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
240# define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
241static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
242# define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2)
243static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
244# define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
245static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Single-precision compare-and-branch emitters.  The b*r_f forms compare two
 * registers; the b*i_f forms compare a register with the constant pointed to
 * by i1.  Every emitter takes a jit_word_t i0 and returns a jit_word_t.
 * NOTE(review): i0 is presumably the branch target and the return value the
 * location of the emitted jump -- the definitions are outside this view.
 */
246# define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
247static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
248# define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
249static jit_word_t
250_sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
251# define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1)
252static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
253# define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1)
254static jit_word_t
255_sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
256# define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1)
257static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
258# define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1)
259static jit_word_t
260_sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
261# define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1)
262static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
263# define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1)
264static jit_word_t
265_sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
266# define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1)
267static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
268# define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1)
269static jit_word_t
270_sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
271# define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1)
272static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
273# define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1)
274static jit_word_t
275_sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
276# define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1)
277static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
278# define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1)
279static jit_word_t
280_sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
281# define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1)
282static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
283# define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1)
284static jit_word_t
285_sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
286# define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1)
287static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
288# define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1)
289static jit_word_t
290_sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
291# define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1)
292static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
293# define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1)
294static jit_word_t
295_sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
296# define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1)
297static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
298# define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1)
299static jit_word_t
300_sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
301# define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1)
302static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
303# define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1)
304static jit_word_t
305_sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
306# define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1)
307static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
308# define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1)
309static jit_word_t
310_sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
311# define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1)
312static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
313# define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1)
314static jit_word_t
315_sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/*
 * Double-precision register move and constant load, followed by the
 * comparison family.  The layout mirrors the single-precision set: simple
 * comparisons are ssecmpd() macros (X86_CC_A / X86_CC_AE for lt/le, the same
 * codes with the sources swapped for gt/ge, X86_CC_NAE for unlt/ungt,
 * X86_CC_NP / X86_CC_P for ord/unord); everything else, and every immediate
 * (*i_d) form, has a dedicated emitter.
 */
316#define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1)
317static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
318#define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
319static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
320# define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
321# define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
322static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
323# define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2)
324# define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0)
325static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
326# define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2)
327static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
328# define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0)
329static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
330# define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1)
331# define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0)
332static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
333# define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1)
334# define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0)
335static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
336# define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2)
337static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
338# define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0)
339static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
340# define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1)
341# define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0)
342static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
343# define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2)
344static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
345# define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0)
346static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
347# define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2)
348static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
349# define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0)
350static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
351# define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2)
352static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
353# define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0)
354static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
355# define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2)
356# define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0)
357static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
358# define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2)
359static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
360# define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0)
361static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
362# define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1)
363# define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0)
364static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
365# define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1)
366# define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0)
367static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/*
 * Double-precision loads and stores.  sse_ldr_d / sse_str_d expand directly
 * to movsd with displacement 0, no index register (_NOREG) and scale _SCL1.
 * The immediate-address (ldi/sti) and indexed (ldxr/ldxi/stxr/stxi) forms
 * have their own emitters.
 */
368# define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0)
369# define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0)
370static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
371# define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2)
372static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
373# define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0)
374static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/*
 * Note: the sse_bltr_d macro sits here, ahead of the store helpers; the
 * matching _sse_bltr_d prototype appears after them, with the other branch
 * prototypes.
 */
375# define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1)
376# define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1)
377# define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0)
378static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
379# define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2)
380static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
381# define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1)
382static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Double-precision compare-and-branch emitters.  The b*r_d forms compare two
 * registers; the b*i_d forms compare a register with the constant pointed to
 * by i1.  Every emitter takes a jit_word_t i0 and returns a jit_word_t.
 * NOTE(review): i0 is presumably the branch target and the return value the
 * location of the emitted jump -- the definitions are outside this view.
 *
 * The first prototype belongs to the sse_bltr_d macro, which is defined
 * earlier, just before the double-precision store helpers.
 */
383static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
384# define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1)
385static jit_word_t
386_sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
387# define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1)
388static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
389# define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1)
390static jit_word_t
391_sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
392# define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1)
393static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
394# define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1)
395static jit_word_t
396_sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
397# define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1)
398static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
399# define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1)
400static jit_word_t
401_sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
402# define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1)
403static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
404# define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1)
405static jit_word_t
406_sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
407# define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1)
408static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
409# define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1)
410static jit_word_t
411_sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
412# define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1)
413static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
414# define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1)
415static jit_word_t
416_sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
417# define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1)
418static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
419# define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1)
420static jit_word_t
421_sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
422# define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1)
423static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
424# define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1)
425static jit_word_t
426_sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
427# define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1)
428static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
429# define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1)
430static jit_word_t
431_sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
432# define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1)
433static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
434# define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1)
435static jit_word_t
436_sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
437# define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1)
438static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
439# define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1)
440static jit_word_t
441_sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
442# define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1)
443static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
444# define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1)
445static jit_word_t
446_sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
447# define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1)
448static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
449# define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1)
450static jit_word_t
451_sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
452#endif
453
454#if CODE
/*
 * fpr_opi(name, type, size): expands to the definition of
 * _sse_<name>i_<type>(), the immediate-operand flavour of
 * sse_<name>r_<type>().  The generated function borrows an SSE register,
 * loads the constant pointed to by i0 into it with sse_movi_<type>(), runs
 * the three-register operation with that register as the last operand, and
 * then hands the register back.  `size` (32 or 64) picks the
 * jit_float<size>_t type of the constant.
 */
#  define fpr_opi(name, type, size) \
static void \
_sse_##name##i_##type(jit_state_t *_jit, jit_int32_t r0, \
                      jit_int32_t r1, jit_float##size##_t *i0) \
{ \
    jit_int32_t tmp; \
    tmp = jit_get_reg(jit_class_fpr|jit_class_xpr); \
    assert(jit_sse_reg_p(tmp)); \
    sse_movi_##type(rn(tmp), i0); \
    sse_##name##r_##type(r0, r1, rn(tmp)); \
    jit_unget_reg(tmp); \
}
/*
 * fpr_bopi(name, type, size): expands to the definition of
 * _sse_b<name>i_<type>(), the immediate-operand flavour of the branch
 * emitter sse_b<name>r_<type>().  The generated function borrows an SSE
 * register (requested with jit_class_nospill), loads the constant pointed
 * to by i1 into it, emits the register-register branch against r0, releases
 * the register and returns whatever the register-register emitter returned.
 */
#  define fpr_bopi(name, type, size) \
static jit_word_t \
_sse_b##name##i_##type(jit_state_t *_jit, jit_word_t i0, \
                       jit_int32_t r0, jit_float##size##_t *i1) \
{ \
    jit_int32_t tmp; \
    jit_word_t where; \
    tmp = jit_get_reg(jit_class_fpr|jit_class_xpr|jit_class_nospill); \
    assert(jit_sse_reg_p(tmp)); \
    sse_movi_##type(rn(tmp), i1); \
    where = sse_b##name##r_##type(i0, r0, rn(tmp)); \
    jit_unget_reg(tmp); \
    return (where); \
}
/*
 * Shorthands that bind the two generator macros to a precision:
 * f/32 for single precision, d/64 for double precision.
 */
482# define fopi(name) fpr_opi(name, f, 32)
483# define fbopi(name) fpr_bopi(name, f, 32)
484# define dopi(name) fpr_opi(name, d, 64)
485# define dbopi(name) fpr_bopi(name, d, 64)
/*
 * Emit a register-register SSE instruction that has no prefix byte.
 *
 * Byte order matters: rex() is invoked first with r0 and r1, then the 0x0f
 * escape byte and the opcode byte c are written, and finally mrm() is called
 * with 0x03 and the low three bits (r7) of r0 and r1.
 * NOTE(review): rex() presumably emits a REX prefix only when one of the
 * registers needs it, and mrm(0x03, reg, rm) presumably builds a
 * register-direct ModRM byte -- both helpers are defined outside this view.
 */
486static void
487_sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
488{
489 rex(0, 0, r0, 0, r1);
490 ic(0x0f);
491 ic(c);
492 mrm(0x03, r7(r0), r7(r1));
493}
494
/*
 * Emit a register-register SSE instruction preceded by prefix byte p.
 *
 * Identical to _sser() except that p is written before rex() is invoked, so
 * the prefix byte comes first in the instruction stream, followed by
 * whatever rex() emits, the 0x0f escape, opcode c and the mrm() byte.
 */
495static void
496_ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
497 jit_int32_t r0, jit_int32_t r1)
498{
499 ic(p);
500 rex(0, 0, r0, 0, r1);
501 ic(0x0f);
502 ic(c);
503 mrm(0x03, r7(r0), r7(r1));
504}
505
/*
 * Emit a 0x66-prefixed SSE instruction that operates on a single register
 * with an immediate byte (used by the psrl/psrq/psll/pslq macros).
 *
 * c is the opcode byte, r0 the register operated on, m the value placed in
 * the first register slot of mrm() (the shift macros pass 0x02 or 0x06
 * there), and i the immediate byte written last.  Only r0 is passed to
 * rex(); the slot that normally carries the other register is 0.
 */
506static void
507_ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
508 jit_int32_t m, jit_int32_t i)
509{
510 ic(0x66);
511 rex(0, 0, 0, 0, r0);
512 ic(0x0f);
513 ic(c);
514 mrm(0x03, r7(m), r7(r0));
515 ic(i);
516}
517
/*
 * The guard matches the one around the sselxr macro and the _sselxr
 * prototype in the PROTO section (__X64 && !__X64_32).  On x32 builds
 * sselxr is an alias of ssexr, so this function would otherwise be defined
 * without a prior declaration and never referenced.
 */
#if __X64 && !__X64_32
/*
 * Emit a register-register SSE instruction preceded by prefix byte p, like
 * _ssexr(), but with the second argument of rex() set to 1.
 * NOTE(review): that argument is presumably the REX.W bit, selecting the
 * 64-bit integer operand form -- rex() is defined outside this view.
 */
static void
_sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
        jit_int32_t r0, jit_int32_t r1)
{
    ic(p);
    rex(0, 1, r0, 0, r1);
    ic(0x0f);
    ic(c);
    mrm(0x03, r7(r0), r7(r1));
}
#endif
530
/*
 * Emit a prefixed SSE instruction whose other operand is a memory
 * reference (used by the movss/movsd load and store macros).
 *
 * px is the prefix byte and code the opcode byte.  rd is the XMM register;
 * md, rb, ri and ms are forwarded to rx(), with rb and ri also given to
 * rex() together with rd.
 * NOTE(review): md/rb/ri/ms look like displacement, base, index and scale
 * of the memory operand -- rx() is defined outside this view.
 */
531static void
532_ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
533 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
534{
535 ic(px);
536 rex(0, 0, rd, ri, rb);
537 ic(0x0f);
538 ic(code);
539 rx(rd, md, rb, ri, ms);
540}
541
542static void
543_sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
544{
545 if (r0 == r1)
546 addssr(r0, r2);
547 else if (r0 == r2)
548 addssr(r0, r1);
549 else {
550 sse_movr_f(r0, r1);
551 addssr(r0, r2);
552 }
553}
554
555fopi(add)
556
557static void
558_sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
559{
560 if (r0 == r1)
561 addsdr(r0, r2);
562 else if (r0 == r2)
563 addsdr(r0, r1);
564 else {
565 sse_movr_d(r0, r1);
566 addsdr(r0, r2);
567 }
568}
569
570dopi(add)
571
572static void
573_sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
574{
575 jit_int32_t reg;
576 if (r0 == r1)
577 subssr(r0, r2);
578 else if (r0 == r2) {
579 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
580 sse_movr_f(rn(reg), r0);
581 sse_movr_f(r0, r1);
582 subssr(r0, rn(reg));
583 jit_unget_reg(reg);
584 }
585 else {
586 sse_movr_f(r0, r1);
587 subssr(r0, r2);
588 }
589}
590
591fopi(sub)
592
593static void
594_sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
595{
596 jit_int32_t reg;
597 if (r0 == r1)
598 subsdr(r0, r2);
599 else if (r0 == r2) {
600 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
601 sse_movr_d(rn(reg), r0);
602 sse_movr_d(r0, r1);
603 subsdr(r0, rn(reg));
604 jit_unget_reg(reg);
605 }
606 else {
607 sse_movr_d(r0, r1);
608 subsdr(r0, r2);
609 }
610}
611
612dopi(sub)
613
/*
 * Instantiate _sse_rsbi_f() and _sse_rsbi_d().  They go through
 * sse_rsbr_f / sse_rsbr_d, which are sse_subr_f / sse_subr_d with the two
 * source operands exchanged, so the loaded constant becomes the minuend.
 */
614fopi(rsb)
615
616dopi(rsb)
617
618static void
619_sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
620{
621 if (r0 == r1)
622 mulssr(r0, r2);
623 else if (r0 == r2)
624 mulssr(r0, r1);
625 else {
626 sse_movr_f(r0, r1);
627 mulssr(r0, r2);
628 }
629}
630
631fopi(mul)
632
633static void
634_sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
635{
636 if (r0 == r1)
637 mulsdr(r0, r2);
638 else if (r0 == r2)
639 mulsdr(r0, r1);
640 else {
641 sse_movr_d(r0, r1);
642 mulsdr(r0, r2);
643 }
644}
645
646dopi(mul)
647
648static void
649_sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
650{
651 jit_int32_t reg;
652 if (r0 == r1)
653 divssr(r0, r2);
654 else if (r0 == r2) {
655 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
656 sse_movr_f(rn(reg), r0);
657 sse_movr_f(r0, r1);
658 divssr(r0, rn(reg));
659 jit_unget_reg(reg);
660 }
661 else {
662 sse_movr_f(r0, r1);
663 divssr(r0, r2);
664 }
665}
666
667fopi(div)
668
669static void
670_sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
671{
672 jit_int32_t reg;
673 if (r0 == r1)
674 divsdr(r0, r2);
675 else if (r0 == r2) {
676 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
677 sse_movr_d(rn(reg), r0);
678 sse_movr_d(r0, r1);
679 divsdr(r0, rn(reg));
680 jit_unget_reg(reg);
681 }
682 else {
683 sse_movr_d(r0, r1);
684 divsdr(r0, r2);
685 }
686}
687
688dopi(div)
689
690static void
691_sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
692{
693 jit_int32_t reg;
694 if (r0 == r1) {
695 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
696 pcmpeqlr(rn(reg), rn(reg));
697 psrl(rn(reg), 1);
698 andpsr(r0, rn(reg));
699 jit_unget_reg(reg);
700 }
701 else {
702 pcmpeqlr(r0, r0);
703 psrl(r0, 1);
704 andpsr(r0, r1);
705 }
706}
707
708static void
709_sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
710{
711 jit_int32_t reg;
712 if (r0 == r1) {
713 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
714 pcmpeqlr(rn(reg), rn(reg));
715 psrq(rn(reg), 1);
716 andpdr(r0, rn(reg));
717 jit_unget_reg(reg);
718 }
719 else {
720 pcmpeqlr(r0, r0);
721 psrq(r0, 1);
722 andpdr(r0, r1);
723 }
724}
725
726static void
727_sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
728{
729 jit_int32_t freg, ireg;
730 ireg = jit_get_reg(jit_class_gpr);
731 imovi(rn(ireg), 0x80000000);
732 if (r0 == r1) {
733 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
734 movdlxr(rn(freg), rn(ireg));
735 xorpsr(r0, rn(freg));
736 jit_unget_reg(freg);
737 }
738 else {
739 movdlxr(r0, rn(ireg));
740 xorpsr(r0, r1);
741 }
742 jit_unget_reg(ireg);
743}
744
745static void
746_sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
747{
748 jit_int32_t freg, ireg;
749 ireg = jit_get_reg(jit_class_gpr);
750 imovi(rn(ireg), 0x80000000);
751 if (r0 == r1) {
752 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
753 movdlxr(rn(freg), rn(ireg));
754 pslq(rn(freg), 32);
755 xorpdr(r0, rn(freg));
756 jit_unget_reg(freg);
757 }
758 else {
759 movdlxr(r0, rn(ireg));
760 pslq(r0, 32);
761 xorpdr(r0, r1);
762 }
763 jit_unget_reg(ireg);
764}
765
766static void
767_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
768 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
769{
770 jit_bool_t rc;
771 jit_int32_t reg;
772 if ((rc = reg8_p(r0)))
773 reg = r0;
774 else {
775 reg = _RAX_REGNO;
776 movr(r0, reg);
777 }
778 ixorr(reg, reg);
779 if (d)
780 ucomisdr(r2, r1);
781 else
782 ucomissr(r2, r1);
783 cc(code, reg);
784 if (!rc)
785 xchgr(r0, reg);
786}
787
788static void
789_sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
790{
791 if (r0 != r1)
792 ssexr(0xf3, X86_SSE_MOV, r0, r1);
793}
794
795static void
796_sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
797{
798 union {
799 jit_int32_t i;
800 jit_float32_t f;
801 } data;
802 jit_int32_t reg;
803 jit_bool_t ldi;
804
805 data.f = *i0;
806 if (data.f == 0.0 && !(data.i & 0x80000000))
807 xorpsr(r0, r0);
808 else {
809 ldi = !_jitc->no_data;
810#if __X64
811 /* if will allocate a register for offset, just use immediate */
812 if (ldi && !sse_address_p(i0))
813 ldi = 0;
814#endif
815 if (ldi)
816 sse_ldi_f(r0, (jit_word_t)i0);
817 else {
818 reg = jit_get_reg(jit_class_gpr);
819 movi(rn(reg), data.i);
820 movdlxr(r0, rn(reg));
821 jit_unget_reg(reg);
822 }
823 }
824}
825
826fopi(lt)
827fopi(le)
828
829static void
830_sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
831{
832 jit_bool_t rc;
833 jit_int32_t reg;
834 jit_word_t jp_code;
835 if ((rc = reg8_p(r0)))
836 reg = r0;
837 else {
838 reg = _RAX_REGNO;
839 movr(r0, _RAX_REGNO);
840 }
841 ixorr(reg, reg);
842 ucomissr(r2, r1);
843 jpes(0);
844 jp_code = _jit->pc.w;
845 cc(X86_CC_E, reg);
846 patch_rel_char(jp_code, _jit->pc.w);
847 if (!rc)
848 xchgr(r0, reg);
849}
850
851fopi(eq)
852fopi(ge)
853fopi(gt)
854
855static void
856_sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
857{
858 jit_bool_t rc;
859 jit_int32_t reg;
860 jit_word_t jp_code;
861 if ((rc = reg8_p(r0)))
862 reg = r0;
863 else {
864 reg = _RAX_REGNO;
865 movr(r0, _RAX_REGNO);
866 }
867 imovi(reg, 1);
868 ucomissr(r2, r1);
869 jpes(0);
870 jp_code = _jit->pc.w;
871 cc(X86_CC_NE, reg);
872 patch_rel_char(jp_code, _jit->pc.w);
873 if (!rc)
874 xchgr(r0, reg);
875}
876
877fopi(ne)
878fopi(unlt)
879
880static void
881_sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
882{
883 if (r1 == r2)
884 movi(r0, 1);
885 else
886 ssecmpf(X86_CC_NA, r0, r2, r1);
887}
888
889fopi(unle)
890
891static void
892_sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
893{
894 if (r1 == r2)
895 movi(r0, 1);
896 else
897 ssecmpf(X86_CC_E, r0, r1, r2);
898}
899
900fopi(uneq)
901
902static void
903_sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
904{
905 if (r1 == r2)
906 movi(r0, 1);
907 else
908 ssecmpf(X86_CC_NA, r0, r1, r2);
909}
910
911fopi(unge)
912fopi(ungt)
913
914static void
915_sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
916{
917 if (r1 == r2)
918 ixorr(r0, r0);
919 else
920 ssecmpf(X86_CC_NE, r0, r1, r2);
921}
922
923fopi(ltgt)
924fopi(ord)
925fopi(unord)
926
927static void
928_sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
929{
930 jit_int32_t reg;
931 if (sse_address_p(i0))
932 movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
933 else {
934 reg = jit_get_reg(jit_class_gpr);
935 movi(rn(reg), i0);
936 sse_ldr_f(r0, rn(reg));
937 jit_unget_reg(reg);
938 }
939}
940
941static void
942_sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
943{
944#if __X64_32
945 jit_int32_t reg;
946 reg = jit_get_reg(jit_class_gpr);
947 addr(rn(reg), r1, r2);
948 sse_ldr_f(r0, rn(reg));
949 jit_unget_reg(reg);
950#else
951 movssmr(0, r1, r2, _SCL1, r0);
952#endif
953}
954
955static void
956_sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
957{
958 jit_int32_t reg;
959 if (can_sign_extend_int_p(i0))
960 movssmr(i0, r1, _NOREG, _SCL1, r0);
961 else {
962 reg = jit_get_reg(jit_class_gpr);
963#if __X64_32
964 addi(rn(reg), r1, i0);
965 sse_ldr_f(r0, rn(reg));
966#else
967 movi(rn(reg), i0);
968 sse_ldxr_f(r0, r1, rn(reg));
969#endif
970 jit_unget_reg(reg);
971 }
972}
973
974static void
975_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
976{
977 jit_int32_t reg;
978 if (sse_address_p(i0))
979 movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
980 else {
981 reg = jit_get_reg(jit_class_gpr);
982 movi(rn(reg), i0);
983 sse_str_f(rn(reg), r0);
984 jit_unget_reg(reg);
985 }
986}
987
988static void
989_sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
990{
991#if __X64_32
992 jit_int32_t reg;
993 reg = jit_get_reg(jit_class_gpr);
994 addr(rn(reg), r0, r1);
995 sse_str_f(rn(reg), r2);
996 jit_unget_reg(reg);
997#else
998 movssrm(r2, 0, r0, r1, _SCL1);
999#endif
1000}
1001
1002static void
1003_sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1004{
1005 jit_int32_t reg;
1006 if (can_sign_extend_int_p(i0))
1007 movssrm(r1, i0, r0, _NOREG, _SCL1);
1008 else {
1009 reg = jit_get_reg(jit_class_gpr);
1010#if __X64_32
1011 addi(rn(reg), r0, i0);
1012 sse_str_f(rn(reg), r1);
1013#else
1014 movi(rn(reg), i0);
1015 sse_stxr_f(rn(reg), r0, r1);
1016#endif
1017 jit_unget_reg(reg);
1018 }
1019}
1020
1021static jit_word_t
1022_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1023{
1024 ucomissr(r1, r0);
1025 ja(i0);
1026 return (_jit->pc.w);
1027}
1028fbopi(lt)
1029
1030static jit_word_t
1031_sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1032{
1033 ucomissr(r1, r0);
1034 jae(i0);
1035 return (_jit->pc.w);
1036}
1037fbopi(le)
1038
1039static jit_word_t
1040_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1041{
1042 jit_word_t jp_code;
1043 ucomissr(r0, r1);
1044 jps(0);
1045 jp_code = _jit->pc.w;
1046 je(i0);
1047 patch_rel_char(jp_code, _jit->pc.w);
1048 return (_jit->pc.w);
1049}
1050fbopi(eq)
1051
1052static jit_word_t
1053_sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1054{
1055 ucomissr(r0, r1);
1056 jae(i0);
1057 return (_jit->pc.w);
1058}
1059fbopi(ge)
1060
1061static jit_word_t
1062_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1063{
1064 ucomissr(r0, r1);
1065 ja(i0);
1066 return (_jit->pc.w);
1067}
1068fbopi(gt)
1069
1070static jit_word_t
1071_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1072{
1073 jit_word_t jp_code;
1074 jit_word_t jz_code;
1075 ucomissr(r0, r1);
1076 jps(0);
1077 jp_code = _jit->pc.w;
1078 jzs(0);
1079 jz_code = _jit->pc.w;
1080 patch_rel_char(jp_code, _jit->pc.w);
1081 jmpi(i0);
1082 patch_rel_char(jz_code, _jit->pc.w);
1083 return (_jit->pc.w);
1084}
1085fbopi(ne)
1086
1087static jit_word_t
1088_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1089{
1090 ucomissr(r0, r1);
1091 jnae(i0);
1092 return (_jit->pc.w);
1093}
1094fbopi(unlt)
1095
1096static jit_word_t
1097_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1098{
1099 if (r0 == r1)
1100 jmpi(i0);
1101 else {
1102 ucomissr(r0, r1);
1103 jna(i0);
1104 }
1105 return (_jit->pc.w);
1106}
1107fbopi(unle)
1108
1109static jit_word_t
1110_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1111{
1112 if (r0 == r1)
1113 jmpi(i0);
1114 else {
1115 ucomissr(r0, r1);
1116 je(i0);
1117 }
1118 return (_jit->pc.w);
1119}
1120fbopi(uneq)
1121
1122static jit_word_t
1123_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1124{
1125 if (r0 == r1)
1126 jmpi(i0);
1127 else {
1128 ucomissr(r1, r0);
1129 jna(i0);
1130 }
1131 return (_jit->pc.w);
1132}
1133fbopi(unge)
1134
1135static jit_word_t
1136_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1137{
1138 ucomissr(r1, r0);
1139 jnae(i0);
1140 return (_jit->pc.w);
1141}
1142fbopi(ungt)
1143
1144static jit_word_t
1145_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1146{
1147 ucomissr(r0, r1);
1148 jne(i0);
1149 return (_jit->pc.w);
1150}
1151fbopi(ltgt)
1152
1153static jit_word_t
1154_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1155{
1156 ucomissr(r0, r1);
1157 jnp(i0);
1158 return (_jit->pc.w);
1159}
1160fbopi(ord)
1161
1162static jit_word_t
1163_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1164{
1165 ucomissr(r0, r1);
1166 jp(i0);
1167 return (_jit->pc.w);
1168}
1169fbopi(unord)
1170
1171dopi(lt)
1172dopi(le)
1173
1174static void
1175_sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1176{
1177 jit_bool_t rc;
1178 jit_int32_t reg;
1179 jit_word_t jp_code;
1180 if ((rc = reg8_p(r0)))
1181 reg = r0;
1182 else {
1183 reg = _RAX_REGNO;
1184 movr(r0, _RAX_REGNO);
1185 }
1186 ixorr(reg, reg);
1187 ucomisdr(r2, r1);
1188 jpes(0);
1189 jp_code = _jit->pc.w;
1190 cc(X86_CC_E, reg);
1191 patch_rel_char(jp_code, _jit->pc.w);
1192 if (!rc)
1193 xchgr(r0, reg);
1194}
1195
1196dopi(eq)
1197dopi(ge)
1198dopi(gt)
1199
1200static void
1201_sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1202{
1203 jit_bool_t rc;
1204 jit_int32_t reg;
1205 jit_word_t jp_code;
1206 if ((rc = reg8_p(r0)))
1207 reg = r0;
1208 else {
1209 reg = _RAX_REGNO;
1210 movr(r0, _RAX_REGNO);
1211 }
1212 imovi(reg, 1);
1213 ucomisdr(r2, r1);
1214 jpes(0);
1215 jp_code = _jit->pc.w;
1216 cc(X86_CC_NE, reg);
1217 patch_rel_char(jp_code, _jit->pc.w);
1218 if (!rc)
1219 xchgr(r0, reg);
1220}
1221
1222dopi(ne)
1223dopi(unlt)
1224
1225static void
1226_sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1227{
1228 if (r1 == r2)
1229 movi(r0, 1);
1230 else
1231 ssecmpd(X86_CC_NA, r0, r2, r1);
1232}
1233
1234dopi(unle)
1235
1236static void
1237_sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1238{
1239 if (r1 == r2)
1240 movi(r0, 1);
1241 else
1242 ssecmpd(X86_CC_E, r0, r1, r2);
1243}
1244
1245dopi(uneq)
1246
1247static void
1248_sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1249{
1250 if (r1 == r2)
1251 movi(r0, 1);
1252 else
1253 ssecmpd(X86_CC_NA, r0, r1, r2);
1254}
1255
1256dopi(unge)
1257dopi(ungt)
1258
1259static void
1260_sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1261{
1262 if (r1 == r2)
1263 ixorr(r0, r0);
1264 else
1265 ssecmpd(X86_CC_NE, r0, r1, r2);
1266}
1267
1268dopi(ltgt)
1269dopi(ord)
1270dopi(unord)
1271
1272static void
1273_sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1274{
1275 if (r0 != r1)
1276 ssexr(0xf2, X86_SSE_MOV, r0, r1);
1277}
1278
1279static void
1280_sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
1281{
1282 union {
1283 jit_int32_t ii[2];
1284 jit_word_t w;
1285 jit_float64_t d;
1286 } data;
1287 jit_int32_t reg;
1288 jit_bool_t ldi;
1289
1290 data.d = *i0;
1291 if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
1292 xorpdr(r0, r0);
1293 else {
1294 ldi = !_jitc->no_data;
1295#if __X64
1296 /* if will allocate a register for offset, just use immediate */
1297 if (ldi && !sse_address_p(i0))
1298 ldi = 0;
1299#endif
1300 if (ldi)
1301 sse_ldi_d(r0, (jit_word_t)i0);
1302 else {
1303 reg = jit_get_reg(jit_class_gpr);
1304#if __X64 && !__X64_32
1305 movi(rn(reg), data.w);
1306 movdqxr(r0, rn(reg));
1307 jit_unget_reg(reg);
1308#else
1309 movi(rn(reg), data.ii[0]);
1310 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1311 movi(rn(reg), data.ii[1]);
1312 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1313 jit_unget_reg(reg);
1314 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1315#endif
1316 }
1317 }
1318}
1319
1320static void
1321_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1322{
1323 jit_int32_t reg;
1324 if (sse_address_p(i0))
1325 movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
1326 else {
1327 reg = jit_get_reg(jit_class_gpr);
1328 movi(rn(reg), i0);
1329 sse_ldr_d(r0, rn(reg));
1330 jit_unget_reg(reg);
1331 }
1332}
1333
1334static void
1335_sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1336{
1337#if __X64_32
1338 jit_int32_t reg;
1339 reg = jit_get_reg(jit_class_gpr);
1340 addr(rn(reg), r1, r2);
1341 sse_ldr_d(r0, rn(reg));
1342 jit_unget_reg(reg);
1343#else
1344 movsdmr(0, r1, r2, _SCL1, r0);
1345#endif
1346}
1347
1348static void
1349_sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1350{
1351 jit_int32_t reg;
1352 if (can_sign_extend_int_p(i0))
1353 movsdmr(i0, r1, _NOREG, _SCL1, r0);
1354 else {
1355 reg = jit_get_reg(jit_class_gpr);
1356#if __X64_32
1357 addi(rn(reg), r1, i0);
1358 sse_ldr_d(r0, rn(reg));
1359#else
1360 movi(rn(reg), i0);
1361 sse_ldxr_d(r0, r1, rn(reg));
1362#endif
1363 jit_unget_reg(reg);
1364 }
1365}
1366
1367static void
1368_sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1369{
1370 jit_int32_t reg;
1371 if (sse_address_p(i0))
1372 movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
1373 else {
1374 reg = jit_get_reg(jit_class_gpr);
1375 movi(rn(reg), i0);
1376 sse_str_d(rn(reg), r0);
1377 jit_unget_reg(reg);
1378 }
1379}
1380
1381static void
1382_sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1383{
1384#if __X64_32
1385 jit_int32_t reg;
1386 reg = jit_get_reg(jit_class_gpr);
1387 addr(rn(reg), r0, r1);
1388 sse_str_d(rn(reg), r2);
1389 jit_unget_reg(reg);
1390#else
1391 movsdrm(r2, 0, r0, r1, _SCL1);
1392#endif
1393}
1394
1395static void
1396_sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1397{
1398 jit_int32_t reg;
1399 if (can_sign_extend_int_p(i0))
1400 movsdrm(r1, i0, r0, _NOREG, _SCL1);
1401 else {
1402 reg = jit_get_reg(jit_class_gpr);
1403#if __X64_32
1404 addi(rn(reg), r0, i0);
1405 sse_str_d(rn(reg), r1);
1406#else
1407 movi(rn(reg), i0);
1408 sse_stxr_f(rn(reg), r0, r1);
1409#endif
1410 jit_unget_reg(reg);
1411 }
1412}
1413
1414static jit_word_t
1415_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1416{
1417 ucomisdr(r1, r0);
1418 ja(i0);
1419 return (_jit->pc.w);
1420}
1421dbopi(lt)
1422
1423static jit_word_t
1424_sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1425{
1426 ucomisdr(r1, r0);
1427 jae(i0);
1428 return (_jit->pc.w);
1429}
1430dbopi(le)
1431
1432static jit_word_t
1433_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1434{
1435 jit_word_t jp_code;
1436 ucomisdr(r0, r1);
1437 jps(0);
1438 jp_code = _jit->pc.w;
1439 je(i0);
1440 patch_rel_char(jp_code, _jit->pc.w);
1441 return (_jit->pc.w);
1442}
1443dbopi(eq)
1444
1445static jit_word_t
1446_sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1447{
1448 ucomisdr(r0, r1);
1449 jae(i0);
1450 return (_jit->pc.w);
1451}
1452dbopi(ge)
1453
1454static jit_word_t
1455_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1456{
1457 ucomisdr(r0, r1);
1458 ja(i0);
1459 return (_jit->pc.w);
1460}
1461dbopi(gt)
1462
1463static jit_word_t
1464_sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1465{
1466 jit_word_t jp_code;
1467 jit_word_t jz_code;
1468 ucomisdr(r0, r1);
1469 jps(0);
1470 jp_code = _jit->pc.w;
1471 jzs(0);
1472 jz_code = _jit->pc.w;
1473 patch_rel_char(jp_code, _jit->pc.w);
1474 jmpi(i0);
1475 patch_rel_char(jz_code, _jit->pc.w);
1476 return (_jit->pc.w);
1477}
1478dbopi(ne)
1479
1480static jit_word_t
1481_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1482{
1483 ucomisdr(r0, r1);
1484 jnae(i0);
1485 return (_jit->pc.w);
1486}
1487dbopi(unlt)
1488
1489static jit_word_t
1490_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1491{
1492 if (r0 == r1)
1493 jmpi(i0);
1494 else {
1495 ucomisdr(r0, r1);
1496 jna(i0);
1497 }
1498 return (_jit->pc.w);
1499}
1500dbopi(unle)
1501
1502static jit_word_t
1503_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1504{
1505 if (r0 == r1)
1506 jmpi(i0);
1507 else {
1508 ucomisdr(r0, r1);
1509 je(i0);
1510 }
1511 return (_jit->pc.w);
1512}
1513dbopi(uneq)
1514
1515static jit_word_t
1516_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1517{
1518 if (r0 == r1)
1519 jmpi(i0);
1520 else {
1521 ucomisdr(r1, r0);
1522 jna(i0);
1523 }
1524 return (_jit->pc.w);
1525}
1526dbopi(unge)
1527
1528static jit_word_t
1529_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1530{
1531 ucomisdr(r1, r0);
1532 jnae(i0);
1533 return (_jit->pc.w);
1534}
1535dbopi(ungt)
1536
1537static jit_word_t
1538_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1539{
1540 ucomisdr(r0, r1);
1541 jne(i0);
1542 return (_jit->pc.w);
1543}
1544dbopi(ltgt)
1545
1546static jit_word_t
1547_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1548{
1549 ucomisdr(r0, r1);
1550 jnp(i0);
1551 return (_jit->pc.w);
1552}
1553dbopi(ord)
1554
1555static jit_word_t
1556_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1557{
1558 ucomisdr(r0, r1);
1559 jp(i0);
1560 return (_jit->pc.w);
1561}
1562dbopi(unord)
1563# undef fopi
1564# undef fbopi
1565# undef bopi
1566# undef dbopi
1567# undef fpr_bopi
1568# undef fpr_opi
1569#endif