update libchdr
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86-sse.c
CommitLineData
4a71579b 1/*
79bfeef6 2 * Copyright (C) 2012-2023 Free Software Foundation, Inc.
4a71579b
PC
3 *
4 * This file is part of GNU lightning.
5 *
6 * GNU lightning is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
10 *
11 * GNU lightning is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 * License for more details.
15 *
16 * Authors:
17 * Paulo Cesar Pereira de Andrade
18 */
19
20#if PROTO
4a71579b
PC
21# define _XMM6_REGNO 6
22# define _XMM7_REGNO 7
23# define _XMM8_REGNO 8
24# define _XMM9_REGNO 9
25# define _XMM10_REGNO 10
26# define _XMM11_REGNO 11
27# define _XMM12_REGNO 12
28# define _XMM13_REGNO 13
29# define _XMM14_REGNO 14
30# define _XMM15_REGNO 15
31#define X86_SSE_MOV 0x10
32#define X86_SSE_MOV1 0x11
33#define X86_SSE_MOVLP 0x12
34#define X86_SSE_MOVHP 0x16
35#define X86_SSE_MOVA 0x28
36#define X86_SSE_CVTIS 0x2a
37#define X86_SSE_CVTTSI 0x2c
38#define X86_SSE_CVTSI 0x2d
39#define X86_SSE_UCOMI 0x2e
40#define X86_SSE_COMI 0x2f
41#define X86_SSE_ROUND 0x3a
42#define X86_SSE_SQRT 0x51
43#define X86_SSE_RSQRT 0x52
44#define X86_SSE_RCP 0x53
45#define X86_SSE_AND 0x54
46#define X86_SSE_ANDN 0x55
47#define X86_SSE_OR 0x56
48#define X86_SSE_XOR 0x57
49#define X86_SSE_ADD 0x58
50#define X86_SSE_MUL 0x59
51#define X86_SSE_CVTSD 0x5a
52#define X86_SSE_CVTDT 0x5b
53#define X86_SSE_SUB 0x5c
54#define X86_SSE_MIN 0x5d
55#define X86_SSE_DIV 0x5e
56#define X86_SSE_MAX 0x5f
57#define X86_SSE_X2G 0x6e
58#define X86_SSE_EQB 0x74
59#define X86_SSE_EQW 0x75
60#define X86_SSE_EQD 0x76
61#define X86_SSE_G2X 0x7e
62#define X86_SSE_MOV2 0xd6
63# define sser(c,r0,r1) _sser(_jit,c,r0,r1)
64static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
65# define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
66static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
67# define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
68static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
69# define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
70# define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1)
71# define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1)
72# define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1)
73# define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1)
74# define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1)
75# define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1)
76# define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1)
77# define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1)
78# define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1)
79# define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
80# define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
81# if __X64
82# define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
83# define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
84# define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
85# define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
86# else
87# define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
88# define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
89# endif
90# define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
91# define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
92# define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)
93# define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
94# define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
95# define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
96# define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
97# define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
98# define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
99# define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
100# define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
101# define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
102# define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1)
103# if __X64 && !__X64_32
104# define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
105static void
106_sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
107# else
108# define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1)
109# endif
110# define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
111# define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
112# define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
113# define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
114# define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
115static void
116_ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
117 jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
118# define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2)
119static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
120# define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0)
121static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
122# define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2)
123static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
124# define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0)
125static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
126# define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2)
127static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
128# define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0)
129static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
130# define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2)
131static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
132# define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0)
133static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
134# define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1)
135# define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0)
136static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
137# define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1)
138# define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0)
139static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
140# define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2)
141static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
142# define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0)
143static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
144# define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2)
145static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
146# define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0)
147static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
148# define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2)
149static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
150# define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0)
151static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
152# define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2)
153static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
154# define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0)
155static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
156# define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1)
157static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
158# define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1)
159static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
160# define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1)
161static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
162# define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1)
163static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
164# define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
165# define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
166# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
167# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
168static void
169_ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
170 jit_int32_t, jit_int32_t, jit_int32_t);
171#define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1)
172static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
173#define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
174static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
175# define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
176static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
177# define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
178# define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0)
179static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
180# define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2)
181# define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0)
182static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
183# define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2)
184static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
185# define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0)
186static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
187# define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1)
188# define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0)
189static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
190# define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1)
191# define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0)
192static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
193# define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2)
194static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
195# define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0)
196static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
197# define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1)
198# define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0)
199static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
200# define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2)
201# define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0)
202static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
203static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
204# define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2)
205static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
206# define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0)
207static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
208# define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2)
209static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
210# define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0)
211static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
212# define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2)
213# define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0)
214static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
215# define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2)
216static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
217# define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0)
218static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
219# define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1)
220# define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0)
221static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
222# define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1)
223# define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0)
224# define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0)
225static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
226# define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2)
227static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
228# define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
229static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
230# define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
231# define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
232static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
233# define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2)
234static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
235# define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
236static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
237# define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
238static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
239# define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
240static jit_word_t
241_sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
242# define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1)
243static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
244# define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1)
245static jit_word_t
246_sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
247# define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1)
248static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
249# define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1)
250static jit_word_t
251_sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
252# define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1)
253static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
254# define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1)
255static jit_word_t
256_sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
257# define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1)
258static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
259# define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1)
260static jit_word_t
261_sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
262# define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1)
263static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
264# define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1)
265static jit_word_t
266_sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
267# define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1)
268static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
269# define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1)
270static jit_word_t
271_sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
272# define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1)
273static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
274# define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1)
275static jit_word_t
276_sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
277# define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1)
278static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
279# define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1)
280static jit_word_t
281_sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
282# define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1)
283static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
284# define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1)
285static jit_word_t
286_sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
287# define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1)
288static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
289# define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1)
290static jit_word_t
291_sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
292# define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1)
293static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
294# define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1)
295static jit_word_t
296_sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
297# define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1)
298static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
299# define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1)
300static jit_word_t
301_sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
302# define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1)
303static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
304# define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1)
305static jit_word_t
306_sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
307#define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1)
308static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
309#define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
310static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
311# define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
312# define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
313static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
314# define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2)
315# define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0)
316static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
317# define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2)
318static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
319# define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0)
320static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
321# define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1)
322# define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0)
323static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
324# define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1)
325# define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0)
326static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
327# define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2)
328static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
329# define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0)
330static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
331# define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1)
332# define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0)
333static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
334# define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2)
335static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
336# define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0)
337static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
338# define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2)
339static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
340# define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0)
341static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
342# define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2)
343static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
344# define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0)
345static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
346# define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2)
347# define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0)
348static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
349# define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2)
350static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
351# define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0)
352static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
353# define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1)
354# define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0)
355static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
356# define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1)
357# define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0)
358static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
359# define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0)
360# define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0)
361static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
362# define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2)
363static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
364# define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0)
365static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
366# define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1)
367# define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1)
368# define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0)
369static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
370# define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2)
371static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
372# define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1)
373static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
374static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
375# define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1)
376static jit_word_t
377_sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
378# define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1)
379static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
380# define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1)
381static jit_word_t
382_sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
383# define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1)
384static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
385# define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1)
386static jit_word_t
387_sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
388# define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1)
389static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
390# define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1)
391static jit_word_t
392_sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
393# define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1)
394static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
395# define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1)
396static jit_word_t
397_sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
398# define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1)
399static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
400# define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1)
401static jit_word_t
402_sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
403# define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1)
404static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
405# define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1)
406static jit_word_t
407_sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
408# define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1)
409static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
410# define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1)
411static jit_word_t
412_sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
413# define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1)
414static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
415# define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1)
416static jit_word_t
417_sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
418# define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1)
419static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
420# define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1)
421static jit_word_t
422_sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
423# define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1)
424static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
425# define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1)
426static jit_word_t
427_sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
428# define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1)
429static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
430# define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1)
431static jit_word_t
432_sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
433# define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1)
434static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
435# define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1)
436static jit_word_t
437_sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
438# define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1)
439static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
440# define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1)
441static jit_word_t
442_sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
443#endif
444
445#if CODE
/*
 * fpr_opi(name, type, size)
 *
 * Generates _sse_<name>i_<type>(), the "immediate operand" variant of a
 * three-operand floating-point operation.  The generated function:
 *   - grabs a scratch register of class fpr|xpr and asserts that it
 *     satisfies jit_sse_reg_p(),
 *   - loads the constant pointed to by i0 into it with sse_movi_<type>(),
 *   - forwards to the register form sse_<name>r_<type>(r0, r1, scratch),
 *   - releases the scratch register.
 * `size` selects the pointee type jit_float<size>_t of i0.
 */
#  define fpr_opi(name, type, size)                                     \
static void                                                             \
_sse_##name##i_##type(jit_state_t *_jit,                                \
                      jit_int32_t r0, jit_int32_t r1,                   \
                      jit_float##size##_t *i0)                          \
{                                                                       \
    jit_int32_t         tmp;                                            \
                                                                        \
    tmp = jit_get_reg(jit_class_fpr|jit_class_xpr);                     \
    assert(jit_sse_reg_p(tmp));                                         \
    sse_movi_##type(rn(tmp), i0);                                       \
    sse_##name##r_##type(r0, r1, rn(tmp));                              \
    jit_unget_reg(tmp);                                                 \
}
/*
 * fpr_bopi(name, type, size)
 *
 * Generates _sse_b<name>i_<type>(), the "immediate operand" variant of a
 * floating-point compare-and-branch.  The generated function:
 *   - grabs a scratch register of class fpr|xpr|nospill and asserts
 *     that it satisfies jit_sse_reg_p(),
 *   - loads the constant pointed to by i1 into it with sse_movi_<type>(),
 *   - forwards to the register form sse_b<name>r_<type>(i0, r0, scratch),
 *   - releases the scratch register and returns whatever the register
 *     form returned.
 * `size` selects the pointee type jit_float<size>_t of i1.
 */
#  define fpr_bopi(name, type, size)                                    \
static jit_word_t                                                       \
_sse_b##name##i_##type(jit_state_t *_jit,                               \
                       jit_word_t i0, jit_int32_t r0,                   \
                       jit_float##size##_t *i1)                         \
{                                                                       \
    jit_word_t          res;                                            \
    jit_int32_t         tmp;                                            \
                                                                        \
    tmp = jit_get_reg(jit_class_fpr|jit_class_xpr|jit_class_nospill);   \
    assert(jit_sse_reg_p(tmp));                                         \
    sse_movi_##type(rn(tmp), i1);                                       \
    res = sse_b##name##r_##type(i0, r0, rn(tmp));                       \
    jit_unget_reg(tmp);                                                 \
    return (res);                                                       \
}
/* Shorthands that instantiate the generators above for single
 * precision (type suffix f, jit_float32_t) and double precision
 * (type suffix d, jit_float64_t). */
#  define fopi(name)                    fpr_opi(name, f, 32)
#  define fbopi(name)                   fpr_bopi(name, f, 32)
#  define dopi(name)                    fpr_opi(name, d, 64)
#  define dbopi(name)                   fpr_bopi(name, d, 64)
477static void
478_sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
479{
480 rex(0, 0, r0, 0, r1);
481 ic(0x0f);
482 ic(c);
483 mrm(0x03, r7(r0), r7(r1));
484}
485
486static void
487_ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
488 jit_int32_t r0, jit_int32_t r1)
489{
490 ic(p);
491 rex(0, 0, r0, 0, r1);
492 ic(0x0f);
493 ic(c);
494 mrm(0x03, r7(r0), r7(r1));
495}
496
497static void
498_ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
499 jit_int32_t m, jit_int32_t i)
500{
501 ic(0x66);
502 rex(0, 0, 0, 0, r0);
503 ic(0x0f);
504 ic(c);
505 mrm(0x03, r7(m), r7(r0));
506 ic(i);
507}
508
/*
 * The guard matches the PROTO section, where the prototype of
 * _sselxr() and the sselxr() macro that calls it exist only when
 * __X64 && !__X64_32; in every other configuration sselxr() maps to
 * ssexr().  Guarding the definition with plain __X64 compiled a static
 * function that nothing references when __X64_32 is set.
 */
#if __X64 && !__X64_32
/*
 * Same byte sequence as _ssexr(), except that the second argument
 * passed to rex() is 1 instead of 0.
 * NOTE(review): that argument presumably requests the 64-bit operand
 * form (REX.W) -- confirm against rex() in jit_x86-cpu.c.
 *
 * Output, in order: the prefix byte `p`, the rex() record for r0/r1,
 * the 0x0f escape byte, the opcode byte `c`, and a ModRM byte built by
 * mrm() with mode 0x03 from r7(r0) and r7(r1).
 *
 * Reached through the sselxr() macro, which this file uses for
 * sse_truncr_f_l, sse_truncr_d_l, sse_extr_f, sse_extr_d (under __X64)
 * and movdqxr.
 */
static void
_sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
        jit_int32_t r0, jit_int32_t r1)
{
    ic(p);
    rex(0, 1, r0, 0, r1);
    ic(0x0f);
    ic(c);
    mrm(0x03, r7(r0), r7(r1));
}
#endif
521
522static void
523_ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
524 jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
525{
526 ic(px);
527 rex(0, 0, rd, ri, rb);
528 ic(0x0f);
529 ic(code);
530 rx(rd, md, rb, ri, ms);
531}
532
533static void
534_sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
535{
536 if (r0 == r1)
537 addssr(r0, r2);
538 else if (r0 == r2)
539 addssr(r0, r1);
540 else {
541 sse_movr_f(r0, r1);
542 addssr(r0, r2);
543 }
544}
545
546fopi(add)
547
548static void
549_sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
550{
551 if (r0 == r1)
552 addsdr(r0, r2);
553 else if (r0 == r2)
554 addsdr(r0, r1);
555 else {
556 sse_movr_d(r0, r1);
557 addsdr(r0, r2);
558 }
559}
560
561dopi(add)
562
563static void
564_sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
565{
566 jit_int32_t reg;
567 if (r0 == r1)
568 subssr(r0, r2);
569 else if (r0 == r2) {
570 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
571 sse_movr_f(rn(reg), r0);
572 sse_movr_f(r0, r1);
573 subssr(r0, rn(reg));
574 jit_unget_reg(reg);
575 }
576 else {
577 sse_movr_f(r0, r1);
578 subssr(r0, r2);
579 }
580}
581
582fopi(sub)
583
584static void
585_sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
586{
587 jit_int32_t reg;
588 if (r0 == r1)
589 subsdr(r0, r2);
590 else if (r0 == r2) {
591 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
592 sse_movr_d(rn(reg), r0);
593 sse_movr_d(r0, r1);
594 subsdr(r0, rn(reg));
595 jit_unget_reg(reg);
596 }
597 else {
598 sse_movr_d(r0, r1);
599 subsdr(r0, r2);
600 }
601}
602
603dopi(sub)
604
605fopi(rsb)
606
607dopi(rsb)
608
609static void
610_sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
611{
612 if (r0 == r1)
613 mulssr(r0, r2);
614 else if (r0 == r2)
615 mulssr(r0, r1);
616 else {
617 sse_movr_f(r0, r1);
618 mulssr(r0, r2);
619 }
620}
621
622fopi(mul)
623
624static void
625_sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
626{
627 if (r0 == r1)
628 mulsdr(r0, r2);
629 else if (r0 == r2)
630 mulsdr(r0, r1);
631 else {
632 sse_movr_d(r0, r1);
633 mulsdr(r0, r2);
634 }
635}
636
637dopi(mul)
638
639static void
640_sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
641{
642 jit_int32_t reg;
643 if (r0 == r1)
644 divssr(r0, r2);
645 else if (r0 == r2) {
646 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
647 sse_movr_f(rn(reg), r0);
648 sse_movr_f(r0, r1);
649 divssr(r0, rn(reg));
650 jit_unget_reg(reg);
651 }
652 else {
653 sse_movr_f(r0, r1);
654 divssr(r0, r2);
655 }
656}
657
658fopi(div)
659
660static void
661_sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
662{
663 jit_int32_t reg;
664 if (r0 == r1)
665 divsdr(r0, r2);
666 else if (r0 == r2) {
667 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
668 sse_movr_d(rn(reg), r0);
669 sse_movr_d(r0, r1);
670 divsdr(r0, rn(reg));
671 jit_unget_reg(reg);
672 }
673 else {
674 sse_movr_d(r0, r1);
675 divsdr(r0, r2);
676 }
677}
678
679dopi(div)
680
681static void
682_sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
683{
684 jit_int32_t reg;
685 if (r0 == r1) {
686 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
687 pcmpeqlr(rn(reg), rn(reg));
688 psrl(rn(reg), 1);
689 andpsr(r0, rn(reg));
690 jit_unget_reg(reg);
691 }
692 else {
693 pcmpeqlr(r0, r0);
694 psrl(r0, 1);
695 andpsr(r0, r1);
696 }
697}
698
699static void
700_sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
701{
702 jit_int32_t reg;
703 if (r0 == r1) {
704 reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
705 pcmpeqlr(rn(reg), rn(reg));
706 psrq(rn(reg), 1);
707 andpdr(r0, rn(reg));
708 jit_unget_reg(reg);
709 }
710 else {
711 pcmpeqlr(r0, r0);
712 psrq(r0, 1);
713 andpdr(r0, r1);
714 }
715}
716
717static void
718_sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
719{
720 jit_int32_t freg, ireg;
721 ireg = jit_get_reg(jit_class_gpr);
722 imovi(rn(ireg), 0x80000000);
723 if (r0 == r1) {
724 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
725 movdlxr(rn(freg), rn(ireg));
726 xorpsr(r0, rn(freg));
727 jit_unget_reg(freg);
728 }
729 else {
730 movdlxr(r0, rn(ireg));
731 xorpsr(r0, r1);
732 }
733 jit_unget_reg(ireg);
734}
735
736static void
737_sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
738{
739 jit_int32_t freg, ireg;
740 ireg = jit_get_reg(jit_class_gpr);
741 imovi(rn(ireg), 0x80000000);
742 if (r0 == r1) {
743 freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
744 movdlxr(rn(freg), rn(ireg));
745 pslq(rn(freg), 32);
746 xorpdr(r0, rn(freg));
747 jit_unget_reg(freg);
748 }
749 else {
750 movdlxr(r0, rn(ireg));
751 pslq(r0, 32);
752 xorpdr(r0, r1);
753 }
754 jit_unget_reg(ireg);
755}
756
757static void
758_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
759 jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
760{
761 jit_bool_t rc;
762 jit_int32_t reg;
763 if ((rc = reg8_p(r0)))
764 reg = r0;
765 else {
766 reg = _RAX_REGNO;
767 movr(r0, reg);
768 }
769 ixorr(reg, reg);
770 if (d)
771 ucomisdr(r2, r1);
772 else
773 ucomissr(r2, r1);
774 cc(code, reg);
775 if (!rc)
776 xchgr(r0, reg);
777}
778
779static void
780_sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
781{
782 if (r0 != r1)
783 ssexr(0xf3, X86_SSE_MOV, r0, r1);
784}
785
786static void
787_sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
788{
789 union {
790 jit_int32_t i;
791 jit_float32_t f;
792 } data;
793 jit_int32_t reg;
794 jit_bool_t ldi;
795
796 data.f = *i0;
797 if (data.f == 0.0 && !(data.i & 0x80000000))
798 xorpsr(r0, r0);
799 else {
800 ldi = !_jitc->no_data;
801#if __X64
802 /* if will allocate a register for offset, just use immediate */
79bfeef6
PC
803# if CAN_RIP_ADDRESS
804 if (ldi) {
805 jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
806 ldi = can_sign_extend_int_p(rel);
807 if (!ldi && address_p(i0))
808 ldi = 1;
809 }
810# else
811 if (ldi && !address_p(i0))
4a71579b 812 ldi = 0;
79bfeef6 813# endif
4a71579b
PC
814#endif
815 if (ldi)
816 sse_ldi_f(r0, (jit_word_t)i0);
817 else {
818 reg = jit_get_reg(jit_class_gpr);
819 movi(rn(reg), data.i);
820 movdlxr(r0, rn(reg));
821 jit_unget_reg(reg);
822 }
823 }
824}
825
826fopi(lt)
827fopi(le)
828
829static void
830_sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
831{
832 jit_bool_t rc;
833 jit_int32_t reg;
834 jit_word_t jp_code;
835 if ((rc = reg8_p(r0)))
836 reg = r0;
837 else {
838 reg = _RAX_REGNO;
839 movr(r0, _RAX_REGNO);
840 }
841 ixorr(reg, reg);
842 ucomissr(r2, r1);
79bfeef6 843 jp_code = jpes(0);
4a71579b 844 cc(X86_CC_E, reg);
79bfeef6 845 patch_at(jp_code, _jit->pc.w);
4a71579b
PC
846 if (!rc)
847 xchgr(r0, reg);
848}
849
850fopi(eq)
851fopi(ge)
852fopi(gt)
853
854static void
855_sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
856{
857 jit_bool_t rc;
858 jit_int32_t reg;
859 jit_word_t jp_code;
860 if ((rc = reg8_p(r0)))
861 reg = r0;
862 else {
863 reg = _RAX_REGNO;
864 movr(r0, _RAX_REGNO);
865 }
866 imovi(reg, 1);
867 ucomissr(r2, r1);
79bfeef6 868 jp_code = jpes(0);
4a71579b 869 cc(X86_CC_NE, reg);
79bfeef6 870 patch_at(jp_code, _jit->pc.w);
4a71579b
PC
871 if (!rc)
872 xchgr(r0, reg);
873}
874
875fopi(ne)
876fopi(unlt)
877
878static void
879_sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
880{
881 if (r1 == r2)
882 movi(r0, 1);
883 else
884 ssecmpf(X86_CC_NA, r0, r2, r1);
885}
886
887fopi(unle)
888
889static void
890_sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
891{
892 if (r1 == r2)
893 movi(r0, 1);
894 else
895 ssecmpf(X86_CC_E, r0, r1, r2);
896}
897
898fopi(uneq)
899
900static void
901_sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
902{
903 if (r1 == r2)
904 movi(r0, 1);
905 else
906 ssecmpf(X86_CC_NA, r0, r1, r2);
907}
908
909fopi(unge)
910fopi(ungt)
911
912static void
913_sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
914{
915 if (r1 == r2)
916 ixorr(r0, r0);
917 else
918 ssecmpf(X86_CC_NE, r0, r1, r2);
919}
920
921fopi(ltgt)
922fopi(ord)
923fopi(unord)
924
925static void
926_sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
927{
928 jit_int32_t reg;
79bfeef6
PC
929#if CAN_RIP_ADDRESS
930 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
931 if (can_sign_extend_int_p(rel))
932 movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
933 else
934#endif
935 if (address_p(i0))
4a71579b
PC
936 movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
937 else {
938 reg = jit_get_reg(jit_class_gpr);
939 movi(rn(reg), i0);
940 sse_ldr_f(r0, rn(reg));
941 jit_unget_reg(reg);
942 }
943}
944
945static void
946_sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
947{
948#if __X64_32
949 jit_int32_t reg;
950 reg = jit_get_reg(jit_class_gpr);
951 addr(rn(reg), r1, r2);
952 sse_ldr_f(r0, rn(reg));
953 jit_unget_reg(reg);
954#else
955 movssmr(0, r1, r2, _SCL1, r0);
956#endif
957}
958
959static void
960_sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
961{
962 jit_int32_t reg;
963 if (can_sign_extend_int_p(i0))
964 movssmr(i0, r1, _NOREG, _SCL1, r0);
965 else {
966 reg = jit_get_reg(jit_class_gpr);
967#if __X64_32
968 addi(rn(reg), r1, i0);
969 sse_ldr_f(r0, rn(reg));
970#else
971 movi(rn(reg), i0);
972 sse_ldxr_f(r0, r1, rn(reg));
973#endif
974 jit_unget_reg(reg);
975 }
976}
977
978static void
979_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
980{
981 jit_int32_t reg;
79bfeef6
PC
982#if CAN_RIP_ADDRESS
983 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
984 if (can_sign_extend_int_p(rel))
985 movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
986 else
987#endif
988 if (address_p(i0))
4a71579b
PC
989 movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
990 else {
991 reg = jit_get_reg(jit_class_gpr);
992 movi(rn(reg), i0);
993 sse_str_f(rn(reg), r0);
994 jit_unget_reg(reg);
995 }
996}
997
998static void
999_sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1000{
1001#if __X64_32
1002 jit_int32_t reg;
1003 reg = jit_get_reg(jit_class_gpr);
1004 addr(rn(reg), r0, r1);
1005 sse_str_f(rn(reg), r2);
1006 jit_unget_reg(reg);
1007#else
1008 movssrm(r2, 0, r0, r1, _SCL1);
1009#endif
1010}
1011
1012static void
1013_sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1014{
1015 jit_int32_t reg;
1016 if (can_sign_extend_int_p(i0))
1017 movssrm(r1, i0, r0, _NOREG, _SCL1);
1018 else {
1019 reg = jit_get_reg(jit_class_gpr);
1020#if __X64_32
1021 addi(rn(reg), r0, i0);
1022 sse_str_f(rn(reg), r1);
1023#else
1024 movi(rn(reg), i0);
1025 sse_stxr_f(rn(reg), r0, r1);
1026#endif
1027 jit_unget_reg(reg);
1028 }
1029}
1030
1031static jit_word_t
1032_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1033{
1034 ucomissr(r1, r0);
79bfeef6 1035 return (ja(i0));
4a71579b
PC
1036}
1037fbopi(lt)
1038
1039static jit_word_t
1040_sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1041{
1042 ucomissr(r1, r0);
79bfeef6 1043 return (jae(i0));
4a71579b
PC
1044}
1045fbopi(le)
1046
1047static jit_word_t
1048_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1049{
79bfeef6 1050 jit_word_t w;
4a71579b
PC
1051 jit_word_t jp_code;
1052 ucomissr(r0, r1);
79bfeef6
PC
1053 jp_code = jps(0);
1054 w = je(i0);
1055 patch_at(jp_code, _jit->pc.w);
1056 return (w);
4a71579b
PC
1057}
1058fbopi(eq)
1059
1060static jit_word_t
1061_sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1062{
1063 ucomissr(r0, r1);
79bfeef6 1064 return (jae(i0));
4a71579b
PC
1065}
1066fbopi(ge)
1067
1068static jit_word_t
1069_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1070{
1071 ucomissr(r0, r1);
79bfeef6 1072 return (ja(i0));
4a71579b
PC
1073}
1074fbopi(gt)
1075
1076static jit_word_t
1077_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1078{
79bfeef6 1079 jit_word_t w;
4a71579b
PC
1080 jit_word_t jp_code;
1081 jit_word_t jz_code;
1082 ucomissr(r0, r1);
79bfeef6
PC
1083 jp_code = jps(0);
1084 jz_code = jzs(0);
1085 patch_at(jp_code, _jit->pc.w);
1086 w = jmpi(i0);
1087 patch_at(jz_code, _jit->pc.w);
1088 return (w);
4a71579b
PC
1089}
1090fbopi(ne)
1091
1092static jit_word_t
1093_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1094{
1095 ucomissr(r0, r1);
79bfeef6 1096 return (jnae(i0));
4a71579b
PC
1097}
1098fbopi(unlt)
1099
1100static jit_word_t
1101_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1102{
79bfeef6 1103 jit_word_t w;
4a71579b 1104 if (r0 == r1)
79bfeef6 1105 w = jmpi(i0);
4a71579b
PC
1106 else {
1107 ucomissr(r0, r1);
79bfeef6 1108 w = jna(i0);
4a71579b 1109 }
79bfeef6 1110 return (w);
4a71579b
PC
1111}
1112fbopi(unle)
1113
1114static jit_word_t
1115_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1116{
79bfeef6 1117 jit_word_t w;
4a71579b 1118 if (r0 == r1)
79bfeef6 1119 w = jmpi(i0);
4a71579b
PC
1120 else {
1121 ucomissr(r0, r1);
79bfeef6 1122 w = je(i0);
4a71579b 1123 }
79bfeef6 1124 return (w);
4a71579b
PC
1125}
1126fbopi(uneq)
1127
1128static jit_word_t
1129_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1130{
79bfeef6 1131 jit_word_t w;
4a71579b 1132 if (r0 == r1)
79bfeef6 1133 w = jmpi(i0);
4a71579b
PC
1134 else {
1135 ucomissr(r1, r0);
79bfeef6 1136 w = jna(i0);
4a71579b 1137 }
79bfeef6 1138 return (w);
4a71579b
PC
1139}
1140fbopi(unge)
1141
1142static jit_word_t
1143_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1144{
1145 ucomissr(r1, r0);
79bfeef6 1146 return (jnae(i0));
4a71579b
PC
1147}
1148fbopi(ungt)
1149
1150static jit_word_t
1151_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1152{
1153 ucomissr(r0, r1);
79bfeef6 1154 return (jne(i0));
4a71579b
PC
1155}
1156fbopi(ltgt)
1157
1158static jit_word_t
1159_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1160{
1161 ucomissr(r0, r1);
79bfeef6 1162 return (jnp(i0));
4a71579b
PC
1163}
1164fbopi(ord)
1165
1166static jit_word_t
1167_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1168{
1169 ucomissr(r0, r1);
79bfeef6 1170 return (jp(i0));
4a71579b
PC
1171}
1172fbopi(unord)
1173
1174dopi(lt)
1175dopi(le)
1176
1177static void
1178_sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1179{
1180 jit_bool_t rc;
1181 jit_int32_t reg;
1182 jit_word_t jp_code;
1183 if ((rc = reg8_p(r0)))
1184 reg = r0;
1185 else {
1186 reg = _RAX_REGNO;
1187 movr(r0, _RAX_REGNO);
1188 }
1189 ixorr(reg, reg);
1190 ucomisdr(r2, r1);
79bfeef6 1191 jp_code = jpes(0);
4a71579b 1192 cc(X86_CC_E, reg);
79bfeef6 1193 patch_at(jp_code, _jit->pc.w);
4a71579b
PC
1194 if (!rc)
1195 xchgr(r0, reg);
1196}
1197
1198dopi(eq)
1199dopi(ge)
1200dopi(gt)
1201
1202static void
1203_sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1204{
1205 jit_bool_t rc;
1206 jit_int32_t reg;
1207 jit_word_t jp_code;
1208 if ((rc = reg8_p(r0)))
1209 reg = r0;
1210 else {
1211 reg = _RAX_REGNO;
1212 movr(r0, _RAX_REGNO);
1213 }
1214 imovi(reg, 1);
1215 ucomisdr(r2, r1);
79bfeef6 1216 jp_code = jpes(0);
4a71579b 1217 cc(X86_CC_NE, reg);
79bfeef6 1218 patch_at(jp_code, _jit->pc.w);
4a71579b
PC
1219 if (!rc)
1220 xchgr(r0, reg);
1221}
1222
1223dopi(ne)
1224dopi(unlt)
1225
1226static void
1227_sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1228{
1229 if (r1 == r2)
1230 movi(r0, 1);
1231 else
1232 ssecmpd(X86_CC_NA, r0, r2, r1);
1233}
1234
1235dopi(unle)
1236
1237static void
1238_sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1239{
1240 if (r1 == r2)
1241 movi(r0, 1);
1242 else
1243 ssecmpd(X86_CC_E, r0, r1, r2);
1244}
1245
1246dopi(uneq)
1247
1248static void
1249_sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1250{
1251 if (r1 == r2)
1252 movi(r0, 1);
1253 else
1254 ssecmpd(X86_CC_NA, r0, r1, r2);
1255}
1256
1257dopi(unge)
1258dopi(ungt)
1259
1260static void
1261_sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1262{
1263 if (r1 == r2)
1264 ixorr(r0, r0);
1265 else
1266 ssecmpd(X86_CC_NE, r0, r1, r2);
1267}
1268
1269dopi(ltgt)
1270dopi(ord)
1271dopi(unord)
1272
1273static void
1274_sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1275{
1276 if (r0 != r1)
1277 ssexr(0xf2, X86_SSE_MOV, r0, r1);
1278}
1279
1280static void
1281_sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
1282{
1283 union {
1284 jit_int32_t ii[2];
1285 jit_word_t w;
1286 jit_float64_t d;
1287 } data;
1288 jit_int32_t reg;
1289 jit_bool_t ldi;
1290
1291 data.d = *i0;
1292 if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
1293 xorpdr(r0, r0);
1294 else {
1295 ldi = !_jitc->no_data;
1296#if __X64
1297 /* if will allocate a register for offset, just use immediate */
79bfeef6
PC
1298# if CAN_RIP_ADDRESS
1299 if (ldi) {
1300 jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1301 ldi = can_sign_extend_int_p(rel);
1302 if (!ldi && address_p(i0))
1303 ldi = 1;
1304 }
1305# else
1306 if (ldi && !address_p(i0))
4a71579b 1307 ldi = 0;
79bfeef6 1308# endif
4a71579b
PC
1309#endif
1310 if (ldi)
1311 sse_ldi_d(r0, (jit_word_t)i0);
1312 else {
1313 reg = jit_get_reg(jit_class_gpr);
1314#if __X64 && !__X64_32
1315 movi(rn(reg), data.w);
1316 movdqxr(r0, rn(reg));
1317 jit_unget_reg(reg);
1318#else
79bfeef6 1319 CHECK_CVT_OFFSET();
4a71579b
PC
1320 movi(rn(reg), data.ii[0]);
1321 stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1322 movi(rn(reg), data.ii[1]);
1323 stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1324 jit_unget_reg(reg);
1325 sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1326#endif
1327 }
1328 }
1329}
1330
1331static void
1332_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1333{
1334 jit_int32_t reg;
79bfeef6
PC
1335#if CAN_RIP_ADDRESS
1336 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1337 if (can_sign_extend_int_p(rel))
1338 movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
1339 else
1340#endif
1341 if (address_p(i0))
4a71579b
PC
1342 movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
1343 else {
1344 reg = jit_get_reg(jit_class_gpr);
1345 movi(rn(reg), i0);
1346 sse_ldr_d(r0, rn(reg));
1347 jit_unget_reg(reg);
1348 }
1349}
1350
1351static void
1352_sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1353{
1354#if __X64_32
1355 jit_int32_t reg;
1356 reg = jit_get_reg(jit_class_gpr);
1357 addr(rn(reg), r1, r2);
1358 sse_ldr_d(r0, rn(reg));
1359 jit_unget_reg(reg);
1360#else
1361 movsdmr(0, r1, r2, _SCL1, r0);
1362#endif
1363}
1364
1365static void
1366_sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1367{
1368 jit_int32_t reg;
1369 if (can_sign_extend_int_p(i0))
1370 movsdmr(i0, r1, _NOREG, _SCL1, r0);
1371 else {
1372 reg = jit_get_reg(jit_class_gpr);
1373#if __X64_32
1374 addi(rn(reg), r1, i0);
1375 sse_ldr_d(r0, rn(reg));
1376#else
1377 movi(rn(reg), i0);
1378 sse_ldxr_d(r0, r1, rn(reg));
1379#endif
1380 jit_unget_reg(reg);
1381 }
1382}
1383
1384static void
1385_sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1386{
1387 jit_int32_t reg;
79bfeef6
PC
1388#if CAN_RIP_ADDRESS
1389 jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1390 if (can_sign_extend_int_p(rel))
1391 movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
1392 else
1393#endif
1394 if (address_p(i0))
4a71579b
PC
1395 movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
1396 else {
1397 reg = jit_get_reg(jit_class_gpr);
1398 movi(rn(reg), i0);
1399 sse_str_d(rn(reg), r0);
1400 jit_unget_reg(reg);
1401 }
1402}
1403
1404static void
1405_sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1406{
1407#if __X64_32
1408 jit_int32_t reg;
1409 reg = jit_get_reg(jit_class_gpr);
1410 addr(rn(reg), r0, r1);
1411 sse_str_d(rn(reg), r2);
1412 jit_unget_reg(reg);
1413#else
1414 movsdrm(r2, 0, r0, r1, _SCL1);
1415#endif
1416}
1417
1418static void
1419_sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1420{
1421 jit_int32_t reg;
1422 if (can_sign_extend_int_p(i0))
1423 movsdrm(r1, i0, r0, _NOREG, _SCL1);
1424 else {
1425 reg = jit_get_reg(jit_class_gpr);
1426#if __X64_32
1427 addi(rn(reg), r0, i0);
1428 sse_str_d(rn(reg), r1);
1429#else
1430 movi(rn(reg), i0);
1431 sse_stxr_f(rn(reg), r0, r1);
1432#endif
1433 jit_unget_reg(reg);
1434 }
1435}
1436
1437static jit_word_t
1438_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1439{
1440 ucomisdr(r1, r0);
79bfeef6 1441 return (ja(i0));
4a71579b
PC
1442}
1443dbopi(lt)
1444
1445static jit_word_t
1446_sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1447{
1448 ucomisdr(r1, r0);
79bfeef6 1449 return (jae(i0));
4a71579b
PC
1450}
1451dbopi(le)
1452
1453static jit_word_t
1454_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1455{
79bfeef6 1456 jit_word_t w;
4a71579b
PC
1457 jit_word_t jp_code;
1458 ucomisdr(r0, r1);
79bfeef6
PC
1459 jp_code = jps(0);
1460 w = je(i0);
1461 patch_at(jp_code, _jit->pc.w);
1462 return (w);
4a71579b
PC
1463}
1464dbopi(eq)
1465
1466static jit_word_t
1467_sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1468{
1469 ucomisdr(r0, r1);
79bfeef6 1470 return (jae(i0));
4a71579b
PC
1471}
1472dbopi(ge)
1473
1474static jit_word_t
1475_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1476{
1477 ucomisdr(r0, r1);
79bfeef6 1478 return (ja(i0));
4a71579b
PC
1479}
1480dbopi(gt)
1481
1482static jit_word_t
1483_sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1484{
79bfeef6 1485 jit_word_t w;
4a71579b
PC
1486 jit_word_t jp_code;
1487 jit_word_t jz_code;
1488 ucomisdr(r0, r1);
79bfeef6
PC
1489 jp_code = jps(0);
1490 jz_code = jzs(0);
1491 patch_at(jp_code, _jit->pc.w);
1492 w = jmpi(i0);
1493 patch_at(jz_code, _jit->pc.w);
1494 return (w);
4a71579b
PC
1495}
1496dbopi(ne)
1497
1498static jit_word_t
1499_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1500{
1501 ucomisdr(r0, r1);
79bfeef6 1502 return (jnae(i0));
4a71579b
PC
1503}
1504dbopi(unlt)
1505
1506static jit_word_t
1507_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1508{
79bfeef6 1509 jit_word_t w;
4a71579b 1510 if (r0 == r1)
79bfeef6 1511 w = jmpi(i0);
4a71579b
PC
1512 else {
1513 ucomisdr(r0, r1);
79bfeef6 1514 w = jna(i0);
4a71579b 1515 }
79bfeef6 1516 return (w);
4a71579b
PC
1517}
1518dbopi(unle)
1519
1520static jit_word_t
1521_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1522{
79bfeef6 1523 jit_word_t w;
4a71579b 1524 if (r0 == r1)
79bfeef6 1525 w = jmpi(i0);
4a71579b
PC
1526 else {
1527 ucomisdr(r0, r1);
79bfeef6 1528 w = je(i0);
4a71579b 1529 }
79bfeef6 1530 return (w);
4a71579b
PC
1531}
1532dbopi(uneq)
1533
1534static jit_word_t
1535_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1536{
79bfeef6 1537 jit_word_t w;
4a71579b 1538 if (r0 == r1)
79bfeef6 1539 w = jmpi(i0);
4a71579b
PC
1540 else {
1541 ucomisdr(r1, r0);
79bfeef6 1542 w = jna(i0);
4a71579b 1543 }
79bfeef6 1544 return (w);
4a71579b
PC
1545}
1546dbopi(unge)
1547
1548static jit_word_t
1549_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1550{
1551 ucomisdr(r1, r0);
79bfeef6 1552 return (jnae(i0));
4a71579b
PC
1553}
1554dbopi(ungt)
1555
1556static jit_word_t
1557_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1558{
1559 ucomisdr(r0, r1);
79bfeef6 1560 return (jne(i0));
4a71579b
PC
1561}
1562dbopi(ltgt)
1563
1564static jit_word_t
1565_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1566{
1567 ucomisdr(r0, r1);
79bfeef6 1568 return (jnp(i0));
4a71579b
PC
1569}
1570dbopi(ord)
1571
1572static jit_word_t
1573_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1574{
1575 ucomisdr(r0, r1);
79bfeef6 1576 return (jp(i0));
4a71579b
PC
1577}
1578dbopi(unord)
1579# undef fopi
1580# undef fbopi
1581# undef bopi
1582# undef dbopi
1583# undef fpr_bopi
1584# undef fpr_opi
1585#endif