spu: adjust irqs again
[pcsx_rearmed.git] / deps / lightning / lib / jit_x86-sse.c
1 /*
2  * Copyright (C) 2012-2023  Free Software Foundation, Inc.
3  *
4  * This file is part of GNU lightning.
5  *
6  * GNU lightning is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * GNU lightning is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14  * License for more details.
15  *
16  * Authors:
17  *      Paulo Cesar Pereira de Andrade
18  */
19
/* Everything from here to the matching #endif is compiled only when PROTO is set. */
20 #if PROTO
/* Register numbers for XMM6 through XMM15; each value equals the index in its name. */
21 #  define _XMM6_REGNO                   6
22 #  define _XMM7_REGNO                   7
23 #  define _XMM8_REGNO                   8
24 #  define _XMM9_REGNO                   9
25 #  define _XMM10_REGNO                  10
26 #  define _XMM11_REGNO                  11
27 #  define _XMM12_REGNO                  12
28 #  define _XMM13_REGNO                  13
29 #  define _XMM14_REGNO                  14
30 #  define _XMM15_REGNO                  15
/*
 * Opcode byte values.  The emitters defined in the CODE part of this file
 * write the byte 0x0f and then one of these values (received as their
 * `c` / `code` argument).  Which instruction a value selects also depends
 * on the prefix byte chosen by the wrapper macro that uses it.
 */
31 #define X86_SSE_MOV                     0x10
32 #define X86_SSE_MOV1                    0x11
33 #define X86_SSE_MOVLP                   0x12
34 #define X86_SSE_MOVHP                   0x16
35 #define X86_SSE_MOVA                    0x28
36 #define X86_SSE_CVTIS                   0x2a
37 #define X86_SSE_CVTTSI                  0x2c
38 #define X86_SSE_CVTSI                   0x2d
39 #define X86_SSE_UCOMI                   0x2e
40 #define X86_SSE_COMI                    0x2f
41 #define X86_SSE_ROUND                   0x3a
42 #define X86_SSE_SQRT                    0x51
43 #define X86_SSE_RSQRT                   0x52
44 #define X86_SSE_RCP                     0x53
45 #define X86_SSE_AND                     0x54
46 #define X86_SSE_ANDN                    0x55
47 #define X86_SSE_OR                      0x56
48 #define X86_SSE_XOR                     0x57
49 #define X86_SSE_ADD                     0x58
50 #define X86_SSE_MUL                     0x59
51 #define X86_SSE_CVTSD                   0x5a
52 #define X86_SSE_CVTDT                   0x5b
53 #define X86_SSE_SUB                     0x5c
54 #define X86_SSE_MIN                     0x5d
55 #define X86_SSE_DIV                     0x5e
56 #define X86_SSE_MAX                     0x5f
57 #define X86_SSE_X2G                     0x6e
58 #define X86_SSE_EQB                     0x74
59 #define X86_SSE_EQW                     0x75
60 #define X86_SSE_EQD                     0x76
61 #define X86_SSE_G2X                     0x7e
62 #define X86_SSE_MOV2                    0xd6
/*
 * Low-level emitters.  Every wrapper macro passes the enclosing function's
 * `_jit` state implicitly as the first argument.
 *   sser(c, r0, r1)      - register/register form with no prefix byte.
 *   ssexr(p, c, r0, r1)  - same, but the byte `p` is written first.
 *   ssexi(c, r0, m, i)   - always writes a 0x66 prefix, places `m` in the
 *                          middle field of the mrm() byte and appends the
 *                          byte `i` at the end.
 */
63 #  define sser(c,r0,r1)                 _sser(_jit,c,r0,r1)
64 static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
65 #  define ssexr(p,c,r0,r1)              _ssexr(_jit,p,c,r0,r1)
66 static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
67 #  define ssexi(c,r0,m,i)               _ssexi(_jit,c,r0,m,i)
68 static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/*
 * Two-operand instruction wrappers.  In this file the *ss* wrappers use the
 * 0xf3 prefix, the *sd* wrappers use 0xf2, the *pd* and integer wrappers use
 * 0x66, and the *ps* wrappers use no prefix (sser).
 */
69 #  define addssr(r0, r1)                ssexr(0xf3, X86_SSE_ADD, r0, r1)
70 #  define addsdr(r0, r1)                ssexr(0xf2, X86_SSE_ADD, r0, r1)
71 #  define subssr(r0, r1)                ssexr(0xf3, X86_SSE_SUB, r0, r1)
72 #  define subsdr(r0, r1)                ssexr(0xf2, X86_SSE_SUB, r0, r1)
73 #  define mulssr(r0, r1)                ssexr(0xf3, X86_SSE_MUL, r0, r1)
74 #  define mulsdr(r0, r1)                ssexr(0xf2, X86_SSE_MUL, r0, r1)
75 #  define divssr(r0, r1)                ssexr(0xf3, X86_SSE_DIV, r0, r1)
76 #  define divsdr(r0, r1)                ssexr(0xf2, X86_SSE_DIV, r0, r1)
77 #  define andpsr(r0, r1)                sser(       X86_SSE_AND, r0, r1)
78 #  define andpdr(r0, r1)                ssexr(0x66, X86_SSE_AND, r0, r1)
/* Truncating float/double -> integer conversions (X86_SSE_CVTTSI). */
79 #  define sse_truncr_f_i(r0, r1)        ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
80 #  define sse_truncr_d_i(r0, r1)        ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
/*
 * On __X64 the _l truncations and the integer -> float/double conversions
 * go through sselxr instead of ssexr; sselxr itself is defined further down.
 */
81 #  if __X64
82 #    define sse_truncr_f_l(r0, r1)      sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
83 #    define sse_truncr_d_l(r0, r1)      sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
84 #    define sse_extr_f(r0, r1)          sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
85 #    define sse_extr_d(r0, r1)          sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
86 #  else
87 #    define sse_extr_f(r0, r1)          ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
88 #    define sse_extr_d(r0, r1)          ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
89 #  endif
/* float <-> double conversions, both using X86_SSE_CVTSD. */
90 #  define sse_extr_f_d(r0, r1)          ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
91 #  define sse_extr_d_f(r0, r1)          ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
92 #  define ucomissr(r0,r1)               sser(X86_SSE_UCOMI,r0,r1)
93 #  define ucomisdr(r0,r1)               ssexr(0x66,X86_SSE_UCOMI,r0,r1)
94 #  define xorpsr(r0,r1)                 sser(X86_SSE_XOR,r0,r1)
95 #  define xorpdr(r0,r1)                 ssexr(0x66,X86_SSE_XOR,r0,r1)
96 #  define movdlxr(r0,r1)                ssexr(0x66, X86_SSE_X2G,r0,r1)
97 #  define pcmpeqlr(r0, r1)              ssexr(0x66, X86_SSE_EQD, r0, r1)
/*
 * Immediate shifts: the first ssexi argument (0x72 / 0x73) is the opcode
 * byte, the third (0x02 / 0x06) is the value placed in the mrm() middle
 * field, and i0 is the shift count byte.
 */
98 #  define psrl(r0, i0)                  ssexi(0x72, r0, 0x02, i0)
99 #  define psrq(r0, i0)                  ssexi(0x73, r0, 0x02, i0)
100 #  define psll(r0, i0)                  ssexi(0x72, r0, 0x06, i0)
101 #  define pslq(r0, i0)                  ssexi(0x73, r0, 0x06, i0)
102 #  define movdqxr(r0,r1)                sselxr(0x66,X86_SSE_X2G,r0,r1)
/*
 * sselxr: on __X64 && !__X64_32 it calls _sselxr, which differs from
 * _ssexr only in passing 1 as the second rex() argument; on every other
 * configuration it is a plain alias of ssexr.
 */
103 #  if __X64 && !__X64_32
104 #    define sselxr(p,c,r0,r1)           _sselxr(_jit,p,c,r0,r1)
105 static void
106 _sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
107 #  else
108 #    define sselxr(p,c,r0,r1)           ssexr(p,c,r0,r1)
109 #  endif
/*
 * Memory-operand form.  md, rb, ri and ms are forwarded to rx() together
 * with the XMM register rd.  The *mr wrappers use X86_SSE_MOV and back the
 * sse_ldr_* loads; the *rm wrappers use X86_SSE_MOV1 and back the sse_str_*
 * stores (note the register comes first in their argument list).
 */
110 #  define ssexrx(p,c,md,rb,ri,ms,rd)    _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
111 #  define movssmr(md,rb,ri,ms,rd)       ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
112 #  define movsdmr(md,rb,ri,ms,rd)       ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
113 #  define movssrm(rs,md,mb,mi,ms)       ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
114 #  define movsdrm(rs,md,mb,mi,ms)       ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
115 static void
116 _ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
117         jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Arithmetic helpers.  Naming used below:
 *   ..r_f / ..r_d  take three register numbers (r0, r1, r2);
 *   ..i_f / ..i_d  take two register numbers and a pointer to a
 *                  jit_float32_t / jit_float64_t constant.
 * Each wrapper macro supplies `_jit` as the hidden first argument.
 */
118 #  define sse_addr_f(r0, r1, r2)        _sse_addr_f(_jit, r0, r1, r2)
119 static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
120 #  define sse_addi_f(r0, r1, i0)        _sse_addi_f(_jit, r0, r1, i0)
121 static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
122 #  define sse_addr_d(r0, r1, r2)        _sse_addr_d(_jit, r0, r1, r2)
123 static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
124 #  define sse_addi_d(r0, r1, i0)        _sse_addi_d(_jit, r0, r1, i0)
125 static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
126 #  define sse_subr_f(r0, r1, r2)        _sse_subr_f(_jit, r0, r1, r2)
127 static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
128 #  define sse_subi_f(r0, r1, i0)        _sse_subi_f(_jit, r0, r1, i0)
129 static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
130 #  define sse_subr_d(r0, r1, r2)        _sse_subr_d(_jit, r0, r1, r2)
131 static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
132 #  define sse_subi_d(r0, r1, i0)        _sse_subi_d(_jit, r0, r1, i0)
133 static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Reverse subtract, register form: the subtraction with r1 and r2 swapped. */
134 #  define sse_rsbr_f(r0, r1, r2)        sse_subr_f(r0, r2, r1)
135 #  define sse_rsbi_f(r0, r1, i0)        _sse_rsbi_f(_jit, r0, r1, i0)
136 static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
137 #  define sse_rsbr_d(r0, r1, r2)        sse_subr_d(r0, r2, r1)
138 #  define sse_rsbi_d(r0, r1, i0)        _sse_rsbi_d(_jit, r0, r1, i0)
139 static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
140 #  define sse_mulr_f(r0, r1, r2)        _sse_mulr_f(_jit, r0, r1, r2)
141 static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
142 #  define sse_muli_f(r0, r1, i0)        _sse_muli_f(_jit, r0, r1, i0)
143 static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
144 #  define sse_mulr_d(r0, r1, r2)        _sse_mulr_d(_jit, r0, r1, r2)
145 static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
146 #  define sse_muli_d(r0, r1, i0)        _sse_muli_d(_jit, r0, r1, i0)
147 static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
148 #  define sse_divr_f(r0, r1, r2)        _sse_divr_f(_jit, r0, r1, r2)
149 static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
150 #  define sse_divi_f(r0, r1, i0)        _sse_divi_f(_jit, r0, r1, i0)
151 static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
152 #  define sse_divr_d(r0, r1, r2)        _sse_divr_d(_jit, r0, r1, r2)
153 static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
154 #  define sse_divi_d(r0, r1, i0)        _sse_divi_d(_jit, r0, r1, i0)
155 static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Unary helpers: two register numbers (r0, r1). */
156 #  define sse_absr_f(r0, r1)            _sse_absr_f(_jit, r0, r1)
157 static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
158 #  define sse_absr_d(r0, r1)            _sse_absr_d(_jit, r0, r1)
159 static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
160 #  define sse_negr_f(r0, r1)            _sse_negr_f(_jit, r0, r1)
161 static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
162 #  define sse_negr_d(r0, r1)            _sse_negr_d(_jit, r0, r1)
163 static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
/* Square root has no helper function: it is a single ssexr emission. */
164 #  define sse_sqrtr_f(r0, r1)           ssexr(0xf3, X86_SSE_SQRT, r0, r1)
165 #  define sse_sqrtr_d(r0, r1)           ssexr(0xf2, X86_SSE_SQRT, r0, r1)
/*
 * Shared comparison helper.  ssecmpf passes 0 and ssecmpd passes 1 as the
 * jit_bool_t argument; `code` is one of the X86_CC_* condition codes.
 */
166 #  define ssecmpf(code, r0, r1, r2)     _ssecmp(_jit, 0, code, r0, r1, r2)
167 #  define ssecmpd(code, r0, r1, r2)     _ssecmp(_jit, 1, code, r0, r1, r2)
168 static void
169 _ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
170         jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Single-precision (_f) moves, comparisons, loads and stores.
 *
 * Several register/register comparisons are not functions but direct uses
 * of ssecmpf with an X86_CC_* condition code:
 *   ltr / ler      use X86_CC_A / X86_CC_AE with (r1, r2) in order;
 *   gtr / ger      use the same codes with the operands swapped (r2, r1);
 *   unltr / ungtr  use X86_CC_NAE with (r2, r1) / (r1, r2);
 *   ordr / unordr  use X86_CC_NP / X86_CC_P.
 * The ..i_f variants take a pointer to a jit_float32_t constant instead of r2.
 */
171 #define sse_movr_f(r0,r1)               _sse_movr_f(_jit,r0,r1)
172 static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
173 #define sse_movi_f(r0,i0)               _sse_movi_f(_jit,r0,i0)
174 static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
175 #  define sse_lti_f(r0, r1, i0)         _sse_lti_f(_jit, r0, r1, i0)
176 static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
177 #  define sse_ltr_f(r0, r1, r2)         ssecmpf(X86_CC_A, r0, r1, r2)
178 #  define sse_lei_f(r0, r1, i0)         _sse_lei_f(_jit, r0, r1, i0)
179 static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
180 #  define sse_ler_f(r0, r1, r2)         ssecmpf(X86_CC_AE, r0, r1, r2)
181 #  define sse_eqi_f(r0, r1, i0)         _sse_eqi_f(_jit, r0, r1, i0)
182 static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
183 #  define sse_eqr_f(r0, r1, r2)         _sse_eqr_f(_jit, r0, r1, r2)
184 static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
185 #  define sse_gei_f(r0, r1, i0)         _sse_gei_f(_jit, r0, r1, i0)
186 static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
187 #  define sse_ger_f(r0, r1, r2)         ssecmpf(X86_CC_AE, r0, r2, r1)
188 #  define sse_gti_f(r0, r1, i0)         _sse_gti_f(_jit, r0, r1, i0)
189 static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
190 #  define sse_gtr_f(r0, r1, r2)         ssecmpf(X86_CC_A, r0, r2, r1)
191 #  define sse_nei_f(r0, r1, i0)         _sse_nei_f(_jit, r0, r1, i0)
192 static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
193 #  define sse_ner_f(r0, r1, r2)         _sse_ner_f(_jit, r0, r1, r2)
194 static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
195 #  define sse_unlti_f(r0, r1, i0)       _sse_unlti_f(_jit, r0, r1, i0)
196 static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
197 #  define sse_unltr_f(r0, r1, r2)       ssecmpf(X86_CC_NAE, r0, r2, r1)
198 #  define sse_unlei_f(r0, r1, i0)       _sse_unlei_f(_jit, r0, r1, i0)
199 static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
/* Note: the prototype for _sse_unler_f follows the sse_uneqi_f pair below. */
200 #  define sse_unler_f(r0, r1, r2)       _sse_unler_f(_jit, r0, r1, r2)
201 #  define sse_uneqi_f(r0, r1, i0)       _sse_uneqi_f(_jit, r0, r1, i0)
202 static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
203 static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
204 #  define sse_uneqr_f(r0, r1, r2)       _sse_uneqr_f(_jit, r0, r1, r2)
205 static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
206 #  define sse_ungei_f(r0, r1, i0)       _sse_ungei_f(_jit, r0, r1, i0)
207 static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
208 #  define sse_unger_f(r0, r1, r2)       _sse_unger_f(_jit, r0, r1, r2)
209 static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
210 #  define sse_ungti_f(r0, r1, i0)       _sse_ungti_f(_jit, r0, r1, i0)
211 static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
212 #  define sse_ungtr_f(r0, r1, r2)       ssecmpf(X86_CC_NAE, r0, r1, r2)
213 #  define sse_ltgti_f(r0, r1, i0)       _sse_ltgti_f(_jit, r0, r1, i0)
214 static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
215 #  define sse_ltgtr_f(r0, r1, r2)       _sse_ltgtr_f(_jit, r0, r1, r2)
216 static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
217 #  define sse_ordi_f(r0, r1, i0)        _sse_ordi_f(_jit, r0, r1, i0)
218 static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
219 #  define sse_ordr_f(r0, r1, r2)        ssecmpf(X86_CC_NP, r0, r2, r1)
220 #  define sse_unordi_f(r0, r1, i0)      _sse_unordi_f(_jit, r0, r1, i0)
221 static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
222 #  define sse_unordr_f(r0, r1, r2)      ssecmpf(X86_CC_P, r0, r2, r1)
/*
 * Loads.  sse_ldr_f is a direct movssmr with displacement 0, base r1,
 * no index register (_NOREG) and scale _SCL1, targeting r0.
 */
223 #  define sse_ldr_f(r0, r1)             movssmr(0, r1, _NOREG, _SCL1, r0)
224 #  define sse_ldi_f(r0, i0)             _sse_ldi_f(_jit, r0, i0)
225 static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
226 #  define sse_ldxr_f(r0, r1, r2)        _sse_ldxr_f(_jit, r0, r1, r2)
227 static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
228 #  define sse_ldxi_f(r0, r1, i0)        _sse_ldxi_f(_jit, r0, r1, i0)
229 static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/*
 * Stores.  sse_str_f is a direct movssrm of register r1 with displacement 0,
 * base r0, no index register and scale _SCL1.
 */
230 #  define sse_str_f(r0, r1)             movssrm(r1, 0, r0, _NOREG, _SCL1)
231 #  define sse_sti_f(i0, r0)             _sse_sti_f(_jit, i0, r0)
232 static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
233 #  define sse_stxr_f(r0, r1, r2)        _sse_stxr_f(_jit, r0, r1, r2)
234 static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
235 #  define sse_stxi_f(i0, r0, r1)        _sse_stxi_f(_jit, i0, r0, r1)
236 static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Single-precision branch helpers.  Every function takes a jit_word_t i0
 * (presumably the branch target - confirm against the definitions), a
 * register number r0 and either a second register (..r_f) or a pointer to
 * a jit_float32_t constant (..i_f), and returns a jit_word_t.
 */
237 #  define sse_bltr_f(i0, r0, r1)        _sse_bltr_f(_jit, i0, r0, r1)
238 static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
239 #  define sse_blti_f(i0, r0, i1)        _sse_blti_f(_jit, i0, r0, i1)
240 static jit_word_t
241 _sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
242 #  define sse_bler_f(i0, r0, r1)        _sse_bler_f(_jit, i0, r0, r1)
243 static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
244 #  define sse_blei_f(i0, r0, i1)        _sse_blei_f(_jit, i0, r0, i1)
245 static jit_word_t
246 _sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
247 #  define sse_beqr_f(i0, r0, r1)        _sse_beqr_f(_jit, i0, r0, r1)
248 static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
249 #  define sse_beqi_f(i0, r0, i1)        _sse_beqi_f(_jit, i0, r0, i1)
250 static jit_word_t
251 _sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
252 #  define sse_bger_f(i0, r0, r1)        _sse_bger_f(_jit, i0, r0, r1)
253 static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
254 #  define sse_bgei_f(i0, r0, i1)        _sse_bgei_f(_jit, i0, r0, i1)
255 static jit_word_t
256 _sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
257 #  define sse_bgtr_f(i0, r0, r1)        _sse_bgtr_f(_jit, i0, r0, r1)
258 static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
259 #  define sse_bgti_f(i0, r0, i1)        _sse_bgti_f(_jit, i0, r0, i1)
260 static jit_word_t
261 _sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
262 #  define sse_bner_f(i0, r0, r1)        _sse_bner_f(_jit, i0, r0, r1)
263 static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
264 #  define sse_bnei_f(i0, r0, i1)        _sse_bnei_f(_jit, i0, r0, i1)
265 static jit_word_t
266 _sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/* Unordered-aware variants (un.., ltgt, ord, unord). */
267 #  define sse_bunltr_f(i0, r0, r1)      _sse_bunltr_f(_jit, i0, r0, r1)
268 static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
269 #  define sse_bunlti_f(i0, r0, i1)      _sse_bunlti_f(_jit, i0, r0, i1)
270 static jit_word_t
271 _sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
272 #  define sse_bunler_f(i0, r0, r1)      _sse_bunler_f(_jit, i0, r0, r1)
273 static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
274 #  define sse_bunlei_f(i0, r0, i1)      _sse_bunlei_f(_jit, i0, r0, i1)
275 static jit_word_t
276 _sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
277 #  define sse_buneqr_f(i0, r0, r1)      _sse_buneqr_f(_jit, i0, r0, r1)
278 static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
279 #  define sse_buneqi_f(i0, r0, i1)      _sse_buneqi_f(_jit, i0, r0, i1)
280 static jit_word_t
281 _sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
282 #  define sse_bunger_f(i0, r0, r1)      _sse_bunger_f(_jit, i0, r0, r1)
283 static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
284 #  define sse_bungei_f(i0, r0, i1)      _sse_bungei_f(_jit, i0, r0, i1)
285 static jit_word_t
286 _sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
287 #  define sse_bungtr_f(i0, r0, r1)      _sse_bungtr_f(_jit, i0, r0, r1)
288 static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
289 #  define sse_bungti_f(i0, r0, i1)      _sse_bungti_f(_jit, i0, r0, i1)
290 static jit_word_t
291 _sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
292 #  define sse_bltgtr_f(i0, r0, r1)      _sse_bltgtr_f(_jit, i0, r0, r1)
293 static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
294 #  define sse_bltgti_f(i0, r0, i1)      _sse_bltgti_f(_jit, i0, r0, i1)
295 static jit_word_t
296 _sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
297 #  define sse_bordr_f(i0, r0, r1)       _sse_bordr_f(_jit, i0, r0, r1)
298 static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
299 #  define sse_bordi_f(i0, r0, i1)       _sse_bordi_f(_jit, i0, r0, i1)
300 static jit_word_t
301 _sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
302 #  define sse_bunordr_f(i0, r0, r1)     _sse_bunordr_f(_jit, i0, r0, r1)
303 static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
304 #  define sse_bunordi_f(i0, r0, i1)     _sse_bunordi_f(_jit, i0, r0, i1)
305 static jit_word_t
306 _sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/*
 * Double-precision (_d) moves, comparisons, loads and stores.
 *
 * The register/register comparisons that map directly onto ssecmpd are:
 *   ltr / ler      X86_CC_A / X86_CC_AE with (r1, r2) in order;
 *   gtr / ger      the same codes with the operands swapped (r2, r1);
 *   unltr / ungtr  X86_CC_NAE with (r2, r1) / (r1, r2);
 *   ordr / unordr  X86_CC_NP / X86_CC_P.
 * The ..i_d variants take a pointer to a jit_float64_t constant instead of r2.
 */
307 #define sse_movr_d(r0,r1)               _sse_movr_d(_jit,r0,r1)
308 static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
309 #define sse_movi_d(r0,i0)               _sse_movi_d(_jit,r0,i0)
310 static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
311 #  define sse_ltr_d(r0, r1, r2)         ssecmpd(X86_CC_A, r0, r1, r2)
312 #  define sse_lti_d(r0, r1, i0)         _sse_lti_d(_jit, r0, r1, i0)
313 static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
314 #  define sse_ler_d(r0, r1, r2)         ssecmpd(X86_CC_AE, r0, r1, r2)
315 #  define sse_lei_d(r0, r1, i0)         _sse_lei_d(_jit, r0, r1, i0)
316 static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
317 #  define sse_eqr_d(r0, r1, r2)         _sse_eqr_d(_jit, r0, r1, r2)
318 static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
319 #  define sse_eqi_d(r0, r1, i0)         _sse_eqi_d(_jit, r0, r1, i0)
320 static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
321 #  define sse_ger_d(r0, r1, r2)         ssecmpd(X86_CC_AE, r0, r2, r1)
322 #  define sse_gei_d(r0, r1, i0)         _sse_gei_d(_jit, r0, r1, i0)
323 static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
324 #  define sse_gtr_d(r0, r1, r2)         ssecmpd(X86_CC_A, r0, r2, r1)
325 #  define sse_gti_d(r0, r1, i0)         _sse_gti_d(_jit, r0, r1, i0)
326 static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
327 #  define sse_ner_d(r0, r1, r2)         _sse_ner_d(_jit, r0, r1, r2)
328 static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
329 #  define sse_nei_d(r0, r1, i0)         _sse_nei_d(_jit, r0, r1, i0)
330 static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
331 #  define sse_unltr_d(r0, r1, r2)       ssecmpd(X86_CC_NAE, r0, r2, r1)
332 #  define sse_unlti_d(r0, r1, i0)       _sse_unlti_d(_jit, r0, r1, i0)
333 static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
334 #  define sse_unler_d(r0, r1, r2)       _sse_unler_d(_jit, r0, r1, r2)
335 static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
336 #  define sse_unlei_d(r0, r1, i0)       _sse_unlei_d(_jit, r0, r1, i0)
337 static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
338 #  define sse_uneqr_d(r0, r1, r2)       _sse_uneqr_d(_jit, r0, r1, r2)
339 static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
340 #  define sse_uneqi_d(r0, r1, i0)       _sse_uneqi_d(_jit, r0, r1, i0)
341 static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
342 #  define sse_unger_d(r0, r1, r2)       _sse_unger_d(_jit, r0, r1, r2)
343 static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
344 #  define sse_ungei_d(r0, r1, i0)       _sse_ungei_d(_jit, r0, r1, i0)
345 static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
346 #  define sse_ungtr_d(r0, r1, r2)       ssecmpd(X86_CC_NAE, r0, r1, r2)
347 #  define sse_ungti_d(r0, r1, i0)       _sse_ungti_d(_jit, r0, r1, i0)
348 static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
349 #  define sse_ltgtr_d(r0, r1, r2)       _sse_ltgtr_d(_jit, r0, r1, r2)
350 static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
351 #  define sse_ltgti_d(r0, r1, i0)       _sse_ltgti_d(_jit, r0, r1, i0)
352 static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
353 #  define sse_ordr_d(r0, r1, r2)        ssecmpd(X86_CC_NP, r0, r2, r1)
354 #  define sse_ordi_d(r0, r1, i0)        _sse_ordi_d(_jit, r0, r1, i0)
355 static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
356 #  define sse_unordr_d(r0, r1, r2)      ssecmpd(X86_CC_P, r0, r2, r1)
357 #  define sse_unordi_d(r0, r1, i0)      _sse_unordi_d(_jit, r0, r1, i0)
358 static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/*
 * Loads.  sse_ldr_d is a direct movsdmr with displacement 0, base r1,
 * no index register (_NOREG) and scale _SCL1, targeting r0.
 */
359 #  define sse_ldr_d(r0, r1)             movsdmr(0, r1, _NOREG, _SCL1, r0)
360 #  define sse_ldi_d(r0, i0)             _sse_ldi_d(_jit, r0, i0)
361 static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
362 #  define sse_ldxr_d(r0, r1, r2)        _sse_ldxr_d(_jit, r0, r1, r2)
363 static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
364 #  define sse_ldxi_d(r0, r1, i0)        _sse_ldxi_d(_jit, r0, r1, i0)
365 static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/* Note: the prototype for _sse_bltr_d comes after the store helpers below. */
366 #  define sse_bltr_d(i0, r0, r1)        _sse_bltr_d(_jit, i0, r0, r1)
/*
 * Stores.  sse_str_d is a direct movsdrm of register r1 with displacement 0,
 * base r0, no index register and scale _SCL1.
 */
367 #  define sse_str_d(r0, r1)             movsdrm(r1, 0, r0, _NOREG, _SCL1)
368 #  define sse_sti_d(i0, r0)             _sse_sti_d(_jit, i0, r0)
369 static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
370 #  define sse_stxr_d(r0, r1, r2)        _sse_stxr_d(_jit, r0, r1, r2)
371 static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
372 #  define sse_stxi_d(i0, r0, r1)        _sse_stxi_d(_jit, i0, r0, r1)
373 static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Double-precision branch helpers.  Every function takes a jit_word_t i0
 * (presumably the branch target - confirm against the definitions), a
 * register number r0 and either a second register (..r_d) or a pointer to
 * a jit_float64_t constant (..i_d), and returns a jit_word_t.
 * The sse_bltr_d wrapper macro for the first prototype is defined earlier,
 * ahead of the sse_str_d store macros.
 */
374 static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
375 #  define sse_blti_d(i0, r0, i1)        _sse_blti_d(_jit, i0, r0, i1)
376 static jit_word_t
377 _sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
378 #  define sse_bler_d(i0, r0, r1)        _sse_bler_d(_jit, i0, r0, r1)
379 static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
380 #  define sse_blei_d(i0, r0, i1)        _sse_blei_d(_jit, i0, r0, i1)
381 static jit_word_t
382 _sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
383 #  define sse_beqr_d(i0, r0, r1)        _sse_beqr_d(_jit, i0, r0, r1)
384 static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
385 #  define sse_beqi_d(i0, r0, i1)        _sse_beqi_d(_jit, i0, r0, i1)
386 static jit_word_t
387 _sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
388 #  define sse_bger_d(i0, r0, r1)        _sse_bger_d(_jit, i0, r0, r1)
389 static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
390 #  define sse_bgei_d(i0, r0, i1)        _sse_bgei_d(_jit, i0, r0, i1)
391 static jit_word_t
392 _sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
393 #  define sse_bgtr_d(i0, r0, r1)        _sse_bgtr_d(_jit, i0, r0, r1)
394 static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
395 #  define sse_bgti_d(i0, r0, i1)        _sse_bgti_d(_jit, i0, r0, i1)
396 static jit_word_t
397 _sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
398 #  define sse_bner_d(i0, r0, r1)        _sse_bner_d(_jit, i0, r0, r1)
399 static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
400 #  define sse_bnei_d(i0, r0, i1)        _sse_bnei_d(_jit, i0, r0, i1)
401 static jit_word_t
402 _sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
/* Unordered-aware variants (un.., ltgt, ord, unord). */
403 #  define sse_bunltr_d(i0, r0, r1)      _sse_bunltr_d(_jit, i0, r0, r1)
404 static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
405 #  define sse_bunlti_d(i0, r0, i1)      _sse_bunlti_d(_jit, i0, r0, i1)
406 static jit_word_t
407 _sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
408 #  define sse_bunler_d(i0, r0, r1)      _sse_bunler_d(_jit, i0, r0, r1)
409 static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
410 #  define sse_bunlei_d(i0, r0, i1)      _sse_bunlei_d(_jit, i0, r0, i1)
411 static jit_word_t
412 _sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
413 #  define sse_buneqr_d(i0, r0, r1)      _sse_buneqr_d(_jit, i0, r0, r1)
414 static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
415 #  define sse_buneqi_d(i0, r0, i1)      _sse_buneqi_d(_jit, i0, r0, i1)
416 static jit_word_t
417 _sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
418 #  define sse_bunger_d(i0, r0, r1)      _sse_bunger_d(_jit, i0, r0, r1)
419 static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
420 #  define sse_bungei_d(i0, r0, i1)      _sse_bungei_d(_jit, i0, r0, i1)
421 static jit_word_t
422 _sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
423 #  define sse_bungtr_d(i0, r0, r1)      _sse_bungtr_d(_jit, i0, r0, r1)
424 static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
425 #  define sse_bungti_d(i0, r0, i1)      _sse_bungti_d(_jit, i0, r0, i1)
426 static jit_word_t
427 _sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
428 #  define sse_bltgtr_d(i0, r0, r1)      _sse_bltgtr_d(_jit, i0, r0, r1)
429 static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
430 #  define sse_bltgti_d(i0, r0, i1)      _sse_bltgti_d(_jit, i0, r0, i1)
431 static jit_word_t
432 _sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
433 #  define sse_bordr_d(i0, r0, r1)       _sse_bordr_d(_jit, i0, r0, r1)
434 static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
435 #  define sse_bordi_d(i0, r0, i1)       _sse_bordi_d(_jit, i0, r0, i1)
436 static jit_word_t
437 _sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
438 #  define sse_bunordr_d(i0, r0, r1)     _sse_bunordr_d(_jit, i0, r0, r1)
439 static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
440 #  define sse_bunordi_d(i0, r0, i1)     _sse_bunordi_d(_jit, i0, r0, i1)
441 static jit_word_t
442 _sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
/* End of the PROTO-only declarations. */
443 #endif
444
445 #if CODE
/*
 * fpr_opi(name, type, size) expands to the definition of
 * _sse_<name>i_<type>(), the constant-operand flavour of a three-operand
 * SSE helper.  The generated function:
 *   1. obtains a register from jit_get_reg() (classes fpr|xpr) and asserts
 *      that jit_sse_reg_p() accepts it;
 *   2. loads the constant pointed to by i0 into it with sse_movi_<type>;
 *   3. delegates to the register form sse_<name>r_<type>(r0, r1, <reg>);
 *   4. hands the register back with jit_unget_reg().
 * `size` selects the constant's pointer type (jit_float<size>_t *).
 */
#  define fpr_opi(name, type, size)                                     \
static void                                                             \
_sse_##name##i_##type(jit_state_t *_jit,                                \
                      jit_int32_t r0, jit_int32_t r1,                   \
                      jit_float##size##_t *i0)                          \
{                                                                       \
    jit_int32_t         scratch;                                        \
                                                                        \
    scratch = jit_get_reg(jit_class_fpr|jit_class_xpr);                 \
    assert(jit_sse_reg_p(scratch));                                     \
    /* Materialise the constant, then reuse the register variant. */    \
    sse_movi_##type(rn(scratch), i0);                                   \
    sse_##name##r_##type(r0, r1, rn(scratch));                          \
    jit_unget_reg(scratch);                                             \
}
/*
 * fpr_bopi(name, type, size) expands to the definition of
 * _sse_b<name>i_<type>(), the constant-operand flavour of a branch helper.
 * The generated function obtains a register from jit_get_reg() (classes
 * fpr|xpr, with jit_class_nospill), loads the constant pointed to by i1
 * into it, calls the register form sse_b<name>r_<type>(i0, r0, <reg>),
 * releases the register and returns whatever the register form returned.
 */
#  define fpr_bopi(name, type, size)                                    \
static jit_word_t                                                       \
_sse_b##name##i_##type(jit_state_t *_jit,                               \
                       jit_word_t i0, jit_int32_t r0,                   \
                       jit_float##size##_t *i1)                         \
{                                                                       \
    jit_word_t          result;                                         \
    jit_int32_t         scratch;                                        \
                                                                        \
    scratch = jit_get_reg(jit_class_fpr|jit_class_xpr|                  \
                          jit_class_nospill);                           \
    assert(jit_sse_reg_p(scratch));                                     \
    /* Materialise the constant, then reuse the register variant. */    \
    sse_movi_##type(rn(scratch), i1);                                   \
    result = sse_b##name##r_##type(i0, r0, rn(scratch));                \
    jit_unget_reg(scratch);                                             \
    return (result);                                                    \
}
/*
 * Shorthands that fix the type suffix and constant width:
 * f* generate the _f (jit_float32_t) helpers, d* the _d (jit_float64_t) ones;
 * the *b* forms generate branch helpers through fpr_bopi.
 */
473 #  define fopi(name)                    fpr_opi(name, f, 32)
474 #  define fbopi(name)                   fpr_bopi(name, f, 32)
475 #  define dopi(name)                    fpr_opi(name, d, 64)
476 #  define dbopi(name)                   fpr_bopi(name, d, 64)
477 static void
478 _sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
479 {
480     rex(0, 0, r0, 0, r1);
481     ic(0x0f);
482     ic(c);
483     mrm(0x03, r7(r0), r7(r1));
484 }
485
486 static void
487 _ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
488        jit_int32_t r0, jit_int32_t r1)
489 {
490     ic(p);
491     rex(0, 0, r0, 0, r1);
492     ic(0x0f);
493     ic(c);
494     mrm(0x03, r7(r0), r7(r1));
495 }
496
497 static void
498 _ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
499        jit_int32_t m, jit_int32_t i)
500 {
501     ic(0x66);
502     rex(0, 0, 0, 0, r0);
503     ic(0x0f);
504     ic(c);
505     mrm(0x03, r7(m), r7(r0));
506     ic(i);
507 }
508
#if __X64
/* Variant of _ssexr() that calls rex() with its second argument set to
 * 1 instead of 0 (presumably REX.W, i.e. a 64-bit operand); only built
 * for __X64. */
static void
_sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
        jit_int32_t r0, jit_int32_t r1)
{
    jit_int32_t         lo0 = r7(r0);
    jit_int32_t         lo1 = r7(r1);

    ic(p);
    rex(0, 1, r0, 0, r1);
    ic(0x0f);
    ic(c);
    mrm(0x03, lo0, lo1);
}
#endif
521
522 static void
523 _ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
524        jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
525 {
526     ic(px);
527     rex(0, 0, rd, ri, rb);
528     ic(0x0f);
529     ic(code);
530     rx(rd, md, rb, ri, ms);
531 }
532
533 static void
534 _sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
535 {
536     if (r0 == r1)
537         addssr(r0, r2);
538     else if (r0 == r2)
539         addssr(r0, r1);
540     else {
541         sse_movr_f(r0, r1);
542         addssr(r0, r2);
543     }
544 }
545
546 fopi(add)
547
548 static void
549 _sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
550 {
551     if (r0 == r1)
552         addsdr(r0, r2);
553     else if (r0 == r2)
554         addsdr(r0, r1);
555     else {
556         sse_movr_d(r0, r1);
557         addsdr(r0, r2);
558     }
559 }
560
561 dopi(add)
562
563 static void
564 _sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
565 {
566     jit_int32_t         reg;
567     if (r0 == r1)
568         subssr(r0, r2);
569     else if (r0 == r2) {
570         reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
571         sse_movr_f(rn(reg), r0);
572         sse_movr_f(r0, r1);
573         subssr(r0, rn(reg));
574         jit_unget_reg(reg);
575     }
576     else {
577         sse_movr_f(r0, r1);
578         subssr(r0, r2);
579     }
580 }
581
582 fopi(sub)
583
584 static void
585 _sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
586 {
587     jit_int32_t         reg;
588     if (r0 == r1)
589         subsdr(r0, r2);
590     else if (r0 == r2) {
591         reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
592         sse_movr_d(rn(reg), r0);
593         sse_movr_d(r0, r1);
594         subsdr(r0, rn(reg));
595         jit_unget_reg(reg);
596     }
597     else {
598         sse_movr_d(r0, r1);
599         subsdr(r0, r2);
600     }
601 }
602
603 dopi(sub)
604
605 fopi(rsb)
606
607 dopi(rsb)
608
609 static void
610 _sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
611 {
612     if (r0 == r1)
613         mulssr(r0, r2);
614     else if (r0 == r2)
615         mulssr(r0, r1);
616     else {
617         sse_movr_f(r0, r1);
618         mulssr(r0, r2);
619     }
620 }
621
622 fopi(mul)
623
624 static void
625 _sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
626 {
627     if (r0 == r1)
628         mulsdr(r0, r2);
629     else if (r0 == r2)
630         mulsdr(r0, r1);
631     else {
632         sse_movr_d(r0, r1);
633         mulsdr(r0, r2);
634     }
635 }
636
637 dopi(mul)
638
639 static void
640 _sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
641 {
642     jit_int32_t         reg;
643     if (r0 == r1)
644         divssr(r0, r2);
645     else if (r0 == r2) {
646         reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
647         sse_movr_f(rn(reg), r0);
648         sse_movr_f(r0, r1);
649         divssr(r0, rn(reg));
650         jit_unget_reg(reg);
651     }
652     else {
653         sse_movr_f(r0, r1);
654         divssr(r0, r2);
655     }
656 }
657
658 fopi(div)
659
660 static void
661 _sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
662 {
663     jit_int32_t         reg;
664     if (r0 == r1)
665         divsdr(r0, r2);
666     else if (r0 == r2) {
667         reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
668         sse_movr_d(rn(reg), r0);
669         sse_movr_d(r0, r1);
670         divsdr(r0, rn(reg));
671         jit_unget_reg(reg);
672     }
673     else {
674         sse_movr_d(r0, r1);
675         divsdr(r0, r2);
676     }
677 }
678
679 dopi(div)
680
681 static void
682 _sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
683 {
684     jit_int32_t         reg;
685     if (r0 == r1) {
686         reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
687         pcmpeqlr(rn(reg), rn(reg));
688         psrl(rn(reg), 1);
689         andpsr(r0, rn(reg));
690         jit_unget_reg(reg);
691     }
692     else {
693         pcmpeqlr(r0, r0);
694         psrl(r0, 1);
695         andpsr(r0, r1);
696     }
697 }
698
699 static void
700 _sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
701 {
702     jit_int32_t         reg;
703     if (r0 == r1) {
704         reg = jit_get_reg(jit_class_fpr|jit_class_xpr);
705         pcmpeqlr(rn(reg), rn(reg));
706         psrq(rn(reg), 1);
707         andpdr(r0, rn(reg));
708         jit_unget_reg(reg);
709     }
710     else {
711         pcmpeqlr(r0, r0);
712         psrq(r0, 1);
713         andpdr(r0, r1);
714     }
715 }
716
717 static void
718 _sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
719 {
720     jit_int32_t         freg, ireg;
721     ireg = jit_get_reg(jit_class_gpr);
722     imovi(rn(ireg), 0x80000000);
723     if (r0 == r1) {
724         freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
725         movdlxr(rn(freg), rn(ireg));
726         xorpsr(r0, rn(freg));
727         jit_unget_reg(freg);
728     }
729     else {
730         movdlxr(r0, rn(ireg));
731         xorpsr(r0, r1);
732     }
733     jit_unget_reg(ireg);
734 }
735
736 static void
737 _sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
738 {
739     jit_int32_t         freg, ireg;
740     ireg = jit_get_reg(jit_class_gpr);
741     imovi(rn(ireg), 0x80000000);
742     if (r0 == r1) {
743         freg = jit_get_reg(jit_class_fpr|jit_class_xpr);
744         movdlxr(rn(freg), rn(ireg));
745         pslq(rn(freg), 32);
746         xorpdr(r0, rn(freg));
747         jit_unget_reg(freg);
748     }
749     else {
750         movdlxr(r0, rn(ireg));
751         pslq(r0, 32);
752         xorpdr(r0, r1);
753     }
754     jit_unget_reg(ireg);
755 }
756
757 static void
758 _ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
759         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
760 {
761     jit_bool_t          rc;
762     jit_int32_t         reg;
763     if ((rc = reg8_p(r0)))
764         reg = r0;
765     else {
766         reg = _RAX_REGNO;
767         movr(r0, reg);
768     }
769     ixorr(reg, reg);
770     if (d)
771         ucomisdr(r2, r1);
772     else
773         ucomissr(r2, r1);
774     cc(code, reg);
775     if (!rc)
776         xchgr(r0, reg);
777 }
778
779 static void
780 _sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
781 {
782     if (r0 != r1)
783         ssexr(0xf3, X86_SSE_MOV, r0, r1);
784 }
785
786 static void
787 _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
788 {
789     union {
790         jit_int32_t      i;
791         jit_float32_t    f;
792     } data;
793     jit_int32_t          reg;
794     jit_bool_t           ldi;
795
796     data.f = *i0;
797     if (data.f == 0.0 && !(data.i & 0x80000000))
798         xorpsr(r0, r0);
799     else {
800         ldi = !_jitc->no_data;
801 #if __X64
802         /* if will allocate a register for offset, just use immediate */
803 #  if CAN_RIP_ADDRESS
804         if (ldi) {
805             jit_word_t  rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
806             ldi = can_sign_extend_int_p(rel);
807             if (!ldi && address_p(i0))
808                 ldi = 1;
809         }
810 #  else
811         if (ldi && !address_p(i0))
812             ldi = 0;
813 #  endif
814 #endif
815         if (ldi)
816             sse_ldi_f(r0, (jit_word_t)i0);
817         else {
818             reg = jit_get_reg(jit_class_gpr);
819             movi(rn(reg), data.i);
820             movdlxr(r0, rn(reg));
821             jit_unget_reg(reg);
822         }
823     }
824 }
825
826 fopi(lt)
827 fopi(le)
828
829 static void
830 _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
831 {
832     jit_bool_t          rc;
833     jit_int32_t         reg;
834     jit_word_t          jp_code;
835     if ((rc = reg8_p(r0)))
836         reg = r0;
837     else {
838         reg = _RAX_REGNO;
839         movr(r0, _RAX_REGNO);
840     }
841     ixorr(reg, reg);
842     ucomissr(r2, r1);
843     jp_code = jpes(0);
844     cc(X86_CC_E, reg);
845     patch_at(jp_code, _jit->pc.w);
846     if (!rc)
847         xchgr(r0, reg);
848 }
849
850 fopi(eq)
851 fopi(ge)
852 fopi(gt)
853
854 static void
855 _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
856 {
857     jit_bool_t          rc;
858     jit_int32_t         reg;
859     jit_word_t          jp_code;
860     if ((rc = reg8_p(r0)))
861         reg = r0;
862     else {
863         reg = _RAX_REGNO;
864         movr(r0, _RAX_REGNO);
865     }
866     imovi(reg, 1);
867     ucomissr(r2, r1);
868     jp_code = jpes(0);
869     cc(X86_CC_NE, reg);
870     patch_at(jp_code, _jit->pc.w);
871     if (!rc)
872         xchgr(r0, reg);
873 }
874
875 fopi(ne)
876 fopi(unlt)
877
878 static void
879 _sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
880 {
881     if (r1 == r2)
882         movi(r0, 1);
883     else
884         ssecmpf(X86_CC_NA, r0, r2, r1);
885 }
886
887 fopi(unle)
888
889 static void
890 _sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
891 {
892     if (r1 == r2)
893         movi(r0, 1);
894     else
895         ssecmpf(X86_CC_E, r0, r1, r2);
896 }
897
898 fopi(uneq)
899
900 static void
901 _sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
902 {
903     if (r1 == r2)
904         movi(r0, 1);
905     else
906         ssecmpf(X86_CC_NA, r0, r1, r2);
907 }
908
909 fopi(unge)
910 fopi(ungt)
911
912 static void
913 _sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
914 {
915     if (r1 == r2)
916         ixorr(r0, r0);
917     else
918         ssecmpf(X86_CC_NE, r0, r1, r2);
919 }
920
921 fopi(ltgt)
922 fopi(ord)
923 fopi(unord)
924
925 static void
926 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
927 {
928     jit_int32_t         reg;
929 #if CAN_RIP_ADDRESS
930     jit_word_t          rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
931     if (can_sign_extend_int_p(rel))
932         movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
933     else
934 #endif
935     if (address_p(i0))
936         movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
937     else {
938         reg = jit_get_reg(jit_class_gpr);
939         movi(rn(reg), i0);
940         sse_ldr_f(r0, rn(reg));
941         jit_unget_reg(reg);
942     }
943 }
944
945 static void
946 _sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
947 {
948 #if __X64_32
949     jit_int32_t         reg;
950     reg = jit_get_reg(jit_class_gpr);
951     addr(rn(reg), r1, r2);
952     sse_ldr_f(r0, rn(reg));
953     jit_unget_reg(reg);
954 #else
955     movssmr(0, r1, r2, _SCL1, r0);
956 #endif
957 }
958
959 static void
960 _sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
961 {
962     jit_int32_t         reg;
963     if (can_sign_extend_int_p(i0))
964         movssmr(i0, r1, _NOREG, _SCL1, r0);
965     else {
966         reg = jit_get_reg(jit_class_gpr);
967 #if __X64_32
968         addi(rn(reg), r1, i0);
969         sse_ldr_f(r0, rn(reg));
970 #else
971         movi(rn(reg), i0);
972         sse_ldxr_f(r0, r1, rn(reg));
973 #endif
974         jit_unget_reg(reg);
975     }
976 }
977
978 static void
979 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
980 {
981     jit_int32_t         reg;
982 #if CAN_RIP_ADDRESS
983     jit_word_t          rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
984     if (can_sign_extend_int_p(rel))
985         movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
986     else
987 #endif
988     if (address_p(i0))
989         movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
990     else {
991         reg = jit_get_reg(jit_class_gpr);
992         movi(rn(reg), i0);
993         sse_str_f(rn(reg), r0);
994         jit_unget_reg(reg);
995     }
996 }
997
998 static void
999 _sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1000 {
1001 #if __X64_32
1002     jit_int32_t         reg;
1003     reg = jit_get_reg(jit_class_gpr);
1004     addr(rn(reg), r0, r1);
1005     sse_str_f(rn(reg), r2);
1006     jit_unget_reg(reg);
1007 #else
1008     movssrm(r2, 0, r0, r1, _SCL1);
1009 #endif
1010 }
1011
1012 static void
1013 _sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1014 {
1015     jit_int32_t         reg;
1016     if (can_sign_extend_int_p(i0))
1017         movssrm(r1, i0, r0, _NOREG, _SCL1);
1018     else {
1019         reg = jit_get_reg(jit_class_gpr);
1020 #if __X64_32
1021         addi(rn(reg), r0, i0);
1022         sse_str_f(rn(reg), r1);
1023 #else
1024         movi(rn(reg), i0);
1025         sse_stxr_f(rn(reg), r0, r1);
1026 #endif
1027         jit_unget_reg(reg);
1028     }
1029 }
1030
1031 static jit_word_t
1032 _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1033 {
1034     ucomissr(r1, r0);
1035     return (ja(i0));
1036 }
1037 fbopi(lt)
1038
1039 static jit_word_t
1040 _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1041 {
1042     ucomissr(r1, r0);
1043     return (jae(i0));
1044 }
1045 fbopi(le)
1046
1047 static jit_word_t
1048 _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1049 {
1050     jit_word_t          w;
1051     jit_word_t          jp_code;
1052     ucomissr(r0, r1);
1053     jp_code = jps(0);
1054     w = je(i0);
1055     patch_at(jp_code, _jit->pc.w);
1056     return (w);
1057 }
1058 fbopi(eq)
1059
1060 static jit_word_t
1061 _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1062 {
1063     ucomissr(r0, r1);
1064     return (jae(i0));
1065 }
1066 fbopi(ge)
1067
1068 static jit_word_t
1069 _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1070 {
1071     ucomissr(r0, r1);
1072     return (ja(i0));
1073 }
1074 fbopi(gt)
1075
1076 static jit_word_t
1077 _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1078 {
1079     jit_word_t          w;
1080     jit_word_t          jp_code;
1081     jit_word_t          jz_code;
1082     ucomissr(r0, r1);
1083     jp_code = jps(0);
1084     jz_code = jzs(0);
1085     patch_at(jp_code, _jit->pc.w);
1086     w = jmpi(i0);
1087     patch_at(jz_code, _jit->pc.w);
1088     return (w);
1089 }
1090 fbopi(ne)
1091
1092 static jit_word_t
1093 _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1094 {
1095     ucomissr(r0, r1);
1096     return (jnae(i0));
1097 }
1098 fbopi(unlt)
1099
1100 static jit_word_t
1101 _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1102 {
1103     jit_word_t          w;
1104     if (r0 == r1)
1105         w = jmpi(i0);
1106     else {
1107         ucomissr(r0, r1);
1108         w = jna(i0);
1109     }
1110     return (w);
1111 }
1112 fbopi(unle)
1113
1114 static jit_word_t
1115 _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1116 {
1117     jit_word_t          w;
1118     if (r0 == r1)
1119         w = jmpi(i0);
1120     else {
1121         ucomissr(r0, r1);
1122         w = je(i0);
1123     }
1124     return (w);
1125 }
1126 fbopi(uneq)
1127
1128 static jit_word_t
1129 _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1130 {
1131     jit_word_t          w;
1132     if (r0 == r1)
1133         w = jmpi(i0);
1134     else {
1135         ucomissr(r1, r0);
1136         w = jna(i0);
1137     }
1138     return (w);
1139 }
1140 fbopi(unge)
1141
1142 static jit_word_t
1143 _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1144 {
1145     ucomissr(r1, r0);
1146     return (jnae(i0));
1147 }
1148 fbopi(ungt)
1149
1150 static jit_word_t
1151 _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1152 {
1153     ucomissr(r0, r1);
1154     return (jne(i0));
1155 }
1156 fbopi(ltgt)
1157
1158 static jit_word_t
1159 _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1160 {
1161     ucomissr(r0, r1);
1162     return (jnp(i0));
1163 }
1164 fbopi(ord)
1165
1166 static jit_word_t
1167 _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1168 {
1169     ucomissr(r0, r1);
1170     return (jp(i0));
1171 }
1172 fbopi(unord)
1173
1174 dopi(lt)
1175 dopi(le)
1176
1177 static void
1178 _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1179 {
1180     jit_bool_t          rc;
1181     jit_int32_t         reg;
1182     jit_word_t          jp_code;
1183     if ((rc = reg8_p(r0)))
1184         reg = r0;
1185     else {
1186         reg = _RAX_REGNO;
1187         movr(r0, _RAX_REGNO);
1188     }
1189     ixorr(reg, reg);
1190     ucomisdr(r2, r1);
1191     jp_code = jpes(0);
1192     cc(X86_CC_E, reg);
1193     patch_at(jp_code, _jit->pc.w);
1194     if (!rc)
1195         xchgr(r0, reg);
1196 }
1197
1198 dopi(eq)
1199 dopi(ge)
1200 dopi(gt)
1201
1202 static void
1203 _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1204 {
1205     jit_bool_t          rc;
1206     jit_int32_t         reg;
1207     jit_word_t          jp_code;
1208     if ((rc = reg8_p(r0)))
1209         reg = r0;
1210     else {
1211         reg = _RAX_REGNO;
1212         movr(r0, _RAX_REGNO);
1213     }
1214     imovi(reg, 1);
1215     ucomisdr(r2, r1);
1216     jp_code = jpes(0);
1217     cc(X86_CC_NE, reg);
1218     patch_at(jp_code, _jit->pc.w);
1219     if (!rc)
1220         xchgr(r0, reg);
1221 }
1222
1223 dopi(ne)
1224 dopi(unlt)
1225
1226 static void
1227 _sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1228 {
1229     if (r1 == r2)
1230         movi(r0, 1);
1231     else
1232         ssecmpd(X86_CC_NA, r0, r2, r1);
1233 }
1234
1235 dopi(unle)
1236
1237 static void
1238 _sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1239 {
1240     if (r1 == r2)
1241         movi(r0, 1);
1242     else
1243         ssecmpd(X86_CC_E, r0, r1, r2);
1244 }
1245
1246 dopi(uneq)
1247
1248 static void
1249 _sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1250 {
1251     if (r1 == r2)
1252         movi(r0, 1);
1253     else
1254         ssecmpd(X86_CC_NA, r0, r1, r2);
1255 }
1256
1257 dopi(unge)
1258 dopi(ungt)
1259
1260 static void
1261 _sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1262 {
1263     if (r1 == r2)
1264         ixorr(r0, r0);
1265     else
1266         ssecmpd(X86_CC_NE, r0, r1, r2);
1267 }
1268
1269 dopi(ltgt)
1270 dopi(ord)
1271 dopi(unord)
1272
1273 static void
1274 _sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
1275 {
1276     if (r0 != r1)
1277         ssexr(0xf2, X86_SSE_MOV, r0, r1);
1278 }
1279
1280 static void
1281 _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
1282 {
1283     union {
1284         jit_int32_t      ii[2];
1285         jit_word_t       w;
1286         jit_float64_t    d;
1287     } data;
1288     jit_int32_t          reg;
1289     jit_bool_t           ldi;
1290
1291     data.d = *i0;
1292     if (data.d == 0.0 && !(data.ii[1] & 0x80000000))
1293         xorpdr(r0, r0);
1294     else {
1295         ldi = !_jitc->no_data;
1296 #if __X64
1297         /* if will allocate a register for offset, just use immediate */
1298 #  if CAN_RIP_ADDRESS
1299         if (ldi) {
1300             jit_word_t  rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1301             ldi = can_sign_extend_int_p(rel);
1302             if (!ldi && address_p(i0))
1303                 ldi = 1;
1304         }
1305 #  else
1306         if (ldi && !address_p(i0))
1307             ldi = 0;
1308 #  endif
1309 #endif
1310         if (ldi)
1311             sse_ldi_d(r0, (jit_word_t)i0);
1312         else {
1313             reg = jit_get_reg(jit_class_gpr);
1314 #if __X64 && !__X64_32
1315             movi(rn(reg), data.w);
1316             movdqxr(r0, rn(reg));
1317             jit_unget_reg(reg);
1318 #else
1319             CHECK_CVT_OFFSET();
1320             movi(rn(reg), data.ii[0]);
1321             stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
1322             movi(rn(reg), data.ii[1]);
1323             stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg));
1324             jit_unget_reg(reg);
1325             sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
1326 #endif
1327         }
1328     }
1329 }
1330
1331 static void
1332 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
1333 {
1334     jit_int32_t         reg;
1335 #if CAN_RIP_ADDRESS
1336     jit_word_t          rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1337     if (can_sign_extend_int_p(rel))
1338         movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
1339     else
1340 #endif
1341     if (address_p(i0))
1342         movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
1343     else {
1344         reg = jit_get_reg(jit_class_gpr);
1345         movi(rn(reg), i0);
1346         sse_ldr_d(r0, rn(reg));
1347         jit_unget_reg(reg);
1348     }
1349 }
1350
1351 static void
1352 _sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1353 {
1354 #if __X64_32
1355     jit_int32_t         reg;
1356     reg = jit_get_reg(jit_class_gpr);
1357     addr(rn(reg), r1, r2);
1358     sse_ldr_d(r0, rn(reg));
1359     jit_unget_reg(reg);
1360 #else
1361     movsdmr(0, r1, r2, _SCL1, r0);
1362 #endif
1363 }
1364
1365 static void
1366 _sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
1367 {
1368     jit_int32_t         reg;
1369     if (can_sign_extend_int_p(i0))
1370         movsdmr(i0, r1, _NOREG, _SCL1, r0);
1371     else {
1372         reg = jit_get_reg(jit_class_gpr);
1373 #if __X64_32
1374         addi(rn(reg), r1, i0);
1375         sse_ldr_d(r0, rn(reg));
1376 #else
1377         movi(rn(reg), i0);
1378         sse_ldxr_d(r0, r1, rn(reg));
1379 #endif
1380         jit_unget_reg(reg);
1381     }
1382 }
1383
1384 static void
1385 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
1386 {
1387     jit_int32_t         reg;
1388 #if CAN_RIP_ADDRESS
1389     jit_word_t          rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
1390     if (can_sign_extend_int_p(rel))
1391         movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
1392     else
1393 #endif
1394     if (address_p(i0))
1395         movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
1396     else {
1397         reg = jit_get_reg(jit_class_gpr);
1398         movi(rn(reg), i0);
1399         sse_str_d(rn(reg), r0);
1400         jit_unget_reg(reg);
1401     }
1402 }
1403
1404 static void
1405 _sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
1406 {
1407 #if __X64_32
1408     jit_int32_t         reg;
1409     reg = jit_get_reg(jit_class_gpr);
1410     addr(rn(reg), r0, r1);
1411     sse_str_d(rn(reg), r2);
1412     jit_unget_reg(reg);
1413 #else
1414     movsdrm(r2, 0, r0, r1, _SCL1);
1415 #endif
1416 }
1417
1418 static void
1419 _sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1420 {
1421     jit_int32_t         reg;
1422     if (can_sign_extend_int_p(i0))
1423         movsdrm(r1, i0, r0, _NOREG, _SCL1);
1424     else {
1425         reg = jit_get_reg(jit_class_gpr);
1426 #if __X64_32
1427         addi(rn(reg), r0, i0);
1428         sse_str_d(rn(reg), r1);
1429 #else
1430         movi(rn(reg), i0);
1431         sse_stxr_f(rn(reg), r0, r1);
1432 #endif
1433         jit_unget_reg(reg);
1434     }
1435 }
1436
1437 static jit_word_t
1438 _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1439 {
1440     ucomisdr(r1, r0);
1441     return (ja(i0));
1442 }
1443 dbopi(lt)
1444
1445 static jit_word_t
1446 _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1447 {
1448     ucomisdr(r1, r0);
1449     return (jae(i0));
1450 }
1451 dbopi(le)
1452
1453 static jit_word_t
1454 _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1455 {
1456     jit_word_t          w;
1457     jit_word_t          jp_code;
1458     ucomisdr(r0, r1);
1459     jp_code = jps(0);
1460     w = je(i0);
1461     patch_at(jp_code, _jit->pc.w);
1462     return (w);
1463 }
1464 dbopi(eq)
1465
1466 static jit_word_t
1467 _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1468 {
1469     ucomisdr(r0, r1);
1470     return (jae(i0));
1471 }
1472 dbopi(ge)
1473
1474 static jit_word_t
1475 _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1476 {
1477     ucomisdr(r0, r1);
1478     return (ja(i0));
1479 }
1480 dbopi(gt)
1481
1482 static jit_word_t
1483 _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1484 {
1485     jit_word_t          w;
1486     jit_word_t          jp_code;
1487     jit_word_t          jz_code;
1488     ucomisdr(r0, r1);
1489     jp_code = jps(0);
1490     jz_code = jzs(0);
1491     patch_at(jp_code, _jit->pc.w);
1492     w = jmpi(i0);
1493     patch_at(jz_code, _jit->pc.w);
1494     return (w);
1495 }
1496 dbopi(ne)
1497
1498 static jit_word_t
1499 _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1500 {
1501     ucomisdr(r0, r1);
1502     return (jnae(i0));
1503 }
1504 dbopi(unlt)
1505
1506 static jit_word_t
1507 _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1508 {
1509     jit_word_t          w;
1510     if (r0 == r1)
1511         w = jmpi(i0);
1512     else {
1513         ucomisdr(r0, r1);
1514         w = jna(i0);
1515     }
1516     return (w);
1517 }
1518 dbopi(unle)
1519
1520 static jit_word_t
1521 _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1522 {
1523     jit_word_t          w;
1524     if (r0 == r1)
1525         w = jmpi(i0);
1526     else {
1527         ucomisdr(r0, r1);
1528         w = je(i0);
1529     }
1530     return (w);
1531 }
1532 dbopi(uneq)
1533
1534 static jit_word_t
1535 _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1536 {
1537     jit_word_t          w;
1538     if (r0 == r1)
1539         w = jmpi(i0);
1540     else {
1541         ucomisdr(r1, r0);
1542         w = jna(i0);
1543     }
1544     return (w);
1545 }
1546 dbopi(unge)
1547
1548 static jit_word_t
1549 _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1550 {
1551     ucomisdr(r1, r0);
1552     return (jnae(i0));
1553 }
1554 dbopi(ungt)
1555
1556 static jit_word_t
1557 _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1558 {
1559     ucomisdr(r0, r1);
1560     return (jne(i0));
1561 }
1562 dbopi(ltgt)
1563
1564 static jit_word_t
1565 _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1566 {
1567     ucomisdr(r0, r1);
1568     return (jnp(i0));
1569 }
1570 dbopi(ord)
1571
1572 static jit_word_t
1573 _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
1574 {
1575     ucomisdr(r0, r1);
1576     return (jp(i0));
1577 }
1578 dbopi(unord)
/* Drop the generator macros so they do not leak past this file.  dopi
 * is defined above next to fopi/fbopi/dbopi and was missing from this
 * list; the bopi entry names a macro this file never defines and is
 * kept only because undefining it is harmless. */
#  undef fopi
#  undef fbopi
#  undef dopi
#  undef bopi
#  undef dbopi
#  undef fpr_bopi
#  undef fpr_opi
1585 #endif