Commit | Line | Data |
---|---|---|
ef79bbde P |
1 | // stop compiling if NORECBUILD build (only for Visual Studio) |
2 | #if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) | |
3 | ||
4 | #include <assert.h> | |
5 | #include "ix86-64.h" | |
6 | ||
// Scratch buffers, 16-byte aligned so they can be targeted by aligned
// SSE loads/stores from emitted code.
PCSX2_ALIGNED16(static unsigned int p[4]);
PCSX2_ALIGNED16(static unsigned int p2[4]);
PCSX2_ALIGNED16(static float f[4]);


// Data-type tag tracked per XMM register (one entry per hardware XMM reg).
XMMSSEType g_xmmtypes[XMMREGS] = {0};
13 | ||
14 | /********************/ | |
15 | /* SSE instructions */ | |
16 | /********************/ | |
17 | ||
// Emit an SSE instruction that loads from memory into XMM register 'to'.
// nc = number of opcode bytes, code = opcode bytes packed little-endian
// (as consumed by MEMADDR_OP), overb = trailing override byte count.
// These macros expect 'to' and 'from' to be in scope at the expansion site.
#define SSEMtoRv( nc, code, overb ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS ) ; \
	MEMADDR_OP(0, nc, code, true, to, from, overb)

#define SSEMtoR( code, overb ) SSEMtoRv(2, code, overb)

// Emit an SSE instruction that stores XMM register 'from' to memory.
#define SSERtoMv( nc, code, overb ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( from < XMMREGS) ; \
	MEMADDR_OP(0, nc, code, true, from, to, overb)

// NOTE: the original definition ended in a stray line continuation ('\')
// that silently absorbed the following line into the macro; removed.
#define SSERtoM( code, overb ) SSERtoMv( 2, code, overb )

// Scalar-single variants: same as above, prefixed with an F3 byte.
#define SSE_SS_MtoR( code, overb ) \
	SSEMtoRv(3, (code << 8) | 0xF3, overb)

#define SSE_SS_RtoM( code, overb ) \
	SSERtoMv(3, (code << 8) | 0xF3, overb)

// Register-to-register SSE op: REX (if needed), two opcode bytes, mod=3 ModRM.
#define SSERtoR( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	RexRB(0, to, from); \
	write16( code ); \
	ModRM( 3, to, from );

// 66-prefixed (operand-size override) memory forms.
#define SSEMtoR66( code ) \
	SSEMtoRv( 3, (code << 8) | 0x66, 0 )

#define SSERtoM66( code ) \
	SSERtoMv( 3, (code << 8) | 0x66, 0 )

#define SSERtoR66( code ) \
	write8( 0x66 ); \
	SSERtoR( code );

// Like SSERtoR66 but with reg/rm operand roles swapped (store direction).
#define _SSERtoR66( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	write8( 0x66 ); \
	RexRB(0, from, to); \
	write16( code ); \
	ModRM( 3, from, to );

// Scalar-single register-to-register form (F3 prefix).
#define SSE_SS_RtoR( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	write8( 0xf3 ); \
	RexRB(0, to, from); \
	write16( code ); \
	ModRM( 3, to, from );

// CMPccPS / CMPccSS: compare opcode 0F C2 followed by an immediate
// predicate byte 'op' (0=EQ 1=LT 2=LE 3=UNORD 4=NEQ 5=NLT 6=NLE 7=ORD,
// matching the wrapper functions defined later in this file).
#define CMPPSMtoR( op ) \
	SSEMtoR( 0xc20f, 1 ); \
	write8( op );

#define CMPPSRtoR( op ) \
	SSERtoR( 0xc20f ); \
	write8( op );

#define CMPSSMtoR( op ) \
	SSE_SS_MtoR( 0xc20f, 1 ); \
	write8( op );

#define CMPSSRtoR( op ) \
	SSE_SS_RtoR( 0xc20f ); \
	write8( op );
86 | ||
87 | ||
88 | ||
// Defined elsewhere in this file: emit ModRM (+SIB/displacement) bytes
// for a [reg(+offset)] effective address.
void WriteRmOffset(x86IntRegType to, int offset);
void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);

/* movups [r32][r32*scale] to xmm1 */
void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRXB(0, to, from2, from);
	write16( 0x100f );
	ModRM( 0, to, 0x4 );	// mod=0, rm=100b: SIB byte follows
	SibSB( scale, from2, from );
}

/* movups xmm1 to [r32][r32*scale] */
void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	// NOTE(review): first argument is 1 here but 0 in the load form above —
	// looks inconsistent; confirm which is intended.
	RexRXB(1, to, from2, from);
	write16( 0x110f );
	ModRM( 0, to, 0x4 );
	SibSB( scale, from2, from );
}

/* movups [r32] to r32 */
void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x100f );
	ModRM( 0, to, from );
}

/* movups r32 to [r32] */
void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x110f );
	ModRM( 0, from, to );
}
129 | ||
/* movlps [r32] to r32 */
void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	// NOTE(review): passes 1 where the offset variant below passes 0 — confirm.
	RexRB(1, to, from);
	write16( 0x120f );
	ModRM( 0, to, from );
}

// movlps xmm 'to' from [from+offset]
void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x120f );
	WriteRmOffsetFrom(to, from, offset);
}

/* movlps r32 to [r32] */
void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x130f );
	ModRM( 0, from, to );
}

// movlps store to [base+offset].
// NOTE(review): parameter types look swapped relative to their use —
// 'to' (x86SSERegType) is used as the memory base and 'from'
// (x86IntRegType) as the register operand; verify against callers.
void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x130f );
	WriteRmOffsetFrom(from, to, offset);
}
163 | ||
/* movaps [r32][r32*scale] to xmm1 */
void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
	// EBP cannot be a SIB base with mod=0 (that encoding means disp32
	// with no base), hence the extra assert.
	assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
	RexRXB(0, to, from2, from);
	write16( 0x280f );
	ModRM( 0, to, 0x4 );
	SibSB( scale, from2, from );
}

/* movaps xmm1 to [r32][r32*scale] */
void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
	assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
	RexRXB(0, to, from2, from);
	write16( 0x290f );
	ModRM( 0, to, 0x4 );
	SibSB( scale, from2, from );
}

// movaps [from+offset] to xmm 'to' (opcode 0F 28)
void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x280f );
	WriteRmOffsetFrom(to, from, offset);
}

// movaps xmm 'from' to [to+offset] (opcode 0F 29)
void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x290f );
	WriteRmOffsetFrom(from, to, offset);
}
201 | ||
202 | // movdqa [r32+offset] to r32 | |
203 | void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) | |
204 | { | |
205 | assert( cpucaps.hasStreamingSIMDExtensions ); | |
206 | write8(0x66); | |
207 | RexRB(0, to, from); | |
208 | write16( 0x6f0f ); | |
209 | WriteRmOffsetFrom(to, from, offset); | |
210 | } | |
211 | ||
212 | // movdqa r32 to [r32+offset] | |
213 | void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) | |
214 | { | |
215 | assert( cpucaps.hasStreamingSIMDExtensions ); | |
216 | write8(0x66); | |
217 | RexRB(0, from, to); | |
218 | write16( 0x7f0f ); | |
219 | WriteRmOffsetFrom(from, to, offset); | |
220 | } | |
221 | ||
// movups [from+offset] to xmm 'to' (0F 10 /r).
// NOTE(review): unlike every sibling emitter, this one omits
// assert( cpucaps.hasStreamingSIMDExtensions ) — likely an oversight.
void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
	RexRB(0, to, from);
	write16( 0x100f );
	WriteRmOffsetFrom(to, from, offset);
}

// movups xmm 'from' to [to+offset] (0F 11 /r)
void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x110f );
	WriteRmOffsetFrom(from, to, offset);
}
238 | ||
//**********************************************************************************/
//MOVAPS: Move aligned Packed Single Precision FP values (0F 28 load / 0F 29 store) *
//**********************************************************************************
void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); }
void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); }
void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x280f ); }

// MOVUPS: unaligned counterparts (0F 10 load / 0F 11 store).
void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); }
void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); }
248 | ||
// movsd xmm,xmm (F2 0F 10); uses the SSE1 emulation helper when the host
// CPU lacks SSE2.
void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVSD_XMM_to_XMM(to, from);
	else {
		write8(0xf2);
		SSERtoR( 0x100f);
	}
}

// movq xmm, m64 (F3 0F 7E), with SSE1 emulation fallback.
void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_M64_to_XMM(to, from);
	else {
		SSE_SS_MtoR( 0x7e0f, 0);
	}
}

// movq xmm, xmm (F3 0F 7E), with SSE1 emulation fallback.
void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_XMM_to_XMM(to, from);
	else {
		SSE_SS_RtoR( 0x7e0f);
	}
}

// movq m64, xmm (66 0F D6); falls back to a movlps store on non-SSE2 hosts.
void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE_MOVLPS_XMM_to_M64(to, from);
	else {
		SSERtoM66(0xd60f);
	}
}

#ifndef __x86_64__
// MMX <-> XMM moves; MMX forms are compiled out of 64-bit builds.
// movdq2q mm, xmm (F2 0F D6), with emulation fallback.
void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVDQ2Q_XMM_to_MM(to, from);
	else {
		write8(0xf2);
		SSERtoR( 0xd60f);
	}
}
// movq2dq xmm, mm (F3 0F D6), with emulation fallback.
void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ2DQ_MM_to_XMM(to, from);
	else {
		SSE_SS_RtoR( 0xd60f);
	}
}
#endif
299 | ||
//**********************************************************************************/
//MOVSS: Move Scalar Single-Precision FP value (F3 0F 10 load / F3 0F 11 store)     *
//**********************************************************************************
void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); }
void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); }

// movss [to], xmm 'from' — scalar store through a register-held address.
void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
	write8(0xf3);
	RexRB(0, from, to);
	write16(0x110f);
	ModRM(0, from, to);
}

void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x100f ); }

// movss xmm 'to', [from+offset]
void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	write8(0xf3);
	RexRB(0, to, from);
	write16( 0x100f );
	WriteRmOffsetFrom(to, from, offset);
}

// movss [to+offset], xmm 'from'
void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	write8(0xf3);
	RexRB(0, from, to);
	write16(0x110f);
	WriteRmOffsetFrom(from, to, offset);
}

// maskmovdqu xmm, xmm (66 0F F7): byte-masked store to [EDI/RDI].
void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); }
//**********************************************************************************/
//MOVLPS: Move low Packed Single-Precision FP (0F 12 load / 0F 13 store)            *
//**********************************************************************************
void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); }
void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); }

// movlps xmm 'to', [from+offset]
void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x120f );
	WriteRmOffsetFrom(to, from, offset);
}

// movlps [to+offset], xmm 'from'
// NOTE(review): missing the SSE capability assert its sibling above has.
void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	RexRB(0, from, to);
	write16(0x130f);
	WriteRmOffsetFrom(from, to, offset);
}
352 | ||
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVHPS: Move High Packed Single-Precision FP (0F 16 load / 0F 17 store)           *
//**********************************************************************************
void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); }
void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); }

// movhps xmm 'to', [from+offset]
void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x160f );
	WriteRmOffsetFrom(to, from, offset);
}

// movhps [to+offset], xmm 'from'
void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16(0x170f);
	WriteRmOffsetFrom(from, to, offset);
}

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVLHPS: Move packed Single-Precision FP low to high (0F 16 reg form)             *
//**********************************************************************************
void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); }

//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVHLPS: Move packed Single-Precision FP High to Low (0F 12 reg form)             *
//**********************************************************************************
void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); }
387 | ||
///////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//ANDPS: Logical Bit-wise AND for Single FP (0F 54)                                 *
//**********************************************************************************
void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); }
void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); }

///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values (0F 55)           *
//**********************************************************************************
void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); }
void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); }

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//RCPPS : Packed Single-Precision FP Reciprocal (0F 53)                             *
//**********************************************************************************
void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); }
void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); }

// RCPSS: scalar reciprocal approximation (F3 0F 53).
void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); }
void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); }

//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//ORPS : Bit-wise Logical OR of Single-Precision FP Data (0F 56)                    *
//**********************************************************************************
void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); }
void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); }

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//XORPS : Bitwise Logical XOR of Single-Precision FP Values (0F 57)                 *
//**********************************************************************************
void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); }
void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); }
425 | ||
///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//ADDPS : ADD Packed Single-Precision FP Values (0F 58)                             *
//**********************************************************************************
void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); }
void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); }

////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//ADDSS : ADD Scalar Single-Precision FP Values (F3 0F 58)                          *
//**********************************************************************************
void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); }
void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); }

/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SUBPS: Packed Single-Precision FP Subtract (0F 5C)                                *
//**********************************************************************************
void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); }
void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); }

///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SUBSS : Scalar Single-Precision FP Subtract (F3 0F 5C)                            *
//**********************************************************************************
void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); }
void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); }

/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MULPS : Packed Single-Precision FP Multiply (0F 59)                               *
//**********************************************************************************
void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); }
void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); }

////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MULSS : Scalar Single-Precision FP Multiply (F3 0F 59)                            *
//**********************************************************************************
void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); }
void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); }
467 | ||
////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//Packed Single-Precision FP compare (CMPccPS) — 0F C2 /r ib                        *
//**********************************************************************************
//missing SSE_CMPPS_I8_to_XMM
//        SSE_CMPPS_M32_to_XMM
//        SSE_CMPPS_XMM_to_XMM
// The immediate selects the predicate:
// 0=EQ 1=LT 2=LE 3=UNORD 4=NEQ 5=NLT 6=NLE 7=ORD
void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); }
void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); }
void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); }
void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); }
void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); }
void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); }
void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); }
void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); }
void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); }
void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); }
void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); }
void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); }
void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); }
void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); }
void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); }
void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); }
491 | ||
///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//Scalar Single-Precision FP compare (CMPccSS) — F3 0F C2 /r ib                     *
//**********************************************************************************
//missing SSE_CMPSS_I8_to_XMM
//        SSE_CMPSS_M32_to_XMM
//        SSE_CMPSS_XMM_to_XMM
// Same predicate immediate as CMPccPS above.
void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); }
void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); }
void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); }
void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); }
void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); }
void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); }
void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); }
void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); }
void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); }
void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); }
void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); }
void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); }
void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); }
void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); }
void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); }
void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); }
515 | ||
// ucomiss xmm, m32 (0F 2E): unordered scalar-single compare, sets EFLAGS.
void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from )
{
	MEMADDR_OP(0, VAROP2(0x0F, 0x2E), true, to, from, 0);
}

// ucomiss xmm, xmm
void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	RexRB(0, to, from);
	write16( 0x2e0f );
	ModRM( 3, to, from );
}
527 | ||
//////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal (0F 52)               *
//**********************************************************************************
void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); }
void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x520f ); }

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal (F3 0F 52)            *
//**********************************************************************************
void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); }
void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x520f ); }

////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SQRTPS : Packed Single-Precision FP Square Root (0F 51)                           *
//**********************************************************************************
void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); }
void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x510f ); }

//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//SQRTSS : Scalar Single-Precision FP Square Root (F3 0F 51)                        *
//**********************************************************************************
void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); }
void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x510f ); }

////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MAXPS: Return Packed Single-Precision FP Maximum (0F 5F)                          *
//**********************************************************************************
void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); }
void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); }

/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MAXSS: Return Scalar Single-Precision FP Maximum (F3 0F 5F)                       *
//**********************************************************************************
void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); }
void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); }
569 | ||
#ifndef __x86_64__
/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion (0F 2A)              *
// Uses MMX registers, so these are compiled out of 64-bit builds.                  *
//**********************************************************************************
void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); }
void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); }

///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion (0F 2D)              *
//**********************************************************************************
void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); }
void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); }
#endif

// cvttss2si r32, xmm/m32 (F3 0F 2C): truncating float -> int conversion.
void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { SSE_SS_MtoR(0x2c0f, 0); }
void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	write8(0xf3);
	RexRB(0, to, from);
	write16(0x2c0f);
	ModRM(3, to, from);
}

// cvtsi2ss xmm, r/m32 (F3 0F 2A): int -> float conversion.
void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x2a0f, 0); }
void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	write8(0xf3);
	RexRB(0, to, from);
	write16(0x2a0f);
	ModRM(3, to, from);
}
603 | ||
///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion (0F 5B)    *
//**********************************************************************************
void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); }
void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); }

//**********************************************************************************/
//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion (66 0F 5B) *
//**********************************************************************************
void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); }
void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); }

// cvttps2dq xmm, xmm (F3 0F 5B): truncating packed conversion.
void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x5b0f); }
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MINPS: Return Packed Single-Precision FP Minimum (0F 5D)                          *
//**********************************************************************************
void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); }
void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); }

//////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MINSS: Return Scalar Single-Precision FP Minimum (F3 0F 5D)                       *
//**********************************************************************************
void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); }
void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); }
631 | ||
#ifndef __x86_64__
///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMAXSW: Packed Signed Integer Word Maximum                                       *
//**********************************************************************************
// MMX-register form only, encoding 0F EE /r. The memory and XMM variants
// (SSE_PMAXSW_M64_to_MM, SSE2_PMAXSW_M128_to_XMM, SSE2_PMAXSW_XMM_to_XMM)
// are not implemented here.
void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); }

///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMINSW: Packed Signed Integer Word Minimum                                       *
//**********************************************************************************
// MMX-register form only, encoding 0F EA /r. The memory and XMM variants
// (SSE_PMINSW_M64_to_MM, SSE2_PMINSW_M128_to_XMM, SSE2_PMINSW_XMM_to_XMM)
// are not implemented here.
void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); }
#endif
653 | ||
654 | ////////////////////////////////////////////////////////////////////////////////////// | |
655 | //**********************************************************************************/ | |
656 | //SHUFPS: Shuffle Packed Single-Precision FP Values * | |
657 | //********************************************************************************** | |
658 | void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } | |
659 | void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } | |
660 | ||
661 | void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) | |
662 | { | |
663 | RexRB(0, to, from); | |
664 | write16(0xc60f); | |
665 | WriteRmOffsetFrom(to, from, offset); | |
666 | write8(imm8); | |
667 | } | |
668 | ||
669 | //////////////////////////////////////////////////////////////////////////////////// | |
670 | //**********************************************************************************/ | |
671 | //PSHUFD: Shuffle Packed DoubleWords * | |
672 | //********************************************************************************** | |
673 | void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) | |
674 | { | |
675 | if( !cpucaps.hasStreamingSIMD2Extensions ) { | |
676 | SSE2EMU_PSHUFD_XMM_to_XMM(to, from, imm8); | |
677 | } | |
678 | else { | |
679 | SSERtoR66( 0x700F ); | |
680 | write8( imm8 ); | |
681 | } | |
682 | } | |
683 | void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoRv( 3, 0x700F66, 1 ); write8( imm8 ); } | |
684 | ||
685 | void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } | |
686 | void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoRv(3, 0x700FF2, 1); write8(imm8); } | |
687 | void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSE_SS_RtoR(0x700F); write8(imm8); } | |
688 | void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSE_SS_MtoR(0x700F, 1); write8(imm8); } | |
689 | ||
690 | /////////////////////////////////////////////////////////////////////////////////// | |
691 | //**********************************************************************************/ | |
692 | //UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data * | |
693 | //********************************************************************************** | |
694 | void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } | |
695 | void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } | |
696 | ||
697 | //////////////////////////////////////////////////////////////////////////////////////// | |
698 | //**********************************************************************************/ | |
699 | //UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data * | |
700 | //********************************************************************************** | |
701 | void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } | |
702 | void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } | |
703 | ||
704 | //////////////////////////////////////////////////////////////////////////////////////// | |
705 | //**********************************************************************************/ | |
706 | //DIVPS : Packed Single-Precision FP Divide * | |
707 | //********************************************************************************** | |
708 | void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); } | |
709 | void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); } | |
710 | ||
711 | ////////////////////////////////////////////////////////////////////////////////////// | |
712 | //**********************************************************************************/ | |
713 | //DIVSS : Scalar Single-Precision FP Divide * | |
714 | //********************************************************************************** | |
715 | void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } | |
716 | void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); } | |
717 | ||
718 | ///////////////////////////////////////////////////////////////////////////////////////// | |
719 | //**********************************************************************************/ | |
720 | //STMXCSR : Store Streaming SIMD Extension Control/Status * | |
721 | //********************************************************************************** | |
722 | void SSE_STMXCSR( uptr from ) { | |
723 | MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 3, from, 0); | |
724 | } | |
725 | ||
726 | ///////////////////////////////////////////////////////////////////////////////////// | |
727 | //**********************************************************************************/ | |
728 | //LDMXCSR : Load Streaming SIMD Extension Control/Status * | |
729 | //********************************************************************************** | |
730 | void SSE_LDMXCSR( uptr from ) { | |
731 | MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 2, from, 0); | |
732 | } | |
733 | ||
734 | ///////////////////////////////////////////////////////////////////////////////////// | |
735 | //**********************************************************************************/ | |
736 | //PADDB,PADDW,PADDD : Add Packed Integers * | |
737 | //********************************************************************************** | |
738 | void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); } | |
739 | void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); } | |
740 | void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); } | |
741 | void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); } | |
742 | void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); } | |
743 | void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); } | |
744 | ||
745 | void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); } | |
746 | void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); } | |
747 | ||
748 | /////////////////////////////////////////////////////////////////////////////////// | |
749 | //**********************************************************************************/ | |
750 | //PCMPxx: Compare Packed Integers * | |
751 | //********************************************************************************** | |
752 | void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); } | |
753 | void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } | |
754 | void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } | |
755 | void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } | |
756 | void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } | |
757 | void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } | |
758 | void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } | |
759 | void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } | |
760 | void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); } | |
761 | void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } | |
762 | void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) | |
763 | { | |
764 | if( !cpucaps.hasStreamingSIMD2Extensions ) { | |
765 | SSE_CMPEQPS_XMM_to_XMM(to, from); | |
766 | } | |
767 | else { | |
768 | SSERtoR66( 0x760F ); | |
769 | } | |
770 | } | |
771 | ||
772 | void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ) | |
773 | { | |
774 | if( !cpucaps.hasStreamingSIMD2Extensions ) { | |
775 | SSE_CMPEQPS_M128_to_XMM(to, from); | |
776 | } | |
777 | else { | |
778 | SSEMtoR66( 0x760F ); | |
779 | } | |
780 | } | |
781 | ||
782 | //////////////////////////////////////////////////////////////////////////////////////////// | |
783 | //**********************************************************************************/ | |
784 | //PEXTRW,PINSRW: Packed Extract/Insert Word * | |
785 | //********************************************************************************** | |
786 | void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } | |
787 | void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } | |
788 | ||
789 | //////////////////////////////////////////////////////////////////////////////////////////// | |
790 | //**********************************************************************************/ | |
791 | //PSUBx: Subtract Packed Integers * | |
792 | //********************************************************************************** | |
793 | void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); } | |
794 | void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); } | |
795 | void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); } | |
796 | void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); } | |
797 | void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); } | |
798 | void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); } | |
799 | void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); } | |
800 | void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } | |
801 | ||
802 | /////////////////////////////////////////////////////////////////////////////////////// | |
803 | //**********************************************************************************/ | |
804 | //MOVD: Move Dword(32bit) to /from XMM reg * | |
805 | //********************************************************************************** | |
806 | void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } | |
807 | void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) | |
808 | { | |
809 | if( !cpucaps.hasStreamingSIMD2Extensions ) { | |
810 | SSE2EMU_MOVD_R_to_XMM(to, from); | |
811 | } | |
812 | else { | |
813 | SSERtoR66(0x6E0F); | |
814 | } | |
815 | } | |
816 | ||
817 | void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) | |
818 | { | |
819 | write8(0x66); | |
820 | RexRB(0, to, from); | |
821 | write16( 0x6e0f ); | |
822 | ModRM( 0, to, from); | |
823 | } | |
824 | ||
825 | void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) | |
826 | { | |
827 | write8(0x66); | |
828 | RexRB(0, to, from); | |
829 | write16( 0x6e0f ); | |
830 | WriteRmOffsetFrom(to, from, offset); | |
831 | } | |
832 | ||
833 | void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } | |
834 | void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { | |
835 | if( !cpucaps.hasStreamingSIMD2Extensions ) { | |
836 | SSE2EMU_MOVD_XMM_to_R(to, from); | |
837 | } | |
838 | else { | |
839 | _SSERtoR66(0x7E0F); | |
840 | } | |
841 | } | |
842 | ||
843 | void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) | |
844 | { | |
845 | write8(0x66); | |
846 | RexRB(0, from, to); | |
847 | write16( 0x7e0f ); | |
848 | ModRM( 0, from, to ); | |
849 | } | |
850 | ||
851 | void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) | |
852 | { | |
853 | if( !cpucaps.hasStreamingSIMD2Extensions ) { | |
854 | SSE2EMU_MOVD_XMM_to_RmOffset(to, from, offset); | |
855 | } | |
856 | else { | |
857 | write8(0x66); | |
858 | RexRB(0, from, to); | |
859 | write16( 0x7e0f ); | |
860 | WriteRmOffsetFrom(from, to, offset); | |
861 | } | |
862 | } | |
863 | ||
#ifdef __x86_64__
// 64-bit MOVQ between an XMM register and a GPR.  The "1" passed to RexRB
// sets REX.W, widening the 66 0F 7E / 66 0F 6E MOVD encodings to 64 bits.
void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from )
{
	assert( from < XMMREGS);
	write8( 0x66 );
	RexRB(1, from, to);
	write16( 0x7e0f );       // byte order 0F 7E (store direction)
	ModRM( 3, from, to );
}

void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from )
{
	assert( to < XMMREGS);
	write8(0x66);
	RexRB(1, to, from);
	write16( 0x6e0f );       // byte order 0F 6E (load direction)
	ModRM( 3, to, from );
}

#endif
884 | ||
885 | //////////////////////////////////////////////////////////////////////////////////// | |
886 | //**********************************************************************************/ | |
887 | //POR : SSE Bitwise OR * | |
888 | //********************************************************************************** | |
889 | void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); } | |
890 | void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); } | |
891 | ||
892 | // logical and to &= from | |
893 | void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); } | |
894 | void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); } | |
895 | ||
896 | // to = (~to) & from | |
897 | void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); } | |
898 | void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); } | |
899 | ||
900 | ///////////////////////////////////////////////////////////////////////////////////// | |
901 | //**********************************************************************************/ | |
902 | //PXOR : SSE Bitwise XOR * | |
903 | //********************************************************************************** | |
904 | void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEF0F ); } | |
905 | void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEF0F ); } | |
906 | /////////////////////////////////////////////////////////////////////////////////////// | |
907 | ||
908 | void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) {SSEMtoR66(0x6F0F); } | |
909 | void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ){SSERtoM66(0x7F0F);} | |
910 | void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSERtoR66(0x6F0F); } | |
911 | ||
912 | void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x6F0F, 0); } | |
913 | void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { SSE_SS_RtoM(0x7F0F, 0); } | |
914 | void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x6F0F); } | |
915 | ||
// Packed shifts.  Immediate forms use the group opcodes 66 0F 71 (words),
// 66 0F 72 (dwords), 66 0F 73 (qwords/dqwords); the ModRM "reg" field
// (second ModRM argument below) selects the operation:
//   /2 = PSRL, /3 = PSRLDQ, /4 = PSRA, /6 = PSLL, /7 = PSLLDQ.

// shift right logical

void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); }
void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); }
void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 2 , to );      // /2 = PSRLW
	write8( imm8 );
}

void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); }
void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); }
void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 2 , to );      // /2 = PSRLD
	write8( imm8 );
}

void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); }
void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); }
void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 2 , to );      // /2 = PSRLQ
	write8( imm8 );
}

// Byte-wise shift of the whole 128-bit register (immediate only).
void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 3 , to );      // /3 = PSRLDQ
	write8( imm8 );
}

// shift right arithmetic

void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); }
void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); }
void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 4 , to );      // /4 = PSRAW
	write8( imm8 );
}

void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); }
void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); }
void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 4 , to );      // /4 = PSRAD
	write8( imm8 );
}

// shift left logical

void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); }
void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); }
void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 6 , to );      // /6 = PSLLW
	write8( imm8 );
}

void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); }
void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); }
void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 6 , to );      // /6 = PSLLD
	write8( imm8 );
}

void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); }
void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); }
void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 6 , to );      // /6 = PSLLQ
	write8( imm8 );
}

// Byte-wise shift of the whole 128-bit register (immediate only).
void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 7 , to );      // /7 = PSLLDQ
	write8( imm8 );
}
1027 | ||
1028 | ||
1029 | void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEE0F ); } | |
1030 | void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEE0F ); } | |
1031 | ||
1032 | void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDE0F ); } | |
1033 | void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xDE0F ); } | |
1034 | ||
1035 | void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEA0F ); } | |
1036 | void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEA0F ); } | |
1037 | ||
1038 | void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDA0F ); } | |
1039 | void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xDA0F ); } | |
1040 | ||
1041 | // | |
1042 | ||
1043 | void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEC0F ); } | |
1044 | void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEC0F ); } | |
1045 | ||
1046 | void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xED0F ); } | |
1047 | void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xED0F ); } | |
1048 | ||
1049 | void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE80F ); } | |
1050 | void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xE80F ); } | |
1051 | ||
1052 | void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE90F ); } | |
1053 | void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xE90F ); } | |
1054 | ||
1055 | void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); } | |
1056 | void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); } | |
1057 | void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); } | |
1058 | void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); } | |
1059 | ||
1060 | void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); } | |
1061 | void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); } | |
1062 | void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); } | |
1063 | void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); } | |
1064 | ||
1065 | //**********************************************************************************/ | |
1066 | //PACKSSWB,PACKSSDW: Pack Saturate Signed Word | |
1067 | //********************************************************************************** | |
1068 | void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); } | |
1069 | void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); } | |
1070 | void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); } | |
1071 | void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); } | |
1072 | ||
1073 | void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); } | |
1074 | void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); } | |
1075 | ||
1076 | //**********************************************************************************/ | |
1077 | //PUNPCKHWD: Unpack 16bit high | |
1078 | //********************************************************************************** | |
1079 | void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } | |
1080 | void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } | |
1081 | ||
1082 | void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } | |
1083 | void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } | |
1084 | ||
1085 | void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } | |
1086 | void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } | |
1087 | void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } | |
1088 | void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } | |
1089 | ||
1090 | void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } | |
1091 | void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } | |
1092 | void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } | |
1093 | void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); } | |
1094 | ||
1095 | void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); } | |
1096 | void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } | |
1097 | ||
1098 | void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } | |
1099 | void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } | |
1100 | ||
1101 | void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } | |
1102 | void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); } | |
1103 | void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } | |
1104 | void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } | |
1105 | ||
1106 | void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } | |
1107 | void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } | |
1108 | ||
// Mask extraction to a GPR: PMOVMSKB 66 0F D7 (byte sign bits),
// MOVMSKPS 0F 50 (float sign bits), MOVMSKPD 66 0F 50 (double sign bits).
void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); }

void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); }
void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); }

// SSE3 HADDPS: F2 0F 7C.  0x7c0fF2 packs prefix F2 + opcode 0F 7C into one
// little-endian constant for the memory form.
void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); }
void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from){ SSEMtoRv( 3, 0x7c0fF2, 0 ); }

// SSE3 MOVSLDUP: F3 0F 12 — duplicate even-indexed single-precision elements.
void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
	write8(0xf3);
	RexRB(0, to, from);
	write16( 0x120f);        // write16 emits LSB first -> byte order 0F 12
	ModRM( 3, to, from );
}

void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x120f, 0); }
// SSE3 MOVSHDUP: F3 0F 16 — duplicate odd-indexed single-precision elements.
void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x160f); }
void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x160f, 0); }
1127 | ||
// SSE-X: type-dispatching wrappers.  Each one consults g_xmmtypes[] (the
// recorded content type of the XMM register being read or written) and
// emits the integer SSE2 instruction when the register holds integer data
// and the host supports SSE2; otherwise it falls back to the FP SSE1
// equivalent, which moves/combines the same bits.
void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
	else SSE_MOVAPS_M128_to_XMM(to, from);
}

void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
	else SSE_MOVAPS_XMM_to_M128(to, from);
}

void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
	else SSE_MOVAPS_XMM_to_XMM(to, from);
}

void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
	else SSE_MOVAPSRmtoROffset(to, from, offset);
}

void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
	else SSE_MOVAPSRtoRmOffset(to, from, offset);
}

// NOTE(review): the unaligned MOVDQU wrappers fall back to MOVAPS (aligned)
// rather than MOVUPS — presumably all callers guarantee alignment on the
// SSE1 path; confirm before relying on these with unaligned addresses.
void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
	else SSE_MOVAPS_M128_to_XMM(to, from);
}

void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
	else SSE_MOVAPS_XMM_to_M128(to, from);
}

void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
	else SSE_MOVAPS_XMM_to_XMM(to, from);
}

void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
	else SSE_MOVSS_M32_to_XMM(to, from);
}

void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
	else SSE_MOVSS_XMM_to_M32(to, from);
}

void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
	else SSE_MOVSS_XMM_to_Rm(to, from);
}

void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
	else SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
}

void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
	else SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
}

void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
	else SSE_ORPS_M128_to_XMM(to, from);
}

void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
	else SSE_ORPS_XMM_to_XMM(to, from);
}

void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
	else SSE_XORPS_M128_to_XMM(to, from);
}

void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
	else SSE_XORPS_XMM_to_XMM(to, from);
}

void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
	else SSE_ANDPS_M128_to_XMM(to, from);
}

void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
	else SSE_ANDPS_XMM_to_XMM(to, from);
}

void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
	else SSE_ANDNPS_M128_to_XMM(to, from);
}
1248 | ||
1249 | void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) | |
1250 | { | |
1251 | if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from); | |
1252 | else SSE_ANDNPS_XMM_to_XMM(to, from); | |
1253 | } | |
1254 | ||
1255 | void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) | |
1256 | { | |
1257 | if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from); | |
1258 | else SSE_UNPCKLPS_M128_to_XMM(to, from); | |
1259 | } | |
1260 | ||
1261 | void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) | |
1262 | { | |
1263 | if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from); | |
1264 | else SSE_UNPCKLPS_XMM_to_XMM(to, from); | |
1265 | } | |
1266 | ||
1267 | void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) | |
1268 | { | |
1269 | if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from); | |
1270 | else SSE_UNPCKHPS_M128_to_XMM(to, from); | |
1271 | } | |
1272 | ||
1273 | void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) | |
1274 | { | |
1275 | if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from); | |
1276 | else SSE_UNPCKHPS_XMM_to_XMM(to, from); | |
1277 | } | |
1278 | ||
1279 | void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) | |
1280 | { | |
1281 | if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) { | |
1282 | SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from); | |
1283 | if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e); | |
1284 | } | |
1285 | else { | |
1286 | SSE_MOVHLPS_XMM_to_XMM(to, from); | |
1287 | } | |
1288 | } | |
1289 | ||
1290 | // SSE2 emulation | |
// Emulates MOVSD xmm,xmm on SSE1-only CPUs: copy the low qword of 'from'
// into 'to' while preserving 'to's high qword.
//   shufps to,from,0x4e -> to = { to[2], to[3], from[0], from[1] }
//   shufps to,to,0x4e   -> to = { from[0], from[1], to[2], to[3] }
void SSE2EMU_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
{
	SSE_SHUFPS_XMM_to_XMM(to, from, 0x4e);
	SSE_SHUFPS_XMM_to_XMM(to, to, 0x4e);
}
1296 | ||
// Emulates MOVQ xmm,m64: zero the whole register, then load the low
// qword from memory (high qword stays zero, matching MOVQ semantics).
void SSE2EMU_MOVQ_M64_to_XMM( x86SSERegType to, uptr from)
{
	SSE_XORPS_XMM_to_XMM(to, to);
	SSE_MOVLPS_M64_to_XMM(to, from);
}
1302 | ||
1303 | void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from) | |
1304 | { | |
1305 | SSE_XORPS_XMM_to_XMM(to, to); | |
1306 | SSE2EMU_MOVSD_XMM_to_XMM(to, from); | |
1307 | } | |
1308 | ||
// Emulates MOVD xmm,[reg+offset] on SSE1-only CPUs: zero-extend the
// loaded dword to 128 bits via the static 'p' staging buffer, then load
// the whole buffer into the XMM register.
void SSE2EMU_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	MOV32RmtoROffset(EAX, from, offset);
	MOV32ItoM((uptr)p+4, 0);
	MOV32ItoM((uptr)p+8, 0);
	MOV32RtoM((uptr)p, EAX);
	MOV32ItoM((uptr)p+12, 0);
	SSE_MOVAPS_M128_to_XMM(to, (uptr)p);
}
1318 | ||
// Emulates MOVD [reg+offset],xmm: spill the low dword of the XMM register
// to the static buffer, then copy it to the destination address via EAX
// (EAX is clobbered by the emitted code).
void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset )
{
	SSE_MOVSS_XMM_to_M32((uptr)p, from);
	MOV32MtoR(EAX, (uptr)p);
	MOV32RtoRmOffset(to, EAX, offset);
}
1325 | ||
1326 | #ifndef __x86_64__ | |
1327 | extern void SetMMXstate(); | |
1328 | ||
1329 | void SSE2EMU_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) | |
1330 | { | |
1331 | SSE_MOVLPS_XMM_to_M64(p, from); | |
1332 | MOVQMtoR(to, p); | |
1333 | SetMMXstate(); | |
1334 | } | |
1335 | ||
1336 | void SSE2EMU_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) | |
1337 | { | |
1338 | MOVQRtoM(p, from); | |
1339 | SSE_MOVLPS_M64_to_XMM(to, p); | |
1340 | SetMMXstate(); | |
1341 | } | |
1342 | #endif | |
1343 | ||
1344 | /****************************************************************************/ | |
1345 | /* SSE2 Emulated functions for SSE CPU's by kekko */ | |
1346 | /****************************************************************************/ | |
1347 | void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { | |
1348 | MOV64ItoR(EAX, (uptr)&p); | |
1349 | MOV64ItoR(EBX, (uptr)&p2); | |
1350 | SSE_MOVUPSRtoRm(EAX, from); | |
1351 | ||
1352 | MOV32ItoR(ECX, (u32)imm8); | |
1353 | AND32ItoR(ECX, 3); | |
1354 | SHL32ItoR(ECX, 2); | |
1355 | ADD32RtoR(ECX, EAX); | |
1356 | MOV32RmtoR(ECX, ECX); | |
1357 | MOV32RtoRm(EBX, ECX); | |
1358 | ||
1359 | ADD32ItoR(EBX, 4); | |
1360 | MOV32ItoR(ECX, (u32)imm8); | |
1361 | SHR32ItoR(ECX, 2); | |
1362 | AND32ItoR(ECX, 3); | |
1363 | SHL32ItoR(ECX, 2); | |
1364 | ADD32RtoR(ECX, EAX); | |
1365 | MOV32RmtoR(ECX, ECX); | |
1366 | MOV32RtoRm(EBX, ECX); | |
1367 | ||
1368 | ADD32ItoR(EBX, 4); | |
1369 | MOV32ItoR(ECX, (u32)imm8); | |
1370 | SHR32ItoR(ECX, 4); | |
1371 | AND32ItoR(ECX, 3); | |
1372 | SHL32ItoR(ECX, 2); | |
1373 | ADD32RtoR(ECX, EAX); | |
1374 | MOV32RmtoR(ECX, ECX); | |
1375 | MOV32RtoRm(EBX, ECX); | |
1376 | ||
1377 | ADD32ItoR(EBX, 4); | |
1378 | MOV32ItoR(ECX, (u32)imm8); | |
1379 | SHR32ItoR(ECX, 6); | |
1380 | AND32ItoR(ECX, 3); | |
1381 | SHL32ItoR(ECX, 2); | |
1382 | ADD32RtoR(ECX, EAX); | |
1383 | MOV32RmtoR(ECX, ECX); | |
1384 | MOV32RtoRm(EBX, ECX); | |
1385 | ||
1386 | SUB32ItoR(EBX, 12); | |
1387 | ||
1388 | SSE_MOVUPSRmtoR(to, EBX); | |
1389 | } | |
1390 | ||
// Emulates MOVD r32,xmm: spill the full XMM register to the static 'p'
// buffer, then load its first dword into the destination register.
void SSE2EMU_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) {
	/* XXX? */
	MOV64ItoR(to, (uptr)&p);
	SSE_MOVUPSRtoRm(to, from);
	MOV32RmtoR(to, to);
}
1397 | ||
1398 | #ifndef __x86_64__ | |
1399 | extern void SetFPUstate(); | |
1400 | extern void _freeMMXreg(int mmxreg); | |
1401 | #endif | |
1402 | ||
// Emulates CVTPS2DQ xmm,xmm on SSE1-only CPUs: spill the four packed
// floats to 'f', convert each with x87 FLD/FISTP into 'p2', and reload.
// NOTE(review): FISTP rounds per the x87 control word, whereas the real
// CVTPS2DQ uses the MXCSR rounding mode -- results match only when both
// are configured identically; confirm callers arrange this.
void SSE2EMU_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) {
#ifndef __x86_64__
	// 32-bit build: sync FPU state and evict MMX reg 7 before emitting
	// x87 code (x87 and MMX share register state).
	SetFPUstate();
	_freeMMXreg(7);
#endif
	SSE_MOVAPS_XMM_to_M128((uptr)f, from);

	FLD32((uptr)&f[0]);
	FISTP32((uptr)&p2[0]);
	FLD32((uptr)&f[1]);
	FISTP32((uptr)&p2[1]);
	FLD32((uptr)&f[2]);
	FISTP32((uptr)&p2[2]);
	FLD32((uptr)&f[3]);
	FISTP32((uptr)&p2[3]);

	SSE_MOVAPS_M128_to_XMM(to, (uptr)p2);
}
1421 | ||
// Emulates CVTDQ2PS xmm,m128 on SSE1-only CPUs: convert four dwords at
// 'from' to floats one at a time with x87 FILD/FSTP via the static 'f'
// buffer, then load the packed result.
void SSE2EMU_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) {
#ifndef __x86_64__
	// 32-bit build: sync FPU state and evict MMX reg 7 before emitting
	// x87 code (x87 and MMX share register state).
	SetFPUstate();
	_freeMMXreg(7);
#endif
	FILD32(from);
	FSTP32((uptr)&f[0]);
	FILD32(from+4);
	FSTP32((uptr)&f[1]);
	FILD32(from+8);
	FSTP32((uptr)&f[2]);
	FILD32(from+12);
	FSTP32((uptr)&f[3]);

	SSE_MOVAPS_M128_to_XMM(to, (uptr)f);
}
1438 | ||
// Emulates MOVD m32,xmm: spill the XMM register to the static 'p' buffer,
// load its first dword, and store it to the destination address.
// Clobbers EAX at runtime.
void SSE2EMU_MOVD_XMM_to_M32( uptr to, x86SSERegType from ) {
	/* XXX? */
	MOV64ItoR(EAX, (uptr)&p);
	SSE_MOVUPSRtoRm(EAX, from);
	MOV32RmtoR(EAX, EAX);
	MOV32RtoM(to, EAX);
}
1446 | ||
// Emulates MOVD xmm,r32: build { from, 0, 0, 0 } in the static 'p'
// buffer, then load it into the XMM register as a full 128-bit value.
void SSE2EMU_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) {
	MOV32ItoM((uptr)p+4, 0);
	MOV32ItoM((uptr)p+8, 0);
	MOV32RtoM((uptr)p, from);
	MOV32ItoM((uptr)p+12, 0);
	SSE_MOVAPS_M128_to_XMM(to, (uptr)p);
}
1454 | ||
1455 | #endif |