dma: don't generate irqs after aborted DMA
[pcsx_rearmed.git] / libpcsxcore / ix86_64 / ix86_sse.c
CommitLineData
ef79bbde
P
1// stop compiling if NORECBUILD build (only for Visual Studio)
2#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD))
3
4#include <assert.h>
5#include "ix86-64.h"
6
// 16-byte-aligned scratch buffers (alignment allows use with aligned SSE
// loads/stores).  Their uses are outside this chunk -- presumably the
// SSE-emulation fallback paths; confirm before removing.
PCSX2_ALIGNED16(static unsigned int p[4]);
PCSX2_ALIGNED16(static unsigned int p2[4]);
PCSX2_ALIGNED16(static float f[4]);


// Per-XMM-register data-type tags, zero-initialized at startup.
XMMSSEType g_xmmtypes[XMMREGS] = {0};
13
14/********************/
15/* SSE instructions */
16/********************/
17
// Emitter macros for the common SSE encoding shapes.  They expand in place
// and expect `to' / `from' to be in scope at the expansion site.
//
//   nc    - total number of prefix/opcode bytes packed into `code'
//   code  - opcode bytes, low byte emitted first (prefix in the low byte)
//   overb - number of bytes the caller appends after the address encoding
//           (e.g. the imm8 of CMPPS/SHUFPS); needed for RIP-relative fixups

// memory -> xmm, bare opcode
#define SSEMtoRv( nc, code, overb ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS ) ; \
	MEMADDR_OP(0, nc, code, true, to, from, overb)

#define SSEMtoR( code, overb ) SSEMtoRv(2, code, overb)

// xmm -> memory, bare opcode
#define SSERtoMv( nc, code, overb ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( from < XMMREGS) ; \
	MEMADDR_OP(0, nc, code, true, from, to, overb)

// NOTE(review): removed a stray trailing '\' here that line-spliced the
// following (empty) line into the macro body.
#define SSERtoM( code, overb ) SSERtoMv( 2, code, overb )

// scalar-single (F3-prefixed) memory forms
#define SSE_SS_MtoR( code, overb ) \
	SSEMtoRv(3, (code << 8) | 0xF3, overb)

#define SSE_SS_RtoM( code, overb ) \
	SSERtoMv(3, (code << 8) | 0xF3, overb)

// xmm -> xmm, register-direct (ModRM mod = 3)
#define SSERtoR( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	RexRB(0, to, from); \
	write16( code ); \
	ModRM( 3, to, from );

// 66-prefixed variants (SSE2 integer / packed-double encodings)
#define SSEMtoR66( code ) \
	SSEMtoRv( 3, (code << 8) | 0x66, 0 )

#define SSERtoM66( code ) \
	SSERtoMv( 3, (code << 8) | 0x66, 0 )

#define SSERtoR66( code ) \
	write8( 0x66 ); \
	SSERtoR( code );

// like SSERtoR66, but with the ModRM operands swapped (store direction)
#define _SSERtoR66( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	write8( 0x66 ); \
	RexRB(0, from, to); \
	write16( code ); \
	ModRM( 3, from, to );

// F3-prefixed register-direct form
#define SSE_SS_RtoR( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	write8( 0xf3 ); \
	RexRB(0, to, from); \
	write16( code ); \
	ModRM( 3, to, from );

// CMPccPS / CMPccSS: opcode 0F C2 followed by an imm8 predicate `op'
#define CMPPSMtoR( op ) \
	SSEMtoR( 0xc20f, 1 ); \
	write8( op );

#define CMPPSRtoR( op ) \
	SSERtoR( 0xc20f ); \
	write8( op );

#define CMPSSMtoR( op ) \
	SSE_SS_MtoR( 0xc20f, 1 ); \
	write8( op );

#define CMPSSRtoR( op ) \
	SSE_SS_RtoR( 0xc20f ); \
	write8( op );
86
87
88
// Emit the ModRM (+SIB/displacement) bytes for a [reg+offset] operand.
// Defined elsewhere in the emitter.
void WriteRmOffset(x86IntRegType to, int offset);
void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);
91
/* movups [r32][r32*scale] to xmm1 */
// Emits 0F 10 /r (MOVUPS xmm, m128) with a SIB-addressed operand.
void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRXB(0, to, from2, from);
	write16( 0x100f );
	ModRM( 0, to, 0x4 );   // r/m = 100b: address comes from the SIB byte
	SibSB( scale, from2, from );
}
101
/* movups xmm1 to [r32][r32*scale] */
// Emits 0F 11 /r (MOVUPS m128, xmm) with a SIB-addressed operand.
void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	// NOTE(review): passes 1 (REX.W) where the load variant passes 0; REX.W
	// should have no effect on MOVUPS -- verify this is intentional.
	RexRXB(1, to, from2, from);
	write16( 0x110f );
	ModRM( 0, to, 0x4 );
	SibSB( scale, from2, from );
}
111
/* movups [r32] to xmm */
// Emits 0F 10 /r: MOVUPS xmm(to), [from] (mod=0, no displacement).
void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x100f );
	ModRM( 0, to, from );
}
120
/* movups xmm to [r32] */
// Emits 0F 11 /r: MOVUPS [to], xmm(from) (mod=0, no displacement).
void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x110f );
	ModRM( 0, from, to );
}
129
/* movlps [r32] to xmm */
// Emits 0F 12 /r: MOVLPS xmm(to), m64 at [from].
void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	// NOTE(review): passes 1 (REX.W) while SSE_MOVLPSRmtoROffset passes 0;
	// REX.W should be ignored for MOVLPS -- confirm the asymmetry is intended.
	RexRB(1, to, from);
	write16( 0x120f );
	ModRM( 0, to, from );
}
138
// Emits 0F 12 /r: MOVLPS xmm(to), m64 at [from+offset].
void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x120f );
	WriteRmOffsetFrom(to, from, offset);
}
146
/* movlps xmm to [r32] */
// Emits 0F 13 /r: MOVLPS m64 at [to], xmm(from).
void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x130f );
	ModRM( 0, from, to );
}
155
// Emits 0F 13 /r: MOVLPS m64 at [to+offset], xmm(from).
// NOTE(review): the parameter TYPES look swapped relative to the usage and to
// SSE_MOVHPS_XMM_to_RmOffset (`from' is encoded as the xmm register here,
// `to' as the base) -- likely a declaration typo; behavior matches the store.
void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x130f );
	WriteRmOffsetFrom(from, to, offset);
}
163
/* movaps [r32][r32*scale] to xmm1 */
// Emits 0F 28 /r (MOVAPS xmm, m128) with a SIB-addressed operand.
void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
	// EBP cannot be a SIB base with mod=0 (that encoding means disp32 instead)
	assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
	RexRXB(0, to, from2, from);
	write16( 0x280f );
	ModRM( 0, to, 0x4 );
	SibSB( scale, from2, from );
}
173
/* movaps xmm1 to [r32][r32*scale] */
// Emits 0F 29 /r (MOVAPS m128, xmm) with a SIB-addressed operand.
void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
	// EBP cannot be a SIB base with mod=0 (that encoding means disp32 instead)
	assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
	RexRXB(0, to, from2, from);
	write16( 0x290f );
	ModRM( 0, to, 0x4 );
	SibSB( scale, from2, from );
}
183
// movaps [r32+offset] to xmm: emits 0F 28 /r
void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x280f );
	WriteRmOffsetFrom(to, from, offset);
}
192
// movaps xmm to [r32+offset]: emits 0F 29 /r
void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x290f );
	WriteRmOffsetFrom(from, to, offset);
}
201
202// movdqa [r32+offset] to r32
203void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
204{
205 assert( cpucaps.hasStreamingSIMDExtensions );
206 write8(0x66);
207 RexRB(0, to, from);
208 write16( 0x6f0f );
209 WriteRmOffsetFrom(to, from, offset);
210}
211
212// movdqa r32 to [r32+offset]
213void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
214{
215 assert( cpucaps.hasStreamingSIMDExtensions );
216 write8(0x66);
217 RexRB(0, from, to);
218 write16( 0x7f0f );
219 WriteRmOffsetFrom(from, to, offset);
220}
221
222// movups [r32+offset] to r32
223void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
224{
225 RexRB(0, to, from);
226 write16( 0x100f );
227 WriteRmOffsetFrom(to, from, offset);
228}
229
// movups xmm to [r32+offset]: emits 0F 11 /r
void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16( 0x110f );
	WriteRmOffsetFrom(from, to, offset);
}
238
239//**********************************************************************************/
240//MOVAPS: Move aligned Packed Single Precision FP values *
241//**********************************************************************************
242void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); }
243void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); }
244void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x280f ); }
245
246void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); }
247void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); }
248
// movsd xmm,xmm (F2 0F 10); falls back to SSE1 emulation when SSE2 is absent.
void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVSD_XMM_to_XMM(to, from);
	else {
		write8(0xf2);
		SSERtoR( 0x100f);
	}
}
257
// movq xmm,m64 (F3 0F 7E); falls back to SSE1 emulation when SSE2 is absent.
void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_M64_to_XMM(to, from);
	else {
		SSE_SS_MtoR( 0x7e0f, 0);
	}
}
265
// movq xmm,xmm (F3 0F 7E); falls back to SSE1 emulation when SSE2 is absent.
void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_XMM_to_XMM(to, from);
	else {
		SSE_SS_RtoR( 0x7e0f);
	}
}
273
// movq m64,xmm (66 0F D6); falls back to MOVLPS store when SSE2 is absent.
void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE_MOVLPS_XMM_to_M64(to, from);
	else {
		SSERtoM66(0xd60f);
	}
}
281
#ifndef __x86_64__
// movdq2q mm,xmm (F2 0F D6); SSE2, with emulation fallback.
void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVDQ2Q_XMM_to_MM(to, from);
	else {
		write8(0xf2);
		SSERtoR( 0xd60f);
	}
}
// movq2dq xmm,mm (F3 0F D6); SSE2, with emulation fallback.
void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ2DQ_MM_to_XMM(to, from);
	else {
		SSE_SS_RtoR( 0xd60f);
	}
}
#endif
299
300//**********************************************************************************/
301//MOVSS: Move Scalar Single-Precision FP value *
302//**********************************************************************************
303void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); }
304void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); }
// movss [to],xmm(from): emits F3 0F 11 /r with mod=0.
void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
	write8(0xf3);
	RexRB(0, from, to);
	write16(0x110f);
	ModRM(0, from, to);
}
312
// movss xmm,xmm (F3 0F 10, register-direct)
void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x100f ); }
314
// movss xmm(to),[from+offset]: emits F3 0F 10 /r.
void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	write8(0xf3);
	RexRB(0, to, from);
	write16( 0x100f );
	WriteRmOffsetFrom(to, from, offset);
}
322
// movss [to+offset],xmm(from): emits F3 0F 11 /r.
void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	write8(0xf3);
	RexRB(0, from, to);
	write16(0x110f);
	WriteRmOffsetFrom(from, to, offset);
}
330
// maskmovdqu (66 0F F7).  NOTE(review): MASKMOVDQU is an SSE2 instruction but
// SSERtoR66 only asserts the SSE1 capability bit -- verify callers gate on SSE2.
void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); }
//**********************************************************************************/
//MOVLPS: Move low Packed Single-Precision FP                                      *
//**********************************************************************************
void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); }
void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); }
337
// movlps xmm(to),[from+offset]: emits 0F 12 /r.
void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x120f );
	WriteRmOffsetFrom(to, from, offset);
}
345
// movlps [to+offset],xmm(from): emits 0F 13 /r.
void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	RexRB(0, from, to);
	write16(0x130f);
	WriteRmOffsetFrom(from, to, offset);
}
352
353/////////////////////////////////////////////////////////////////////////////////////
354//**********************************************************************************/
355//MOVHPS: Move High Packed Single-Precision FP *
356//**********************************************************************************
357void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); }
358void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); }
359
// movhps xmm(to),[from+offset]: emits 0F 16 /r.
void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, to, from);
	write16( 0x160f );
	WriteRmOffsetFrom(to, from, offset);
}
367
// movhps [to+offset],xmm(from): emits 0F 17 /r.
void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	assert( cpucaps.hasStreamingSIMDExtensions );
	RexRB(0, from, to);
	write16(0x170f);
	WriteRmOffsetFrom(from, to, offset);
}
375
376/////////////////////////////////////////////////////////////////////////////////////
377//**********************************************************************************/
378//MOVLHPS: Moved packed Single-Precision FP low to high *
379//**********************************************************************************
380void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); }
381
382//////////////////////////////////////////////////////////////////////////////////////
383//**********************************************************************************/
384//MOVHLPS: Moved packed Single-Precision FP High to Low *
385//**********************************************************************************
386void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); }
387
388///////////////////////////////////////////////////////////////////////////////////
389//**********************************************************************************/
390//ANDPS: Logical Bit-wise AND for Single FP *
391//**********************************************************************************
392void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); }
393void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); }
394
395///////////////////////////////////////////////////////////////////////////////////////
396//**********************************************************************************/
397//ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values *
398//**********************************************************************************
399void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); }
400void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); }
401
402/////////////////////////////////////////////////////////////////////////////////////
403//**********************************************************************************/
404//RCPPS : Packed Single-Precision FP Reciprocal *
405//**********************************************************************************
406void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); }
407void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); }
408
409void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); }
410void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); }
411
412//////////////////////////////////////////////////////////////////////////////////////
413//**********************************************************************************/
414//ORPS : Bit-wise Logical OR of Single-Precision FP Data *
415//**********************************************************************************
416void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); }
417void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); }
418
419/////////////////////////////////////////////////////////////////////////////////////
420//**********************************************************************************/
421//XORPS : Bitwise Logical XOR of Single-Precision FP Values *
422//**********************************************************************************
423void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); }
424void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); }
425
426///////////////////////////////////////////////////////////////////////////////////////
427//**********************************************************************************/
428//ADDPS : ADD Packed Single-Precision FP Values *
429//**********************************************************************************
430void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); }
431void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); }
432
433////////////////////////////////////////////////////////////////////////////////////
434//**********************************************************************************/
435//ADDSS : ADD Scalar Single-Precision FP Values *
436//**********************************************************************************
437void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); }
438void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); }
439
440/////////////////////////////////////////////////////////////////////////////////////////
441//**********************************************************************************/
442//SUBPS: Packed Single-Precision FP Subtract *
443//**********************************************************************************
444void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); }
445void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); }
446
447///////////////////////////////////////////////////////////////////////////////////////
448//**********************************************************************************/
449//SUBSS : Scalar Single-Precision FP Subtract *
450//**********************************************************************************
451void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); }
452void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); }
453
454/////////////////////////////////////////////////////////////////////////////////////////
455//**********************************************************************************/
456//MULPS : Packed Single-Precision FP Multiply *
457//**********************************************************************************
458void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); }
459void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); }
460
461////////////////////////////////////////////////////////////////////////////////////////
462//**********************************************************************************/
463//MULSS : Scalar Single-Precision FP Multiply *
464//**********************************************************************************
465void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); }
466void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); }
467
468////////////////////////////////////////////////////////////////////////////////////////////
469//**********************************************************************************/
470//Packed Single-Precission FP compare (CMPccPS) *
471//**********************************************************************************
472//missing SSE_CMPPS_I8_to_XMM
473// SSE_CMPPS_M32_to_XMM
474// SSE_CMPPS_XMM_to_XMM
475void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); }
476void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); }
477void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); }
478void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); }
479void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); }
480void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); }
481void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); }
482void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); }
483void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); }
484void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); }
485void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); }
486void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); }
487void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); }
488void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); }
489void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); }
490void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); }
491
492///////////////////////////////////////////////////////////////////////////////////////////
493//**********************************************************************************/
494//Scalar Single-Precission FP compare (CMPccSS) *
495//**********************************************************************************
496//missing SSE_CMPSS_I8_to_XMM
497// SSE_CMPSS_M32_to_XMM
498// SSE_CMPSS_XMM_to_XMM
499void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); }
500void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); }
501void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); }
502void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); }
503void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); }
504void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); }
505void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); }
506void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); }
507void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); }
508void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); }
509void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); }
510void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); }
511void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); }
512void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); }
513void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); }
514void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); }
515
// ucomiss xmm(to),m32 (0F 2E): unordered compare, sets EFLAGS.
void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from )
{
	MEMADDR_OP(0, VAROP2(0x0F, 0x2E), true, to, from, 0);
}
520
// ucomiss xmm,xmm (0F 2E, register-direct): unordered compare, sets EFLAGS.
void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	RexRB(0, to, from);
	write16( 0x2e0f );
	ModRM( 3, to, from );
}
527
528//////////////////////////////////////////////////////////////////////////////////////////
529//**********************************************************************************/
530//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal *
531//**********************************************************************************
532void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); }
533void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x520f ); }
534
535/////////////////////////////////////////////////////////////////////////////////////
536//**********************************************************************************/
537//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal *
538//**********************************************************************************
539void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); }
540void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x520f ); }
541
542////////////////////////////////////////////////////////////////////////////////////
543//**********************************************************************************/
544//SQRTPS : Packed Single-Precision FP Square Root *
545//**********************************************************************************
546void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); }
547void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x510f ); }
548
549//////////////////////////////////////////////////////////////////////////////////////
550//**********************************************************************************/
551//SQRTSS : Scalar Single-Precision FP Square Root *
552//**********************************************************************************
553void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); }
554void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x510f ); }
555
556////////////////////////////////////////////////////////////////////////////////////////
557//**********************************************************************************/
558//MAXPS: Return Packed Single-Precision FP Maximum *
559//**********************************************************************************
560void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); }
561void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); }
562
563/////////////////////////////////////////////////////////////////////////////////////////
564//**********************************************************************************/
565//MAXSS: Return Scalar Single-Precision FP Maximum *
566//**********************************************************************************
567void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); }
568void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); }
569
#ifndef __x86_64__
// MMX<->XMM conversions are 32-bit-only here (MMX regs unused in the x86-64 build).
/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion                     *
//**********************************************************************************
void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); }
void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); }

///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion                     *
//**********************************************************************************
void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); }
void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); }
#endif
585
// cvttss2si r32,m32 / r32,xmm (F3 0F 2C): truncating float->int conversion.
void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { SSE_SS_MtoR(0x2c0f, 0); }
void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	write8(0xf3);
	RexRB(0, to, from);
	write16(0x2c0f);
	ModRM(3, to, from);
}
594
// cvtsi2ss xmm,m32 / xmm,r32 (F3 0F 2A): int->float conversion.
void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x2a0f, 0); }
void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	write8(0xf3);
	RexRB(0, to, from);
	write16(0x2a0f);
	ModRM(3, to, from);
}
603
604///////////////////////////////////////////////////////////////////////////////////////////
605//**********************************************************************************/
606//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion *
607//**********************************************************************************
608void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); }
609void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); }
610
611//**********************************************************************************/
612//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion *
613//**********************************************************************************
614void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); }
615void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); }
616
617void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x5b0f); }
618/////////////////////////////////////////////////////////////////////////////////////
619//**********************************************************************************/
620//MINPS: Return Packed Single-Precision FP Minimum *
621//**********************************************************************************
622void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); }
623void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); }
624
625//////////////////////////////////////////////////////////////////////////////////////////
626//**********************************************************************************/
627//MINSS: Return Scalar Single-Precision FP Minimum *
628//**********************************************************************************
629void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); }
630void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); }
631
#ifndef __x86_64__
// MMX-operand forms, 32-bit build only.
///////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMAXSW: Packed Signed Integer Word Maximum                                       *
//**********************************************************************************
//missing
//  SSE_PMAXSW_M64_to_MM
//  SSE2_PMAXSW_M128_to_XMM
//  SSE2_PMAXSW_XMM_to_XMM
void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); }

///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PMINSW: Packed Signed Integer Word Minimum                                       *
//**********************************************************************************
//missing
//  SSE_PMINSW_M64_to_MM
//  SSE2_PMINSW_M128_to_XMM
//  SSE2_PMINSW_XMM_to_XMM
void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); }
#endif
653
654//////////////////////////////////////////////////////////////////////////////////////
655//**********************************************************************************/
656//SHUFPS: Shuffle Packed Single-Precision FP Values *
657//**********************************************************************************
658void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); }
659void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); }
660
// shufps xmm(to),[from+offset],imm8: emits 0F C6 /r ib.
// NOTE(review): no capability assert here, unlike the other SSE helpers.
void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 )
{
	RexRB(0, to, from);
	write16(0xc60f);
	WriteRmOffsetFrom(to, from, offset);
	write8(imm8);
}
668
669////////////////////////////////////////////////////////////////////////////////////
670//**********************************************************************************/
671//PSHUFD: Shuffle Packed DoubleWords *
672//**********************************************************************************
673void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
674{
675 if( !cpucaps.hasStreamingSIMD2Extensions ) {
676 SSE2EMU_PSHUFD_XMM_to_XMM(to, from, imm8);
677 }
678 else {
679 SSERtoR66( 0x700F );
680 write8( imm8 );
681 }
682}
void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoRv( 3, 0x700F66, 1 ); write8( imm8 ); }

// pshuflw = F2 0F 70 /r ib; pshufhw = F3 0F 70 /r ib
void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); }
void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoRv(3, 0x700FF2, 1); write8(imm8); }
void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSE_SS_RtoR(0x700F); write8(imm8); }
void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSE_SS_MtoR(0x700F, 1); write8(imm8); }
689
///////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data              *
//**********************************************************************************
// 0F 14 /r — interleave the low two floats of src and dst.
void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); }
void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); }

////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data             *
//**********************************************************************************
// 0F 15 /r — interleave the high two floats of src and dst.
void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); }
void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); }

////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//DIVPS : Packed Single-Precision FP Divide                                        *
//**********************************************************************************
void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); }
void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); }

//////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//DIVSS : Scalar Single-Precision FP Divide                                        *
//**********************************************************************************
void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); }
void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); }
717
/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//STMXCSR : Store Streaming SIMD Extension Control/Status                          *
//**********************************************************************************
// 0F AE /3 — store MXCSR to the 32-bit memory operand at `from`.
void SSE_STMXCSR( uptr from ) {
	MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 3, from, 0);
}

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//LDMXCSR : Load Streaming SIMD Extension Control/Status                           *
//**********************************************************************************
// 0F AE /2 — load MXCSR from the 32-bit memory operand at `from`.
void SSE_LDMXCSR( uptr from ) {
	MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 2, from, 0);
}
733
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PADDB,PADDW,PADDD : Add Packed Integers                                          *
//**********************************************************************************
// 66 0F FC/FD/FE/D4 — wraparound packed integer adds (byte/word/dword/qword).
void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); }
void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); }
void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); }
void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); }
void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); }
void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); }

void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); }
void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); }
747
///////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PCMPxx: Compare Packed Integers                                                  *
//**********************************************************************************
// 66 0F 64..66 (signed greater-than) and 66 0F 74..76 (equality); each lane
// becomes all-ones on true, all-zeros on false.
void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); }
void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); }
void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); }
void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); }
void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); }
void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); }
void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); }
void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); }
void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); }
void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); }
// NOTE(review): the SSE1 fallback uses CMPEQPS, a *float* equality compare; it is
// not bit-exact with PCMPEQD for NaN-patterned integer data — presumably accepted
// as good enough on SSE1-only CPUs. Confirm against callers if this path matters.
void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) {
		SSE_CMPEQPS_XMM_to_XMM(to, from);
	}
	else {
		SSERtoR66( 0x760F );
	}
}

void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) {
		SSE_CMPEQPS_M128_to_XMM(to, from);
	}
	else {
		SSEMtoR66( 0x760F );
	}
}
781
////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PEXTRW,PINSRW: Packed Extract/Insert Word                                        *
//**********************************************************************************
// 66 0F C5 /r ib and 66 0F C4 /r ib — move one 16-bit lane between GPR and XMM.
void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); }
void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); }

////////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PSUBx: Subtract Packed Integers                                                  *
//**********************************************************************************
// 66 0F F8..FB — wraparound packed integer subtracts (byte/word/dword/qword).
void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); }
void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); }
void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); }
void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); }
void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); }
void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); }
void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); }
void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); }
801
///////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MOVD: Move Dword(32bit) to /from XMM reg                                         *
//**********************************************************************************
// 66 0F 6E (GPR/mem -> XMM, upper bits zeroed) and 66 0F 7E (XMM -> GPR/mem).
// Non-SSE2 CPUs are serviced by the SSE2EMU_* fallbacks further below.
void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); }
void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) {
		SSE2EMU_MOVD_R_to_XMM(to, from);
	}
	else {
		SSERtoR66(0x6E0F);
	}
}

// MOVD xmm, [reg] — ModRM mod=0, no displacement.
void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from )
{
	write8(0x66);
	RexRB(0, to, from);
	write16( 0x6e0f );
	ModRM( 0, to, from);
}

// MOVD xmm, [reg+offset].
void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	write8(0x66);
	RexRB(0, to, from);
	write16( 0x6e0f );
	WriteRmOffsetFrom(to, from, offset);
}

// NOTE(review): `to` is declared u32 rather than uptr, so a 64-bit absolute
// destination address would be truncated here — confirm callers only pass
// 32-bit-reachable addresses.
void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); }
void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) {
	if( !cpucaps.hasStreamingSIMD2Extensions ) {
		SSE2EMU_MOVD_XMM_to_R(to, from);
	}
	else {
		_SSERtoR66(0x7E0F);
	}
}

// MOVD [reg], xmm — ModRM mod=0, no displacement.
void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
	write8(0x66);
	RexRB(0, from, to);
	write16( 0x7e0f );
	ModRM( 0, from, to );
}

// MOVD [reg+offset], xmm; emulated via memory bounce when SSE2 is missing.
void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	if( !cpucaps.hasStreamingSIMD2Extensions ) {
		SSE2EMU_MOVD_XMM_to_RmOffset(to, from, offset);
	}
	else {
		write8(0x66);
		RexRB(0, from, to);
		write16( 0x7e0f );
		WriteRmOffsetFrom(from, to, offset);
	}
}
863
#ifdef __x86_64__
// MOVQ r64 <- xmm: 66 REX.W 0F 7E /r (RexRB first arg 1 sets REX.W).
void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from )
{
	assert( from < XMMREGS);
	write8( 0x66 );
	RexRB(1, from, to);
	write16( 0x7e0f );
	ModRM( 3, from, to );
}

// MOVQ xmm <- r64: 66 REX.W 0F 6E /r; upper 64 bits of the XMM are zeroed.
void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from )
{
	assert( to < XMMREGS);
	write8(0x66);
	RexRB(1, to, from);
	write16( 0x6e0f );
	ModRM( 3, to, from );
}

#endif
884
////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//POR : SSE Bitwise OR                                                             *
//**********************************************************************************
void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); }
void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); }

// logical and to &= from
void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); }
void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); }

// to = (~to) & from
void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); }
void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); }

/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PXOR : SSE Bitwise XOR                                                           *
//**********************************************************************************
void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEF0F ); }
void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEF0F ); }
///////////////////////////////////////////////////////////////////////////////////////

// MOVDQA (aligned, 66 prefix) vs MOVDQU (unaligned, F3 prefix via SSE_SS_*).
void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) {SSEMtoR66(0x6F0F); }
void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ){SSERtoM66(0x7F0F);}
void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSERtoR66(0x6F0F); }

void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x6F0F, 0); }
void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { SSE_SS_RtoM(0x7F0F, 0); }
void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x6F0F); }
915
// shift right logical
// Immediate forms use group opcodes 66 0F 71/72/73 with the sub-opcode in the
// ModRM reg field (/2 = PSRLW/D/Q, /3 = PSRLDQ byte shift).

void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); }
void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); }
void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 2 , to );
	write8( imm8 );
}

void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); }
void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); }
void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 2 , to );
	write8( imm8 );
}

void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); }
void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); }
void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 2 , to );
	write8( imm8 );
}

// PSRLDQ — whole-register byte shift right.
void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 3 , to );
	write8( imm8 );
}
959
// shift right arithmetic
// Immediate forms: 66 0F 71 /4 (PSRAW) and 66 0F 72 /4 (PSRAD).

void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); }
void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); }
void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 4 , to );
	write8( imm8 );
}

void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); }
void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); }
void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 4 , to );
	write8( imm8 );
}
983
// shift left logical
// Immediate forms use /6 in the ModRM reg field; PSLLDQ (byte shift) uses /7.

void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); }
void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); }
void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x710F );
	ModRM( 3, 6 , to );
	write8( imm8 );
}

void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); }
void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); }
void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x720F );
	ModRM( 3, 6 , to );
	write8( imm8 );
}

void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); }
void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); }
void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 6 , to );
	write8( imm8 );
}

// PSLLDQ — whole-register byte shift left.
void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8( 0x66 );
	RexB(0, to);
	write16( 0x730F );
	ModRM( 3, 7 , to );
	write8( imm8 );
}
1027
1028
// Packed min/max: PMAXSW/PMINSW (signed words), PMAXUB/PMINUB (unsigned bytes).
void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEE0F ); }
void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEE0F ); }

void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDE0F ); }
void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xDE0F ); }

void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEA0F ); }
void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEA0F ); }

void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDA0F ); }
void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xDA0F ); }

// Saturating packed adds/subtracts (signed: PADDS*/PSUBS*, unsigned: PADDUS*/PSUBUS*).

void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEC0F ); }
void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEC0F ); }

void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xED0F ); }
void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xED0F ); }

void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE80F ); }
void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xE80F ); }

void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE90F ); }
void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xE90F ); }

void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); }
void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); }
void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); }
void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); }

void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); }
void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); }
void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); }
void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); }
1064
//**********************************************************************************/
//PACKSSWB,PACKSSDW: Pack Saturate Signed Word
//**********************************************************************************
void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); }
void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); }
void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); }
void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); }

// PACKUSWB — pack words to bytes with unsigned saturation.
void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); }
void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); }

//**********************************************************************************/
//PUNPCKHWD: Unpack 16bit high
//**********************************************************************************
// Interleave low (PUNPCKL*) or high (PUNPCKH*) elements at byte/word/dword/qword size.
void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); }
void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); }

void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); }
void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); }

void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); }
void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); }
void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); }
void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); }

void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); }
void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); }
void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); }
void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); }

void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); }
void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); }

void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); }
void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); }
1100
// Packed multiplies: PMULLW (low 16 bits), PMULHW (high 16 bits), PMULUDQ (32x32->64).
void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); }
void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); }
void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); }
void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); }

void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); }
void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); }

// Sign-mask extraction into a GPR.
void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); }

void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); }
void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); }

// SSE3 horizontal add: F2 0F 7C (0x7c0fF2 is the F2-prefixed opcode packed for SSEMtoRv).
void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); }
void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from){ SSEMtoRv( 3, 0x7c0fF2, 0 ); }

// SSE3 duplicate even (MOVSLDUP, F3 0F 12) / odd (MOVSHDUP, F3 0F 16) float lanes.
void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
	write8(0xf3);
	RexRB(0, to, from);
	write16( 0x120f);
	ModRM( 3, to, from );
}

void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x120f, 0); }
void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x160f); }
void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x160f, 0); }
1127
// SSE-X
// "SSE-X" wrappers pick the integer (SSE2) or float (SSE1) form of a move/logic op
// based on the type recorded for the register in g_xmmtypes, falling back to the
// SSE1 instruction when SSE2 is unavailable.
void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
	else SSE_MOVAPS_M128_to_XMM(to, from);
}

void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
	else SSE_MOVAPS_XMM_to_M128(to, from);
}

void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
	else SSE_MOVAPS_XMM_to_XMM(to, from);
}

void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
	else SSE_MOVAPSRmtoROffset(to, from, offset);
}

void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
	else SSE_MOVAPSRtoRmOffset(to, from, offset);
}
1158
// Unaligned 128-bit moves: MOVDQU for integer-typed registers, else MOVAPS.
// NOTE(review): the fallback is the *aligned* MOVAPS — presumably callers on the
// fallback path guarantee alignment; confirm before relying on it.
void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
	else SSE_MOVAPS_M128_to_XMM(to, from);
}

void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
	else SSE_MOVAPS_XMM_to_M128(to, from);
}

void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
	else SSE_MOVAPS_XMM_to_XMM(to, from);
}

// 32-bit moves: MOVD for integer-typed registers, MOVSS otherwise.
void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
	else SSE_MOVSS_M32_to_XMM(to, from);
}

void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
	else SSE_MOVSS_XMM_to_M32(to, from);
}

void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
	else SSE_MOVSS_XMM_to_Rm(to, from);
}

void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
	else SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
}

void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
	else SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
}
1206
// Bitwise logic: integer (POR/PXOR/PAND/PANDN) vs float (ORPS/XORPS/ANDPS/ANDNPS)
// forms are bit-identical in result; the choice only affects execution domain.
void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
	else SSE_ORPS_M128_to_XMM(to, from);
}

void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
	else SSE_ORPS_XMM_to_XMM(to, from);
}

void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
	else SSE_XORPS_M128_to_XMM(to, from);
}

void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
	else SSE_XORPS_XMM_to_XMM(to, from);
}

void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
	else SSE_ANDPS_M128_to_XMM(to, from);
}

void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
	else SSE_ANDPS_XMM_to_XMM(to, from);
}

void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
	else SSE_ANDNPS_M128_to_XMM(to, from);
}

void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from);
	else SSE_ANDNPS_XMM_to_XMM(to, from);
}
1254
// Dword interleave: PUNPCK{L,H}DQ for integer-typed registers, UNPCK{L,H}PS otherwise.
void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
	else SSE_UNPCKLPS_M128_to_XMM(to, from);
}

void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
	else SSE_UNPCKLPS_XMM_to_XMM(to, from);
}

void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
	else SSE_UNPCKHPS_M128_to_XMM(to, from);
}

void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
	else SSE_UNPCKHPS_XMM_to_XMM(to, from);
}

// MOVHLPS equivalent for integer-typed registers: PUNPCKHQDQ interleaves the high
// qwords, then (when to != from) PSHUFD 0x4e swaps qwords to put the result low.
void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
	if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) {
		SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
		if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
	}
	else {
		SSE_MOVHLPS_XMM_to_XMM(to, from);
	}
}
1289
// SSE2 emulation
// SSE1-only replacements for SSE2 data moves. These bounce data through the
// static 16-byte buffer `p`, so the generated code is NOT reentrant.

// MOVSD equivalent: two SHUFPS 0x4e swaps copy from's low qword into to's low
// qword while restoring to's high qword.
void SSE2EMU_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
{
	SSE_SHUFPS_XMM_to_XMM(to, from, 0x4e);
	SSE_SHUFPS_XMM_to_XMM(to, to, 0x4e);
}

// MOVQ m64 -> xmm: zero the register, then load the low qword.
void SSE2EMU_MOVQ_M64_to_XMM( x86SSERegType to, uptr from)
{
	SSE_XORPS_XMM_to_XMM(to, to);
	SSE_MOVLPS_M64_to_XMM(to, from);
}

// MOVQ xmm -> xmm: zero then copy the low qword.
void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
{
	SSE_XORPS_XMM_to_XMM(to, to);
	SSE2EMU_MOVSD_XMM_to_XMM(to, from);
}

// MOVD [reg+offset] -> xmm: build {value,0,0,0} in `p`, then load it. Clobbers EAX.
void SSE2EMU_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
	MOV32RmtoROffset(EAX, from, offset);
	MOV32ItoM((uptr)p+4, 0);
	MOV32ItoM((uptr)p+8, 0);
	MOV32RtoM((uptr)p, EAX);
	MOV32ItoM((uptr)p+12, 0);
	SSE_MOVAPS_M128_to_XMM(to, (uptr)p);
}

// MOVD xmm -> [reg+offset]: spill the low dword via `p`. Clobbers EAX.
void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset )
{
	SSE_MOVSS_XMM_to_M32((uptr)p, from);
	MOV32MtoR(EAX, (uptr)p);
	MOV32RtoRmOffset(to, EAX, offset);
}
1325
1326#ifndef __x86_64__
1327extern void SetMMXstate();
1328
1329void SSE2EMU_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
1330{
1331 SSE_MOVLPS_XMM_to_M64(p, from);
1332 MOVQMtoR(to, p);
1333 SetMMXstate();
1334}
1335
1336void SSE2EMU_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
1337{
1338 MOVQRtoM(p, from);
1339 SSE_MOVLPS_M64_to_XMM(to, p);
1340 SetMMXstate();
1341}
1342#endif
1343
/****************************************************************************/
/*  SSE2 Emulated functions for SSE CPU's by kekko                          */
/****************************************************************************/
// PSHUFD emulation: spill `from` to buffer `p`, then for each of the four
// destination dwords pick source dword ((imm8 >> 2*i) & 3) into buffer `p2`,
// and reload `p2` into `to`. Clobbers EAX, EBX, ECX and buffers p/p2;
// generated code is not reentrant.
void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) {
	MOV64ItoR(EAX, (uptr)&p);
	MOV64ItoR(EBX, (uptr)&p2);
	SSE_MOVUPSRtoRm(EAX, from);

	// dword 0: select source index imm8 & 3 (x4 bytes) and store to p2[0]
	MOV32ItoR(ECX, (u32)imm8);
	AND32ItoR(ECX, 3);
	SHL32ItoR(ECX, 2);
	ADD32RtoR(ECX, EAX);
	MOV32RmtoR(ECX, ECX);
	MOV32RtoRm(EBX, ECX);

	// dword 1: (imm8 >> 2) & 3
	ADD32ItoR(EBX, 4);
	MOV32ItoR(ECX, (u32)imm8);
	SHR32ItoR(ECX, 2);
	AND32ItoR(ECX, 3);
	SHL32ItoR(ECX, 2);
	ADD32RtoR(ECX, EAX);
	MOV32RmtoR(ECX, ECX);
	MOV32RtoRm(EBX, ECX);

	// dword 2: (imm8 >> 4) & 3
	ADD32ItoR(EBX, 4);
	MOV32ItoR(ECX, (u32)imm8);
	SHR32ItoR(ECX, 4);
	AND32ItoR(ECX, 3);
	SHL32ItoR(ECX, 2);
	ADD32RtoR(ECX, EAX);
	MOV32RmtoR(ECX, ECX);
	MOV32RtoRm(EBX, ECX);

	// dword 3: (imm8 >> 6) & 3
	ADD32ItoR(EBX, 4);
	MOV32ItoR(ECX, (u32)imm8);
	SHR32ItoR(ECX, 6);
	AND32ItoR(ECX, 3);
	SHL32ItoR(ECX, 2);
	ADD32RtoR(ECX, EAX);
	MOV32RmtoR(ECX, ECX);
	MOV32RtoRm(EBX, ECX);

	// rewind EBX to &p2 and load the shuffled result
	SUB32ItoR(EBX, 12);

	SSE_MOVUPSRmtoR(to, EBX);
}

// MOVD xmm -> GPR emulation: store the full register to `p`, then load p[0].
// Clobbers the destination GPR as scratch; buffer bounce makes it non-reentrant.
void SSE2EMU_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) {
	/* XXX? */
	MOV64ItoR(to, (uptr)&p);
	SSE_MOVUPSRtoRm(to, from);
	MOV32RmtoR(to, to);
}
1397
1398#ifndef __x86_64__
1399extern void SetFPUstate();
1400extern void _freeMMXreg(int mmxreg);
1401#endif
1402
// CVTPS2DQ emulation via x87: spill the four floats to `f`, FLD/FISTP each one
// into `p2`, then reload. NOTE(review): FISTP rounds per the current x87 control
// word, which may differ from the SSE2 instruction's MXCSR rounding — confirm
// callers set compatible rounding modes. Not reentrant (static buffers).
void SSE2EMU_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) {
#ifndef __x86_64__
	SetFPUstate();
	_freeMMXreg(7);         // x87 stack top aliases MM7; free it before FPU use
#endif
	SSE_MOVAPS_XMM_to_M128((uptr)f, from);

	FLD32((uptr)&f[0]);
	FISTP32((uptr)&p2[0]);
	FLD32((uptr)&f[1]);
	FISTP32((uptr)&p2[1]);
	FLD32((uptr)&f[2]);
	FISTP32((uptr)&p2[2]);
	FLD32((uptr)&f[3]);
	FISTP32((uptr)&p2[3]);

	SSE_MOVAPS_M128_to_XMM(to, (uptr)p2);
}
1421
// CVTDQ2PS emulation via x87: FILD each source dword and FSTP it as a float
// into `f`, then load `f` into the destination. Not reentrant (static buffer).
void SSE2EMU_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) {
#ifndef __x86_64__
	SetFPUstate();
	_freeMMXreg(7);         // x87 stack top aliases MM7; free it before FPU use
#endif
	FILD32(from);
	FSTP32((uptr)&f[0]);
	FILD32(from+4);
	FSTP32((uptr)&f[1]);
	FILD32(from+8);
	FSTP32((uptr)&f[2]);
	FILD32(from+12);
	FSTP32((uptr)&f[3]);

	SSE_MOVAPS_M128_to_XMM(to, (uptr)f);
}
1438
// MOVD xmm -> m32 emulation: spill the register to `p`, then copy p[0] to the
// destination. Clobbers EAX; not reentrant (static buffer).
void SSE2EMU_MOVD_XMM_to_M32( uptr to, x86SSERegType from ) {
	/* XXX? */
	MOV64ItoR(EAX, (uptr)&p);
	SSE_MOVUPSRtoRm(EAX, from);
	MOV32RmtoR(EAX, EAX);
	MOV32RtoM(to, EAX);
}

// MOVD GPR -> xmm emulation: build {from,0,0,0} in `p` and load it, matching
// MOVD's zero-extension of the upper 96 bits. Not reentrant (static buffer).
void SSE2EMU_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) {
	MOV32ItoM((uptr)p+4, 0);
	MOV32ItoM((uptr)p+8, 0);
	MOV32RtoM((uptr)p, from);
	MOV32ItoM((uptr)p+12, 0);
	SSE_MOVAPS_M128_to_XMM(to, (uptr)p);
}
1454
1455#endif