git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps/libchdr/deps/lzma-24.05/src/CpuArch.c
/* CpuArch.c -- CPU specific code
2024-03-02 : Igor Pavlov : Public domain */

#include "Precomp.h"

// #include <stdio.h>

#include "CpuArch.h"

#ifdef MY_CPU_X86_OR_AMD64

#undef NEED_CHECK_FOR_CPUID
#if !defined(MY_CPU_AMD64)
#define NEED_CHECK_FOR_CPUID
#endif

/*
  The cpuid instruction supports a (subFunction) parameter in ECX
  that is used only with some specific (function) parameter values.
  But we always use only (subFunction == 0).
*/
/*
  __cpuid(): MSVC and GCC/CLANG use the same function/macro name,
  but the parameters are different.
  We use the MSVC __cpuid() parameter style for our z7_x86_cpuid() function.
*/
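
/* Illustrative sketch (not part of the upstream source): with the MSVC-style
   parameter order used here, CPUID function 0 fills p[] as EAX, EBX, ECX, EDX,
   and the 12-byte vendor string is the EBX, EDX, ECX byte sequence:

#include <stdio.h>
#include <string.h>
static void PrintCpuVendor(void)
{
  UInt32 p[4];
  char vendor[13];
  z7_x86_cpuid(p, 0);
  memcpy(vendor + 0, &p[1], 4);  // EBX
  memcpy(vendor + 4, &p[3], 4);  // EDX
  memcpy(vendor + 8, &p[2], 4);  // ECX
  vendor[12] = 0;
  printf("%s\n", vendor);        // e.g. "GenuineIntel" / "AuthenticAMD"
}
*/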

#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
    || defined(__clang__) /* && (__clang_major__ >= 10) */

/* There were some CLANG/GCC compilers that had issues with
   rbx (ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
   The compiler's <cpuid.h> contains a __cpuid() macro that is similar to our code.
   The history of __cpuid() changes in CLANG/GCC:
     GCC:
       2007: it preserved ebx for (__PIC__ && __i386__)
       2013: it preserved rbx and ebx for __PIC__
       2014: it doesn't preserve rbx and ebx anymore
     We suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
     CLANG:
       2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
   Why does CLANG care only about 64-bit mode, and not about ebx (in 32-bit)?
   Do we need a __PIC__ test for CLANG, or must we care about rbx even if
   __PIC__ is not defined?
*/

#define ASM_LN "\n"

#if defined(MY_CPU_AMD64) && defined(__PIC__) \
    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))

#define x86_cpuid_MACRO(p, func) { \
  __asm__ __volatile__ ( \
    ASM_LN   "mov     %%rbx, %q1"  \
    ASM_LN   "cpuid"               \
    ASM_LN   "xchg    %%rbx, %q1"  \
    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }

  /* "=&r" selects a free register. It can even select rbx, if that register is free.
     "=&D" for (RDI) also works, but the code can be larger with "=&D".
     "2"(0) means (subFunction = 0):
     2 is the (zero-based) index of "=c" (ECX) in the output constraint list. */

#elif defined(MY_CPU_X86) && defined(__PIC__) \
    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))

#define x86_cpuid_MACRO(p, func) { \
  __asm__ __volatile__ ( \
    ASM_LN   "mov     %%ebx, %k1"  \
    ASM_LN   "cpuid"               \
    ASM_LN   "xchg    %%ebx, %k1"  \
    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }

#else

#define x86_cpuid_MACRO(p, func) { \
  __asm__ __volatile__ ( \
    ASM_LN   "cpuid"               \
    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }

#endif


void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  x86_cpuid_MACRO(p, func)
}


Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
 #if defined(NEED_CHECK_FOR_CPUID)
  #define EFLAGS_CPUID_BIT 21
  UInt32 a;
  __asm__ __volatile__ (
    ASM_LN   "pushf"
    ASM_LN   "pushf"
    ASM_LN   "pop     %0"
    // ASM_LN   "movl    %0, %1"
    // ASM_LN   "xorl    $0x200000, %0"
    ASM_LN   "btc     %1, %0"
    ASM_LN   "push    %0"
    ASM_LN   "popf"
    ASM_LN   "pushf"
    ASM_LN   "pop     %0"
    ASM_LN   "xorl    (%%esp), %0"

    ASM_LN   "popf"
    ASM_LN
    : "=&r" (a) // "=a"
    : "i" (EFLAGS_CPUID_BIT)
    );
  if ((a & (1 << EFLAGS_CPUID_BIT)) == 0)
    return 0;
 #endif
  {
    UInt32 p[4];
    x86_cpuid_MACRO(p, 0)
    return p[0];
  }
}
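
/* Illustrative caller pattern (not part of the upstream source): users of
   CPUID leaves above 0 are expected to gate on z7_x86_cpuid_GetMaxFunc()
   first, as the CPU_IsSupported_*() functions below do:

  if (z7_x86_cpuid_GetMaxFunc() >= 7)
  {
    UInt32 d[4];
    z7_x86_cpuid(d, 7);  // extended feature flags arrive in d[1] (EBX) and d[2] (ECX)
    // ... test individual feature bits ...
  }
*/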

#undef ASM_LN

#elif !defined(_MSC_VER)

/*
// for gcc/clang and others: we can try to use the compiler's __cpuid macro:
#include <cpuid.h>
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  __cpuid(func, p[0], p[1], p[2], p[3]);
}
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  return (UInt32)__get_cpuid_max(0, NULL);
}
*/
// fallback for compilers where cpuid support is not implemented here:
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  UNUSED_VAR(func)
  p[0] = p[1] = p[2] = p[3] = 0;
}
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  return 0;
}

#else // _MSC_VER

#if !defined(MY_CPU_AMD64)

UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  #if defined(NEED_CHECK_FOR_CPUID)
  #define EFLAGS_CPUID_BIT 21
  __asm   pushfd
  __asm   pushfd
  /*
  __asm   pop     eax
  // __asm   mov     edx, eax
  __asm   btc     eax, EFLAGS_CPUID_BIT
  __asm   push    eax
  */
  __asm   btc     dword ptr [esp], EFLAGS_CPUID_BIT
  __asm   popfd
  __asm   pushfd
  __asm   pop     eax
  // __asm   xor     eax, edx
  __asm   xor     eax, [esp]
  // __asm   push    edx
  __asm   popfd
  __asm   and     eax, (1 shl EFLAGS_CPUID_BIT)
  __asm   jz      end_func
  #endif
  __asm   push    ebx
  __asm   xor     eax, eax    // func
  __asm   xor     ecx, ecx    // subFunction (optional) for (func == 0)
  __asm   cpuid
  __asm   pop     ebx
  #if defined(NEED_CHECK_FOR_CPUID)
  end_func:
  #endif
  __asm   ret 0
}

void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  UNUSED_VAR(p)
  UNUSED_VAR(func)
  __asm   push    ebx
  __asm   push    edi
  __asm   mov     edi, ecx    // p
  __asm   mov     eax, edx    // func
  __asm   xor     ecx, ecx    // subFunction (optional) for (func == 0)
  __asm   cpuid
  __asm   mov     [edi     ], eax
  __asm   mov     [edi +  4], ebx
  __asm   mov     [edi +  8], ecx
  __asm   mov     [edi + 12], edx
  __asm   pop     edi
  __asm   pop     ebx
  __asm   ret 0
}

#else // MY_CPU_AMD64

 #if _MSC_VER >= 1600
  #include <intrin.h>
  #define MY_cpuidex  __cpuidex
 #else
/*
 __cpuid (func == (0 or 7)) requires the subfunction number in ECX.
  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
   __cpuid() in new MSVC clears ECX.
   __cpuid() in old MSVC (14.00) x64 doesn't clear ECX.
 We can still use __cpuid for low (func) values that don't require ECX,
 but __cpuid() in old MSVC will be incorrect for some func values (e.g. func == 7).
 So here we use a hack for old MSVC to pass (subFunction) in the ECX register
 to the cpuid instruction: ECX holds the first parameter of a FASTCALL / NO_INLINE
 function, so the caller of MY_cpuidex_HACK() sets ECX to subFunction,
 old MSVC's __cpuid() doesn't change ECX, and the cpuid instruction
 receives the (subFunction) value.

DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
*/
static
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)
{
  UNUSED_VAR(subFunction)
  __cpuid(CPUInfo, func);
}
  #define MY_cpuidex(info, func, func2)  MY_cpuidex_HACK(func2, func, info)
  #pragma message("======== MY_cpuidex_HACK WAS USED ========")
 #endif // _MSC_VER >= 1600
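
/* Illustrative note (not in the upstream source): with the hack above,
   MY_cpuidex(info, 7, 0) expands to MY_cpuidex_HACK(0, 7, info).
   Under the Microsoft x64 calling convention the first parameter (0, the
   subFunction) arrives in ECX, old MSVC's __cpuid() leaves ECX untouched,
   and the cpuid instruction executes with EAX = 7, ECX = 0 as intended. */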

#if !defined(MY_CPU_AMD64)
/* inlining __cpuid() in MSVC x86 (32-bit) produces large, inefficient code,
   so we disable inlining here */
Z7_NO_INLINE
#endif
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  MY_cpuidex((Int32 *)p, (Int32)func, 0);
}

Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  Int32 a[4];
  MY_cpuidex(a, 0, 0);
  return a[0];
}

#endif // MY_CPU_AMD64
#endif // _MSC_VER

#if defined(NEED_CHECK_FOR_CPUID)
#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
#else
#define CHECK_CPUID_IS_SUPPORTED
#endif
#undef NEED_CHECK_FOR_CPUID


static
BoolInt x86cpuid_Func_1(UInt32 *p)
{
  CHECK_CPUID_IS_SUPPORTED
  z7_x86_cpuid(p, 1);
  return True;
}
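
/* Illustrative sketch (not part of the upstream source): CPUID.1 returns
   feature flags in EDX (p[3]) and ECX (p[2]), and each CPU_IsSupported_*()
   helper below just tests one such bit. For example, MMX is EDX bit 23,
   so a hypothetical checker would look like:

BoolInt CPU_IsSupported_MMX(void)
{
  UInt32 a[4];
  if (!x86cpuid_Func_1(&a[0]))
    return 0;
  return (BoolInt)(a[3] >> 23) & 1;  // CPUID.1:EDX.MMX [bit 23]
}
*/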

/*
static const UInt32 kVendors[][1] =
{
  { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
  { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
  { 0x746E6543 }  // , 0x48727561, 0x736C7561 }
};
*/

/*
typedef struct
{
  UInt32 maxFunc;
  UInt32 vendor[3];
  UInt32 ver;
  UInt32 b;
  UInt32 c;
  UInt32 d;
} Cx86cpuid;

enum
{
  CPU_FIRM_INTEL,
  CPU_FIRM_AMD,
  CPU_FIRM_VIA
};
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_ver_GetFamily(ver)   (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
#define x86cpuid_ver_GetModel(ver)    (((ver >> 12) &  0xf0) | ((ver >> 4) & 0xf))
#define x86cpuid_ver_GetStepping(ver)  (ver & 0xf)

int x86cpuid_GetFirm(const Cx86cpuid *p)
{
  unsigned i;
  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
  {
    const UInt32 *v = kVendors[i];
    if (v[0] == p->vendor[0]
        // && v[1] == p->vendor[1]
        // && v[2] == p->vendor[2]
        )
      return (int)i;
  }
  return -1;
}

BoolInt CPU_Is_InOrder()
{
  Cx86cpuid p;
  UInt32 family, model;
  if (!x86cpuid_CheckAndRead(&p))
    return True;

  family = x86cpuid_ver_GetFamily(p.ver);
  model = x86cpuid_ver_GetModel(p.ver);

  switch (x86cpuid_GetFirm(&p))
  {
    case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
        // In-Order Atom CPU
           model == 0x1C  // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
        || model == 0x26  // 45 nm, Z6xx
        || model == 0x27  // 32 nm, Z2460
        || model == 0x35  // 32 nm, Z2760
        || model == 0x36  // 32 nm, N2xxx, D2xxx
        )));
    case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
    case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
  }
  return False; // v23 : unknown processors are not In-Order
}
*/

#ifdef _WIN32
#include "7zWindows.h"
#endif

#if !defined(MY_CPU_AMD64) && defined(_WIN32)

/* for legacy SSE ia32: there is no user-space cpu instruction to check
   that the OS supports SSE register storing/restoring on context switches.
   So we need some OS-specific function to check that it's safe to use SSE registers.
*/

Z7_FORCE_INLINE
static BoolInt CPU_Sys_Is_SSE_Supported(void)
{
#ifdef _MSC_VER
  #pragma warning(push)
  #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
#endif
  /* The low byte of GetVersion() is the major version of Windows.
     We suppose that any Windows version since
     Windows 2000 (major == 5) supports SSE registers */
  return (Byte)GetVersion() >= 5;
#if defined(_MSC_VER)
  #pragma warning(pop)
#endif
}
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
#else
#define CHECK_SYS_SSE_SUPPORT
#endif


#if !defined(MY_CPU_AMD64)

BoolInt CPU_IsSupported_CMOV(void)
{
  UInt32 a[4];
  if (!x86cpuid_Func_1(&a[0]))
    return 0;
  return (BoolInt)(a[3] >> 15) & 1;
}

BoolInt CPU_IsSupported_SSE(void)
{
  UInt32 a[4];
  CHECK_SYS_SSE_SUPPORT
  if (!x86cpuid_Func_1(&a[0]))
    return 0;
  return (BoolInt)(a[3] >> 25) & 1;
}

BoolInt CPU_IsSupported_SSE2(void)
{
  UInt32 a[4];
  CHECK_SYS_SSE_SUPPORT
  if (!x86cpuid_Func_1(&a[0]))
    return 0;
  return (BoolInt)(a[3] >> 26) & 1;
}

#endif


static UInt32 x86cpuid_Func_1_ECX(void)
{
  UInt32 a[4];
  CHECK_SYS_SSE_SUPPORT
  if (!x86cpuid_Func_1(&a[0]))
    return 0;
  return a[2];
}

BoolInt CPU_IsSupported_AES(void)
{
  return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;
}

BoolInt CPU_IsSupported_SSSE3(void)
{
  return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;
}

BoolInt CPU_IsSupported_SSE41(void)
{
  return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;
}

BoolInt CPU_IsSupported_SHA(void)
{
  CHECK_SYS_SSE_SUPPORT

  if (z7_x86_cpuid_GetMaxFunc() < 7)
    return False;
  {
    UInt32 d[4];
    z7_x86_cpuid(d, 7);
    return (BoolInt)(d[1] >> 29) & 1;
  }
}

/*
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
  MSVC also defines the (_XCR_XFEATURE_ENABLED_MASK) macro in
  <immintrin.h> that we can use or check.
  For any 32-bit x86 we can use asm code in MSVC,
  but MSVC asm code is huge after compilation,
  so _xgetbv() is better.

ICC: _xgetbv() intrinsic is available (since what version of ICC?).
  ICC defines (__GNUC__) and supports the gnu assembler;
  ICC also supports MASM-style code with the -use-msasm switch,
  but ICC doesn't support __attribute__((__target__)).

GCC/CLANG 9:
  _xgetbv() is a macro that works via __builtin_ia32_xgetbv()
  and needs __attribute__((__target__("xsave"))).
  But with __target__("xsave") the function will not be inlined
  into a function that has no __target__("xsave") attribute.
  If we want _xgetbv() call inlining, then we should use the asm version
  instead of calling _xgetbv().
  Note: the intrinsic was broken before GCC 8.2:
    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
*/

#if    defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
    || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
    || defined(__GNUC__) && (__GNUC__ >= 9) \
    || defined(__clang__) && (__clang_major__ >= 9)
// we define ATTRIB_XGETBV, if we want to use the predefined _xgetbv() from the compiler
#if defined(__INTEL_COMPILER)
#define ATTRIB_XGETBV
#elif defined(__GNUC__) || defined(__clang__)
// we don't define ATTRIB_XGETBV here, because the asm version is better for inlining.
// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
#else
#define ATTRIB_XGETBV
#endif
#endif

#if defined(ATTRIB_XGETBV)
#include <immintrin.h>
#endif


// XFEATURE_ENABLED_MASK/XCR0
#define MY_XCR_XFEATURE_ENABLED_MASK 0

#if defined(ATTRIB_XGETBV)
ATTRIB_XGETBV
#endif
static UInt64 x86_xgetbv_0(UInt32 num)
{
#if defined(ATTRIB_XGETBV)
  {
    return
     #if (defined(_MSC_VER))
       _xgetbv(num);
     #else
       __builtin_ia32_xgetbv(
         #if !defined(__clang__)
           (int)
         #endif
           num);
     #endif
  }

#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)

  UInt32 a, d;
 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
  __asm__
  (
    "xgetbv"
    : "=a"(a), "=d"(d) : "c"(num) : "cc"
  );
 #else // old gcc
  __asm__
  (
    ".byte 0x0f, 0x01, 0xd0" "\n\t"
    : "=a"(a), "=d"(d) : "c"(num) : "cc"
  );
 #endif
  return ((UInt64)d << 32) | a;
  // return a;

#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)

  UInt32 a, d;
  __asm {
    push eax
    push edx
    push ecx
    mov ecx, num
    // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
    _emit 0x0f
    _emit 0x01
    _emit 0xd0
    mov a, eax
    mov d, edx
    pop ecx
    pop edx
    pop eax
  }
  return ((UInt64)d << 32) | a;
  // return a;

#else // unknown compiler
  // #error "Need xgetbv function"
  UNUSED_VAR(num)
  // for MSVC-X64 we could call an external function from an external file.
  /* Actually we checked OSXSAVE/AVX via cpuid before this point,
     so it's expected that the OS supports at least AVX and the states below it. */
  // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
  return
    //  (1 << 0) |  // x87
        (1 << 1)    // SSE
      | (1 << 2);   // AVX

#endif
}
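
/* Illustrative usage (not part of the upstream source): XGETBV(0) returns
   the XCR0 state mask; bit 1 = SSE state, bit 2 = AVX state, which is
   exactly what CPU_IsSupported_AVX() below tests:

  const UInt64 xcr0 = x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
  const BoolInt sseState = (BoolInt)(xcr0 >> 1) & 1;
  const BoolInt avxState = (BoolInt)(xcr0 >> 2) & 1;
*/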

#ifdef _WIN32
/*
  Old Windows versions do not know about new ISA extensions that
  were introduced later. But we can still use new extensions,
  even if Windows doesn't report support for them;
  we can rely on a new extension only if Windows knows about some ISA extension
  that changes the number or size of registers: SSE, AVX/XSAVE, AVX512.
  So it's enough to check
    MY_PF_AVX_INSTRUCTIONS_AVAILABLE
  instead of
    MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
*/
#define MY_PF_XSAVE_ENABLED                     17
// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE   36
// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE  37
// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE  38
// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE     39
// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE    40
// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
#endif

BoolInt CPU_IsSupported_AVX(void)
{
  #ifdef _WIN32
  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
    return False;
  /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is reported starting from
     some recent Win10 revisions. But we need AVX on older Windows too,
     so we don't use the following check: */
  /*
  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
    return False;
  */
  #endif

  /*
    The OS must use the special XSAVE/XRSTOR instructions to save
    AVX registers when that is required for context switching.
    At OS startup:
      the OS sets the CR4.OSXSAVE flag to signal the processor that the OS supports the XSAVE extensions;
      the OS also sets a bitmask in the XCR0 register that defines which
      registers will be processed by the XSAVE instruction:
        XCR0.x87[bit 0] - x87 registers and state
        XCR0.SSE[bit 1] - SSE registers and state
        XCR0.AVX[bit 2] - AVX registers and state
    CR4.OSXSAVE is reflected in CPUID.1:ECX.OSXSAVE[bit 27],
    so we can read that bit in user-space.
    XCR0 is available for reading in user-space via the XGETBV instruction.
  */
  {
    const UInt32 c = x86cpuid_Func_1_ECX();
    if (0 == (1
        & (c >> 28)   // AVX instructions are supported by hardware
        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by the OS.
      return False;
  }

  /* We could also check
     CPUID.1:ECX.XSAVE [bit 26] : it shows that the
     XSAVE, XRSTOR, XSETBV, XGETBV instructions are supported by hardware.
     But that check is redundant: if the OSXSAVE bit is set, then XSAVE is also set. */

  /* If the OS has enabled the XSAVE extension instructions (OSXSAVE == 1),
     in most cases we expect that the OS also supports storing/restoring
     at least the AVX and SSE states.
     But to be sure, we call the user-space instruction
     XGETBV(0) to get the XCR0 value, a bitmask that defines
     exactly which states (registers) the OS has enabled for storing/restoring.
  */

  {
    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
    // printf("\n=== XGetBV=%d\n", bm);
    return 1
        & (BoolInt)(bm >> 1)  // SSE state is supported (set by OS) for storing/restoring
        & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring
  }
  // since Win7SP1: we can use GetEnabledXStateFeatures();
}


BoolInt CPU_IsSupported_AVX2(void)
{
  if (!CPU_IsSupported_AVX())
    return False;
  if (z7_x86_cpuid_GetMaxFunc() < 7)
    return False;
  {
    UInt32 d[4];
    z7_x86_cpuid(d, 7);
    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
    return 1
      & (BoolInt)(d[1] >> 5); // avx2
  }
}

/*
// fix it:
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)
{
  if (!CPU_IsSupported_AVX())
    return False;
  if (z7_x86_cpuid_GetMaxFunc() < 7)
    return False;
  {
    UInt32 d[4];
    z7_x86_cpuid(d, 7);
    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
    return 1
      & (BoolInt)(d[1] >> 16)  // avx512-f
      & (BoolInt)(d[1] >> 31); // avx512-vl
  }
}
*/

BoolInt CPU_IsSupported_VAES_AVX2(void)
{
  if (!CPU_IsSupported_AVX())
    return False;
  if (z7_x86_cpuid_GetMaxFunc() < 7)
    return False;
  {
    UInt32 d[4];
    z7_x86_cpuid(d, 7);
    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
    return 1
      & (BoolInt)(d[1] >> 5)  // avx2
      // & (d[1] >> 31)       // avx512vl
      & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX
  }
}

BoolInt CPU_IsSupported_PageGB(void)
{
  CHECK_CPUID_IS_SUPPORTED
  {
    UInt32 d[4];
    z7_x86_cpuid(d, 0x80000000);
    if (d[0] < 0x80000001)
      return False;
    z7_x86_cpuid(d, 0x80000001);
    return (BoolInt)(d[3] >> 26) & 1;
  }
}


#elif defined(MY_CPU_ARM_OR_ARM64)

#ifdef _WIN32

#include "7zWindows.h"

BoolInt CPU_IsSupported_CRC32(void)  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)  ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON(void)   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)      ? 1 : 0; }

#else

#if defined(__APPLE__)

/*
#include <stdio.h>
#include <string.h>
static void Print_sysctlbyname(const char *name)
{
  size_t bufSize = 256;
  char buf[256];
  int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
  {
    int i;
    printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
    for (i = 0; i < 20; i++)
      printf(" %2x", (unsigned)(Byte)buf[i]);
  }
}
*/
/*
  Print_sysctlbyname("hw.pagesize");
  Print_sysctlbyname("machdep.cpu.brand_string");
*/

static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
{
  UInt32 val = 0;
  if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
    return 1;
  return 0;
}

BoolInt CPU_IsSupported_CRC32(void)
{
  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
}

BoolInt CPU_IsSupported_NEON(void)
{
  return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
}

#ifdef MY_CPU_ARM64
#define APPLE_CRYPTO_SUPPORT_VAL 1
#else
#define APPLE_CRYPTO_SUPPORT_VAL 0
#endif

BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }


#else // __APPLE__

#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)
  #define Z7_GETAUXV_AVAILABLE
#else
// #pragma message("=== is not NEW GLIBC === ")
  #if defined __has_include
  #if __has_include (<sys/auxv.h>)
// #pragma message("=== sys/auxv.h is avail === ")
    #define Z7_GETAUXV_AVAILABLE
  #endif
  #endif
#endif

#ifdef Z7_GETAUXV_AVAILABLE
// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")
#include <sys/auxv.h>
#define USE_HWCAP
#endif

#ifdef USE_HWCAP

#if defined(__FreeBSD__)
static unsigned long MY_getauxval(int aux)
{
  unsigned long val;
  if (elf_aux_info(aux, &val, sizeof(val)))
    return 0;
  return val;
}
#else
#define MY_getauxval  getauxval
  #if defined __has_include
  #if __has_include (<asm/hwcap.h>)
#include <asm/hwcap.h>
  #endif
  #endif
#endif

  #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
  BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); }
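
/* Illustrative expansion (not in the upstream source): for example,
   MY_HWCAP_CHECK_FUNC_2(CRC32, CRC32) expands to:
   BoolInt CPU_IsSupported_CRC32(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_CRC32)); }
   i.e. the function returns the raw masked HWCAP bit, which callers treat
   as a nonzero/zero boolean. */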

#ifdef MY_CPU_ARM64
  #define MY_HWCAP_CHECK_FUNC(name) \
  MY_HWCAP_CHECK_FUNC_2(name, name)
#if 1 || defined(__ARM_NEON)
  BoolInt CPU_IsSupported_NEON(void) { return True; }
#else
  MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
#endif
// MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM)
  #define MY_HWCAP_CHECK_FUNC(name) \
  BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); }
  MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif

#else // USE_HWCAP

  #define MY_HWCAP_CHECK_FUNC(name) \
  BoolInt CPU_IsSupported_ ## name(void) { return 0; }

#if defined(__SWITCH__) || defined(__vita__)
  BoolInt CPU_IsSupported_NEON(void) { return 1; }
#else
  MY_HWCAP_CHECK_FUNC(NEON)
#endif

#endif // USE_HWCAP

MY_HWCAP_CHECK_FUNC (CRC32)
MY_HWCAP_CHECK_FUNC (SHA1)
MY_HWCAP_CHECK_FUNC (SHA2)
MY_HWCAP_CHECK_FUNC (AES)

#endif // __APPLE__
#endif // _WIN32

#endif // MY_CPU_ARM_OR_ARM64



#ifdef __APPLE__

#include <sys/sysctl.h>

int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  return sysctlbyname(name, buf, bufSize, NULL, 0);
}

int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
  size_t bufSize = sizeof(*val);
  const int res = z7_sysctlbyname_Get(name, val, &bufSize);
  if (res == 0 && bufSize != sizeof(*val))
    return EFAULT;
  return res;
}
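
/* Illustrative usage sketch (not part of the upstream source); the sysctl
   name "hw.logicalcpu" is a standard macOS key, assumed here for demonstration:

static UInt32 Get_NumLogicalCPUs(void)
{
  UInt32 n = 0;
  if (z7_sysctlbyname_Get_UInt32("hw.logicalcpu", &n) != 0)
    return 0;  // sysctl failed or returned an unexpected size
  return n;
}
*/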

#endif