1 /* CpuArch.c -- CPU specific code
\r
2 2021-07-13 : Igor Pavlov : Public domain */
\r
8 #ifdef MY_CPU_X86_OR_AMD64
\r
10 #if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
\r
14 #if !defined(USE_ASM) && _MSC_VER >= 1500
\r
18 #if defined(USE_ASM) && !defined(MY_CPU_AMD64)
\r
19 static UInt32 CheckFlag(UInt32 flag)
\r
25 __asm xor EAX, flag;
\r
33 __asm and flag, EAX;
\r
35 __asm__ __volatile__ (
\r
38 "movl %%EAX,%%EDX\n\t"
\r
44 "xorl %%EDX,%%EAX\n\t"
\r
47 "andl %%EAX, %0\n\t":
\r
48 "=c" (flag) : "c" (flag) :
\r
/* Statement macro: makes the enclosing function return False as soon as
   CheckFlag() yields 0 for mask (1 << 18) or for mask (1 << 21).
   It expands to a complete `if` statement, terminating ';' included, so it is
   written on a line of its own without a trailing semicolon.
   NOTE(review): bits 18 and 21 are presumably the EFLAGS AC and ID bits - confirm. */
#define CHECK_CPUID_IS_SUPPORTED \
  if (CheckFlag(1 << 18) == 0 \
      || CheckFlag(1 << 21) == 0) \
    return False;
\r
55 #define CHECK_CPUID_IS_SUPPORTED
\r
60 #if _MSC_VER >= 1600
\r
61 #define MY__cpuidex __cpuidex
\r
65 __cpuid (function == 4) requires subfunction number in ECX.
\r
66 MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
\r
67 __cpuid() in new MSVC clears ECX.
\r
68 __cpuid() in old MSVC (14.00) doesn't clear ECX
\r
69 We can still use __cpuid for low (function) values that don't require ECX,
\r
70 but __cpuid() in old MSVC will be incorrect for function values that do, such as (function == 4).
\r
71 So here we use a hack for old MSVC to pass (subFunction) in the ECX register to the cpuid instruction:
\r
72 the ECX value is the first parameter of a FAST_CALL / NO_INLINE function,
\r
73 so the caller of MY__cpuidex_HACK() sets ECX to subFunction, and
\r
74 old MSVC's __cpuid() doesn't change ECX, so the cpuid instruction gets the (subFunction) value.
\r
76 DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
\r
81 void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
\r
83 UNUSED_VAR(subFunction);
\r
84 __cpuid(CPUInfo, function);
\r
/* Routes MY__cpuidex(info, function, subFunction) to MY__cpuidex_HACK(), moving
   the sub-function into the first argument position of that call. */
#define MY__cpuidex(cpuInfo, fn, subFn) MY__cpuidex_HACK(subFn, cpuInfo, fn)
\r
88 #pragma message("======== MY__cpuidex_HACK WAS USED ========")
\r
/* Fallback mapping: MY__cpuidex(info, function, subFunction) becomes a plain
   __cpuid(info, function) call; the sub-function argument is dropped. */
#define MY__cpuidex(cpuInfo, fn, subFn) __cpuid(cpuInfo, fn)
\r
92 #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
\r
99 void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
\r
105 UInt32 a2, b2, c2, d2;
\r
106 __asm xor EBX, EBX;
\r
107 __asm xor ECX, ECX;
\r
108 __asm xor EDX, EDX;
\r
109 __asm mov EAX, function;
\r
123 __asm__ __volatile__ (
\r
124 #if defined(MY_CPU_AMD64) && defined(__PIC__)
\r
125 "mov %%rbx, %%rdi;"
\r
127 "xchg %%rbx, %%rdi;"
\r
130 #elif defined(MY_CPU_X86) && defined(__PIC__)
\r
131 "mov %%ebx, %%edi;"
\r
133 "xchgl %%ebx, %%edi;"
\r
143 : "0" (function), "c"(0) ) ;
\r
151 MY__cpuidex(CPUInfo, (int)function, 0);
\r
153 *a = (UInt32)CPUInfo[0];
\r
154 *b = (UInt32)CPUInfo[1];
\r
155 *c = (UInt32)CPUInfo[2];
\r
156 *d = (UInt32)CPUInfo[3];
\r
161 BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
\r
163 CHECK_CPUID_IS_SUPPORTED
\r
164 MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
\r
165 MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
\r
169 static const UInt32 kVendors[][3] =
\r
171 { 0x756E6547, 0x49656E69, 0x6C65746E},
\r
172 { 0x68747541, 0x69746E65, 0x444D4163},
\r
173 { 0x746E6543, 0x48727561, 0x736C7561}
\r
176 int x86cpuid_GetFirm(const Cx86cpuid *p)
\r
179 for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
\r
181 const UInt32 *v = kVendors[i];
\r
182 if (v[0] == p->vendor[0] &&
\r
183 v[1] == p->vendor[1] &&
\r
184 v[2] == p->vendor[2])
\r
190 BoolInt CPU_Is_InOrder()
\r
194 UInt32 family, model;
\r
195 if (!x86cpuid_CheckAndRead(&p))
\r
198 family = x86cpuid_GetFamily(p.ver);
\r
199 model = x86cpuid_GetModel(p.ver);
\r
201 firm = x86cpuid_GetFirm(&p);
\r
205 case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
\r
206 /* In-Order Atom CPU */
\r
207 model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
\r
208 || model == 0x26 /* 45 nm, Z6xx */
\r
209 || model == 0x27 /* 32 nm, Z2460 */
\r
210 || model == 0x35 /* 32 nm, Z2760 */
\r
211 || model == 0x36 /* 32 nm, N2xxx, D2xxx */
\r
213 case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
\r
214 case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
\r
219 #if !defined(MY_CPU_AMD64) && defined(_WIN32)
\r
220 #include <Windows.h>
\r
221 static BoolInt CPU_Sys_Is_SSE_Supported()
\r
224 vi.dwOSVersionInfoSize = sizeof(vi);
\r
225 if (!GetVersionEx(&vi))
\r
227 return (vi.dwMajorVersion >= 5);
\r
/* Statement macro: makes the enclosing function return False when
   CPU_Sys_Is_SSE_Supported() reports no support.
   It expands to a complete `if` statement, terminating ';' included, so it is
   written without a trailing semicolon. */
#define CHECK_SYS_SSE_SUPPORT \
  if (!CPU_Sys_Is_SSE_Supported()) \
    return False;
\r
231 #define CHECK_SYS_SSE_SUPPORT
\r
235 static UInt32 X86_CPUID_ECX_Get_Flags()
\r
238 CHECK_SYS_SSE_SUPPORT
\r
239 if (!x86cpuid_CheckAndRead(&p))
\r
244 BoolInt CPU_IsSupported_AES()
\r
246 return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
\r
249 BoolInt CPU_IsSupported_SSSE3()
\r
251 return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
\r
254 BoolInt CPU_IsSupported_SSE41()
\r
256 return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
\r
259 BoolInt CPU_IsSupported_SHA()
\r
262 CHECK_SYS_SSE_SUPPORT
\r
263 if (!x86cpuid_CheckAndRead(&p))
\r
269 UInt32 d[4] = { 0 };
\r
270 MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
\r
271 return (d[1] >> 29) & 1;
\r
275 // #include <stdio.h>
\r
278 #include <Windows.h>
\r
281 BoolInt CPU_IsSupported_AVX2()
\r
284 CHECK_SYS_SSE_SUPPORT
\r
287 #define MY__PF_XSAVE_ENABLED 17
\r
288 if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
\r
292 if (!x86cpuid_CheckAndRead(&p))
\r
297 UInt32 d[4] = { 0 };
\r
298 MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
\r
299 // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
\r
301 & (d[1] >> 5); // avx2
\r
305 BoolInt CPU_IsSupported_VAES_AVX2()
\r
308 CHECK_SYS_SSE_SUPPORT
\r
311 #define MY__PF_XSAVE_ENABLED 17
\r
312 if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
\r
316 if (!x86cpuid_CheckAndRead(&p))
\r
321 UInt32 d[4] = { 0 };
\r
322 MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
\r
323 // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
\r
325 & (d[1] >> 5) // avx2
\r
326 // & (d[1] >> 31) // avx512vl
\r
327 & (d[2] >> 9); // vaes // VEX-256/EVEX
\r
331 BoolInt CPU_IsSupported_PageGB()
\r
334 if (!x86cpuid_CheckAndRead(&cpuid))
\r
337 UInt32 d[4] = { 0 };
\r
338 MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
\r
339 if (d[0] < 0x80000001)
\r
343 UInt32 d[4] = { 0 };
\r
344 MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
\r
345 return (d[3] >> 26) & 1;
\r
350 #elif defined(MY_CPU_ARM_OR_ARM64)
\r
354 #include <Windows.h>
\r
356 BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
\r
357 BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
\r
358 BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
\r
362 #if defined(__APPLE__)
\r
366 #include <string.h>
\r
367 static void Print_sysctlbyname(const char *name)
\r
369 size_t bufSize = 256;
\r
371 int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
\r
374 printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
\r
375 for (i = 0; i < 20; i++)
\r
376 printf(" %2x", (unsigned)(Byte)buf[i]);
\r
382 static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
\r
385 if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
\r
391 Print_sysctlbyname("hw.pagesize");
\r
392 Print_sysctlbyname("machdep.cpu.brand_string");
\r
395 BoolInt CPU_IsSupported_CRC32(void)
\r
397 return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
\r
400 BoolInt CPU_IsSupported_NEON(void)
\r
402 return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
\r
405 #ifdef MY_CPU_ARM64
\r
406 #define APPLE_CRYPTO_SUPPORT_VAL 1
\r
408 #define APPLE_CRYPTO_SUPPORT_VAL 0
\r
411 BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
\r
412 BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
\r
413 BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
\r
418 #if defined(__SWITCH__) || defined(__vita__)
\r
420 BoolInt CPU_IsSupported_CRC32(void) { return 0; }
\r
421 BoolInt CPU_IsSupported_NEON(void) { return 1; }
\r
422 BoolInt CPU_IsSupported_SHA1(void) { return 0; }
\r
423 BoolInt CPU_IsSupported_SHA2(void) { return 0; }
\r
424 BoolInt CPU_IsSupported_AES (void) { return 0; }
\r
428 #include <sys/auxv.h>
\r
434 #include <asm/hwcap.h>
\r
/* Defines BoolInt CPU_IsSupported_<name1>(void), which returns 1 when the
   HWCAP_<name2> bit is set in getauxval(AT_HWCAP) and 0 otherwise.
   The generated function takes (void) so that it is a real prototype, like the
   hand-written CPU_IsSupported_* variants in the other platform branches. */
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
  BoolInt CPU_IsSupported_ ## name1(void) { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
\r
439 #ifdef MY_CPU_ARM64
\r
/* Shorthand for the case where the CPU_IsSupported_ suffix and the HWCAP_
   suffix are the same identifier: forwards it twice to MY_HWCAP_CHECK_FUNC_2. */
#define MY_HWCAP_CHECK_FUNC(feature) MY_HWCAP_CHECK_FUNC_2(feature, feature)
\r
442 MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
\r
443 // MY_HWCAP_CHECK_FUNC (ASIMD)
\r
444 #elif defined(MY_CPU_ARM)
\r
/* Defines BoolInt CPU_IsSupported_<name>(void), which returns 1 when the
   HWCAP2_<name> bit is set in getauxval(AT_HWCAP2) and 0 otherwise.
   The generated function takes (void) so that it is a real prototype. */
#define MY_HWCAP_CHECK_FUNC(name) \
  BoolInt CPU_IsSupported_ ## name(void) { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
\r
447 MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
\r
/* Defines BoolInt CPU_IsSupported_<name>(void) as a stub that always returns 0.
   The generated function takes (void) so that it is a real prototype. */
#define MY_HWCAP_CHECK_FUNC(name) \
  BoolInt CPU_IsSupported_ ## name(void) { return 0; }
\r
454 MY_HWCAP_CHECK_FUNC(NEON)
\r
456 #endif // USE_HWCAP
\r
458 MY_HWCAP_CHECK_FUNC (CRC32)
\r
459 MY_HWCAP_CHECK_FUNC (SHA1)
\r
460 MY_HWCAP_CHECK_FUNC (SHA2)
\r
461 MY_HWCAP_CHECK_FUNC (AES)
\r
464 #endif // __APPLE__
\r
467 #endif // MY_CPU_ARM_OR_ARM64
\r
473 #include <sys/sysctl.h>
\r
475 int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
\r
477 return sysctlbyname(name, buf, bufSize, NULL, 0);
\r
480 int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
\r
482 size_t bufSize = sizeof(*val);
\r
483 int res = My_sysctlbyname_Get(name, val, &bufSize);
\r
484 if (res == 0 && bufSize != sizeof(*val))
\r