9e052883 |
1 | /* CpuArch.c -- CPU specific code\r |
2 | 2021-07-13 : Igor Pavlov : Public domain */\r |
3 | \r |
4 | #include "Precomp.h"\r |
5 | \r |
6 | #include "CpuArch.h"\r |
7 | \r |
8 | #ifdef MY_CPU_X86_OR_AMD64\r |
9 | \r |
10 | #if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)\r |
11 | #define USE_ASM\r |
12 | #endif\r |
13 | \r |
14 | #if !defined(USE_ASM) && _MSC_VER >= 1500\r |
15 | #include <intrin.h>\r |
16 | #endif\r |
17 | \r |
18 | #if defined(USE_ASM) && !defined(MY_CPU_AMD64)\r |
/*
  CheckFlag() tests whether the EFLAGS bits selected by (flag) can be toggled.
  Sequence (same in both compiler branches):
    1) read EFLAGS and keep a copy,
    2) write EFLAGS back with the (flag) bits inverted,
    3) read EFLAGS again and XOR with the copy to get the bits that really changed,
    4) restore the original EFLAGS value,
    5) mask (flag) with the changed bits.
  Returns: the subset of (flag) bits that could be changed, or 0 if none could.
*/
static UInt32 CheckFlag(UInt32 flag)
{
  #ifdef _MSC_VER
  /* EAX = current EFLAGS, EDX = saved copy of the original EFLAGS */
  __asm pushfd;
  __asm pop EAX;
  __asm mov EDX, EAX;
  /* try to write EFLAGS with the requested bits inverted */
  __asm xor EAX, flag;
  __asm push EAX;
  __asm popfd;
  /* read EFLAGS back: EAX = bits that differ from the original value */
  __asm pushfd;
  __asm pop EAX;
  __asm xor EAX, EDX;
  /* restore the original EFLAGS */
  __asm push EDX;
  __asm popfd;
  __asm and flag, EAX;
  #else
  /* Same sequence as above; operand %0 is (flag), kept in ECX ("c" constraint). */
  __asm__ __volatile__ (
    "pushf\n\t"
    "pop  %%EAX\n\t"
    "movl %%EAX,%%EDX\n\t"
    "xorl %0,%%EAX\n\t"
    "push %%EAX\n\t"
    "popf\n\t"
    "pushf\n\t"
    "pop  %%EAX\n\t"
    "xorl %%EDX,%%EAX\n\t"
    "push %%EDX\n\t"
    "popf\n\t"
    "andl %%EAX, %0\n\t":
    "=c" (flag) : "c" (flag) :
    "%eax", "%edx");
  #endif
  return flag;
}
/* CPUID is treated as unavailable unless both EFLAGS bit 18 and bit 21 can be toggled. */
#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
54 | #else\r |
55 | #define CHECK_CPUID_IS_SUPPORTED\r |
56 | #endif\r |
57 | \r |
58 | #ifndef USE_ASM\r |
59 | #ifdef _MSC_VER\r |
60 | #if _MSC_VER >= 1600\r |
61 | #define MY__cpuidex __cpuidex\r |
62 | #else\r |
63 | \r |
/*
  Some cpuid functions (for example, function == 4) require a sub-function number in ECX.
  MSDN: the __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
    - __cpuid() in new MSVC clears ECX.
    - __cpuid() in old MSVC (14.00) doesn't clear ECX.
  So __cpuid() is still usable for (function) values that don't need ECX,
  but in old MSVC it gives incorrect results for functions that do need it (function == 4).

  The hack for old MSVC: (subFunction) is the first parameter of a FAST_CALL / NO_INLINE
  function, so the caller of MY__cpuidex_HACK() places (subFunction) in ECX.
  Old MSVC doesn't change ECX for __cpuid(), so the cpuid instruction
  receives the (subFunction) value in ECX.

  DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
*/

static
MY_NO_INLINE
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
{
  /* (subFunction) is not referenced in C code: it only has to be in ECX at the call */
  UNUSED_VAR(subFunction);
  __cpuid(CPUInfo, function);
}
86 | \r |
87 | #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)\r |
88 | #pragma message("======== MY__cpuidex_HACK WAS USED ========")\r |
89 | #endif\r |
90 | #else\r |
91 | #define MY__cpuidex(info, func, func2) __cpuid(info, func)\r |
92 | #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")\r |
93 | #endif\r |
94 | #endif\r |
95 | \r |
96 | \r |
97 | \r |
98 | \r |
/*
  MyCPUID() executes the cpuid instruction for (function) with sub-function 0 in ECX
  and stores the resulting EAX, EBX, ECX, EDX values to (*a), (*b), (*c), (*d).

  Three implementations are selected at compile time:
    - USE_ASM + _MSC_VER : MSVC inline assembler,
    - USE_ASM otherwise  : GNU-style inline assembler,
    - no USE_ASM         : MY__cpuidex(), reading the 4 values from CPUInfo[].
*/
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
  #ifdef USE_ASM

  #ifdef _MSC_VER

  /* results are stored to locals first and copied to the output pointers in C code */
  UInt32 a2, b2, c2, d2;
  __asm xor EBX, EBX;
  __asm xor ECX, ECX;
  __asm xor EDX, EDX;
  __asm mov EAX, function;
  __asm cpuid;
  __asm mov a2, EAX;
  __asm mov b2, EBX;
  __asm mov c2, ECX;
  __asm mov d2, EDX;

  *a = a2;
  *b = b2;
  *c = c2;
  *d = d2;

  #else

  /*
    In the two __PIC__ variants EBX/RBX is copied to EDI/RDI before cpuid and the
    registers are exchanged afterwards, so EBX/RBX keeps its original value and
    the EBX result of cpuid is delivered through the "D" (EDI/RDI) output operand.
    Inputs: EAX = function (operand "0"), ECX = 0.
  */
  __asm__ __volatile__ (
  #if defined(MY_CPU_AMD64) && defined(__PIC__)
    "mov %%rbx, %%rdi;"
    "cpuid;"
    "xchg %%rbx, %%rdi;"
    : "=a" (*a) ,
      "=D" (*b) ,
  #elif defined(MY_CPU_X86) && defined(__PIC__)
    "mov %%ebx, %%edi;"
    "cpuid;"
    "xchgl %%ebx, %%edi;"
    : "=a" (*a) ,
      "=D" (*b) ,
  #else
    "cpuid"
    : "=a" (*a) ,
      "=b" (*b) ,
  #endif
      "=c" (*c) ,
      "=d" (*d)
    : "0" (function), "c"(0) ) ;

  #endif

  #else

  int CPUInfo[4];

  /* third argument is the sub-function number (0) */
  MY__cpuidex(CPUInfo, (int)function, 0);

  *a = (UInt32)CPUInfo[0];
  *b = (UInt32)CPUInfo[1];
  *c = (UInt32)CPUInfo[2];
  *d = (UInt32)CPUInfo[3];

  #endif
}
160 | \r |
161 | BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)\r |
162 | {\r |
163 | CHECK_CPUID_IS_SUPPORTED\r |
164 | MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);\r |
165 | MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);\r |
166 | return True;\r |
167 | }\r |
168 | \r |
169 | static const UInt32 kVendors[][3] =\r |
170 | {\r |
171 | { 0x756E6547, 0x49656E69, 0x6C65746E},\r |
172 | { 0x68747541, 0x69746E65, 0x444D4163},\r |
173 | { 0x746E6543, 0x48727561, 0x736C7561}\r |
174 | };\r |
175 | \r |
176 | int x86cpuid_GetFirm(const Cx86cpuid *p)\r |
177 | {\r |
178 | unsigned i;\r |
179 | for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)\r |
180 | {\r |
181 | const UInt32 *v = kVendors[i];\r |
182 | if (v[0] == p->vendor[0] &&\r |
183 | v[1] == p->vendor[1] &&\r |
184 | v[2] == p->vendor[2])\r |
185 | return (int)i;\r |
186 | }\r |
187 | return -1;\r |
188 | }\r |
189 | \r |
190 | BoolInt CPU_Is_InOrder()\r |
191 | {\r |
192 | Cx86cpuid p;\r |
193 | int firm;\r |
194 | UInt32 family, model;\r |
195 | if (!x86cpuid_CheckAndRead(&p))\r |
196 | return True;\r |
197 | \r |
198 | family = x86cpuid_GetFamily(p.ver);\r |
199 | model = x86cpuid_GetModel(p.ver);\r |
200 | \r |
201 | firm = x86cpuid_GetFirm(&p);\r |
202 | \r |
203 | switch (firm)\r |
204 | {\r |
205 | case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (\r |
206 | /* In-Order Atom CPU */\r |
207 | model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */\r |
208 | || model == 0x26 /* 45 nm, Z6xx */\r |
209 | || model == 0x27 /* 32 nm, Z2460 */\r |
210 | || model == 0x35 /* 32 nm, Z2760 */\r |
211 | || model == 0x36 /* 32 nm, N2xxx, D2xxx */\r |
212 | )));\r |
213 | case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));\r |
214 | case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));\r |
215 | }\r |
216 | return True;\r |
217 | }\r |
218 | \r |
219 | #if !defined(MY_CPU_AMD64) && defined(_WIN32)\r |
648db22b |
220 | #include <windows.h>\r |
9e052883 |
221 | static BoolInt CPU_Sys_Is_SSE_Supported()\r |
222 | {\r |
223 | OSVERSIONINFO vi;\r |
224 | vi.dwOSVersionInfoSize = sizeof(vi);\r |
225 | if (!GetVersionEx(&vi))\r |
226 | return False;\r |
227 | return (vi.dwMajorVersion >= 5);\r |
228 | }\r |
229 | #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;\r |
230 | #else\r |
231 | #define CHECK_SYS_SSE_SUPPORT\r |
232 | #endif\r |
233 | \r |
234 | \r |
235 | static UInt32 X86_CPUID_ECX_Get_Flags()\r |
236 | {\r |
237 | Cx86cpuid p;\r |
238 | CHECK_SYS_SSE_SUPPORT\r |
239 | if (!x86cpuid_CheckAndRead(&p))\r |
240 | return 0;\r |
241 | return p.c;\r |
242 | }\r |
243 | \r |
244 | BoolInt CPU_IsSupported_AES()\r |
245 | {\r |
246 | return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;\r |
247 | }\r |
248 | \r |
249 | BoolInt CPU_IsSupported_SSSE3()\r |
250 | {\r |
251 | return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;\r |
252 | }\r |
253 | \r |
254 | BoolInt CPU_IsSupported_SSE41()\r |
255 | {\r |
256 | return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;\r |
257 | }\r |
258 | \r |
259 | BoolInt CPU_IsSupported_SHA()\r |
260 | {\r |
261 | Cx86cpuid p;\r |
262 | CHECK_SYS_SSE_SUPPORT\r |
263 | if (!x86cpuid_CheckAndRead(&p))\r |
264 | return False;\r |
265 | \r |
266 | if (p.maxFunc < 7)\r |
267 | return False;\r |
268 | {\r |
269 | UInt32 d[4] = { 0 };\r |
270 | MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);\r |
271 | return (d[1] >> 29) & 1;\r |
272 | }\r |
273 | }\r |
274 | \r |
275 | // #include <stdio.h>\r |
276 | \r |
277 | #ifdef _WIN32\r |
648db22b |
278 | #include <windows.h>\r |
9e052883 |
279 | #endif\r |
280 | \r |
281 | BoolInt CPU_IsSupported_AVX2()\r |
282 | {\r |
283 | Cx86cpuid p;\r |
284 | CHECK_SYS_SSE_SUPPORT\r |
285 | \r |
286 | #ifdef _WIN32\r |
287 | #define MY__PF_XSAVE_ENABLED 17\r |
288 | if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))\r |
289 | return False;\r |
290 | #endif\r |
291 | \r |
292 | if (!x86cpuid_CheckAndRead(&p))\r |
293 | return False;\r |
294 | if (p.maxFunc < 7)\r |
295 | return False;\r |
296 | {\r |
297 | UInt32 d[4] = { 0 };\r |
298 | MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);\r |
299 | // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);\r |
300 | return 1\r |
301 | & (d[1] >> 5); // avx2\r |
302 | }\r |
303 | }\r |
304 | \r |
305 | BoolInt CPU_IsSupported_VAES_AVX2()\r |
306 | {\r |
307 | Cx86cpuid p;\r |
308 | CHECK_SYS_SSE_SUPPORT\r |
309 | \r |
310 | #ifdef _WIN32\r |
311 | #define MY__PF_XSAVE_ENABLED 17\r |
312 | if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))\r |
313 | return False;\r |
314 | #endif\r |
315 | \r |
316 | if (!x86cpuid_CheckAndRead(&p))\r |
317 | return False;\r |
318 | if (p.maxFunc < 7)\r |
319 | return False;\r |
320 | {\r |
321 | UInt32 d[4] = { 0 };\r |
322 | MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);\r |
323 | // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);\r |
324 | return 1\r |
325 | & (d[1] >> 5) // avx2\r |
326 | // & (d[1] >> 31) // avx512vl\r |
327 | & (d[2] >> 9); // vaes // VEX-256/EVEX\r |
328 | }\r |
329 | }\r |
330 | \r |
331 | BoolInt CPU_IsSupported_PageGB()\r |
332 | {\r |
333 | Cx86cpuid cpuid;\r |
334 | if (!x86cpuid_CheckAndRead(&cpuid))\r |
335 | return False;\r |
336 | {\r |
337 | UInt32 d[4] = { 0 };\r |
338 | MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);\r |
339 | if (d[0] < 0x80000001)\r |
340 | return False;\r |
341 | }\r |
342 | {\r |
343 | UInt32 d[4] = { 0 };\r |
344 | MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);\r |
345 | return (d[3] >> 26) & 1;\r |
346 | }\r |
347 | }\r |
348 | \r |
349 | \r |
350 | #elif defined(MY_CPU_ARM_OR_ARM64)\r |
351 | \r |
352 | #ifdef _WIN32\r |
353 | \r |
648db22b |
354 | #include <windows.h>\r |
9e052883 |
355 | \r |
356 | BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }\r |
357 | BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }\r |
358 | BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }\r |
359 | \r |
360 | #else\r |
361 | \r |
362 | #if defined(__APPLE__)\r |
363 | \r |
364 | /*\r |
365 | #include <stdio.h>\r |
366 | #include <string.h>\r |
367 | static void Print_sysctlbyname(const char *name)\r |
368 | {\r |
369 | size_t bufSize = 256;\r |
370 | char buf[256];\r |
371 | int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);\r |
372 | {\r |
373 | int i;\r |
374 | printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);\r |
375 | for (i = 0; i < 20; i++)\r |
376 | printf(" %2x", (unsigned)(Byte)buf[i]);\r |
377 | \r |
378 | }\r |
379 | }\r |
380 | */\r |
381 | \r |
382 | static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)\r |
383 | {\r |
384 | UInt32 val = 0;\r |
385 | if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)\r |
386 | return 1;\r |
387 | return 0;\r |
388 | }\r |
389 | \r |
390 | /*\r |
391 | Print_sysctlbyname("hw.pagesize");\r |
392 | Print_sysctlbyname("machdep.cpu.brand_string");\r |
393 | */\r |
394 | \r |
395 | BoolInt CPU_IsSupported_CRC32(void)\r |
396 | {\r |
397 | return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");\r |
398 | }\r |
399 | \r |
400 | BoolInt CPU_IsSupported_NEON(void)\r |
401 | {\r |
402 | return My_sysctlbyname_Get_BoolInt("hw.optional.neon");\r |
403 | }\r |
404 | \r |
405 | #ifdef MY_CPU_ARM64\r |
406 | #define APPLE_CRYPTO_SUPPORT_VAL 1\r |
407 | #else\r |
408 | #define APPLE_CRYPTO_SUPPORT_VAL 0\r |
409 | #endif\r |
410 | \r |
411 | BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }\r |
412 | BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }\r |
413 | BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }\r |
414 | \r |
415 | \r |
416 | #else // __APPLE__\r |
417 | \r |
418 | #if defined(__SWITCH__) || defined(__vita__)\r |
419 | \r |
420 | BoolInt CPU_IsSupported_CRC32(void) { return 0; }\r |
421 | BoolInt CPU_IsSupported_NEON(void) { return 1; }\r |
422 | BoolInt CPU_IsSupported_SHA1(void) { return 0; }\r |
423 | BoolInt CPU_IsSupported_SHA2(void) { return 0; }\r |
424 | BoolInt CPU_IsSupported_AES (void) { return 0; }\r |
425 | \r |
426 | #else\r |
427 | \r |
2fb84655 |
428 | #if (defined(__ANDROID_API__) && __ANDROID_API__ < 18) || \\r |
429 | defined(_MIYOO) || defined(_3DS) || defined(NO_HWCAP)\r |
722c1383 |
430 | // no getauxval/AT_HWCAP\r |
431 | #else\r |
9e052883 |
432 | #define USE_HWCAP\r |
722c1383 |
433 | #endif\r |
9e052883 |
434 | \r |
435 | #ifdef USE_HWCAP\r |
436 | \r |
437 | #include <asm/hwcap.h>\r |
722c1383 |
438 | #include <sys/auxv.h>\r |
9e052883 |
439 | \r |
440 | #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \\r |
441 | BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }\r |
442 | \r |
443 | #ifdef MY_CPU_ARM64\r |
444 | #define MY_HWCAP_CHECK_FUNC(name) \\r |
445 | MY_HWCAP_CHECK_FUNC_2(name, name)\r |
446 | MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)\r |
447 | // MY_HWCAP_CHECK_FUNC (ASIMD)\r |
448 | #elif defined(MY_CPU_ARM)\r |
449 | #define MY_HWCAP_CHECK_FUNC(name) \\r |
450 | BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }\r |
451 | MY_HWCAP_CHECK_FUNC_2(NEON, NEON)\r |
452 | #endif\r |
453 | \r |
454 | #else // USE_HWCAP\r |
455 | \r |
456 | #define MY_HWCAP_CHECK_FUNC(name) \\r |
457 | BoolInt CPU_IsSupported_ ## name() { return 0; }\r |
458 | MY_HWCAP_CHECK_FUNC(NEON)\r |
459 | \r |
460 | #endif // USE_HWCAP\r |
461 | \r |
462 | MY_HWCAP_CHECK_FUNC (CRC32)\r |
463 | MY_HWCAP_CHECK_FUNC (SHA1)\r |
464 | MY_HWCAP_CHECK_FUNC (SHA2)\r |
465 | MY_HWCAP_CHECK_FUNC (AES)\r |
466 | \r |
467 | #endif\r |
468 | #endif // __APPLE__\r |
469 | #endif // _WIN32\r |
470 | \r |
471 | #endif // MY_CPU_ARM_OR_ARM64\r |
472 | \r |
473 | \r |
474 | \r |
475 | #ifdef __APPLE__\r |
476 | \r |
477 | #include <sys/sysctl.h>\r |
478 | \r |
/*
  My_sysctlbyname_Get() reads the sysctl value (name) into (buf).
  (*bufSize) is passed to sysctlbyname() as the old-value length argument;
  no new value is set (NULL, 0).
  Returns: the result of sysctlbyname().
*/
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  const int res = sysctlbyname(name, buf, bufSize, NULL, 0);
  return res;
}
483 | \r |
484 | int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)\r |
485 | {\r |
486 | size_t bufSize = sizeof(*val);\r |
487 | int res = My_sysctlbyname_Get(name, val, &bufSize);\r |
488 | if (res == 0 && bufSize != sizeof(*val))\r |
489 | return EFAULT;\r |
490 | return res;\r |
491 | }\r |
492 | \r |
493 | #endif\r |