/* gcc has __builtin_cpu_supports("avx2") but implemented it incorrectly until 2018: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100 cannot expect all of those machines to have upgraded gcc yet furthermore, why is checking just for avx2 enough? has intel guaranteed that it will never introduce a cpu with avx2 instructions and without (e.g.) sse4.2? so manually check cpuid and xgetbv here and include all the "lower" instruction sets rather than trying to guess which ones are implied */ #include #ifdef __FILC__ #include #include #elif defined(_MSC_VER) #include #include #endif static void cpuid0(uint32_t func,uint32_t *a,uint32_t *b,uint32_t *c,uint32_t *d) { #ifdef __FILC__ __get_cpuid(func,a,b,c,d); #elif defined(_MSC_VER) uint32_t x[4]; __cpuid(x,func); *a = x[0]; *b = x[1]; *c = x[2]; *d = x[3]; #else asm volatile("cpuid":"=a"(*a),"=b"(*b),"=c"(*c),"=d"(*d):"a"(func),"c"(0)); #endif } static uint64_t xgetbv0(void) { #ifdef __FILC__ return zxgetbv(); #elif defined(_MSC_VER) return _xgetbv(0); #else uint32_t a,d; asm(".byte 15;.byte 1;.byte 208":"=a"(a),"=d"(d):"c"(0)); return a|(((uint64_t)d)<<32); #endif } #define WANT_1_3 ((1<<23)|(1<<25)|(1<<26)) /* 23=mmx; 25=sse; 26=sse2 */ #define WANT_1_2 ((1<<0)|(1<<9)|(1<<19)|(1<<20)|(1<<23)|(1<<27)|(1<<28)) /* 0=sse3; 9=ssse3; 19=sse41; 20=sse42; 23=popcnt; 27=osxsave; 28=avx */ #define WANT_7_1 ((1<<3)|(1<<5)|(1<<8)) /* 3=bmi1; 5=avx2; 8=bmi2 */ #define WANT_XCR ((1<<1)|(1<<2)) /* 1=xmm; 2=ymm */ int supports(void) { uint32_t cpuidmax,id0,id1,id2; uint32_t familymodelstepping; uint32_t feature0,feature1,feature2,feature3; uint64_t xcr; cpuid0(0,&cpuidmax,&id0,&id1,&id2); if (cpuidmax < 7) return 0; cpuid0(1,&familymodelstepping,&feature1,&feature2,&feature3); if (WANT_1_2 != (WANT_1_2 & feature2)) return 0; if (WANT_1_3 != (WANT_1_3 & feature3)) return 0; cpuid0(7,&feature0,&feature1,&feature2,&feature3); if (WANT_7_1 != (WANT_7_1 & feature1)) return 0; xcr = xgetbv0(); if (WANT_XCR != (WANT_XCR & xcr)) return 0; return 1; }