-rw-r--r-- 2138 djbsort-20260127/compilers/amd64+sse3+ssse3+sse41+popcnt+avx+bmi1+bmi2+avx2.c raw
/*
gcc has __builtin_cpu_supports("avx2")
but implemented it incorrectly until 2018:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100
cannot expect all of those machines to have upgraded gcc yet
furthermore, why is checking just for avx2 enough?
has intel guaranteed that it will never introduce
a cpu with avx2 instructions and without (e.g.) sse4.2?
so manually check cpuid and xgetbv here
and include all the "lower" instruction sets
rather than trying to guess which ones are implied
*/
#include <inttypes.h>
#ifdef __FILC__
#include <cpuid.h>
#include <stdfil.h>
#elif defined(_MSC_VER)
#include <immintrin.h>
#include <intrin.h>
#endif
static void cpuid0(uint32_t func,uint32_t *a,uint32_t *b,uint32_t *c,uint32_t *d)
{
#ifdef __FILC__
__get_cpuid(func,a,b,c,d);
#elif defined(_MSC_VER)
uint32_t x[4];
__cpuid(x,func);
*a = x[0];
*b = x[1];
*c = x[2];
*d = x[3];
#else
asm volatile("cpuid":"=a"(*a),"=b"(*b),"=c"(*c),"=d"(*d):"a"(func),"c"(0));
#endif
}
static uint64_t xgetbv0(void)
{
#ifdef __FILC__
return zxgetbv();
#elif defined(_MSC_VER)
return _xgetbv(0);
#else
uint32_t a,d;
asm(".byte 15;.byte 1;.byte 208":"=a"(a),"=d"(d):"c"(0));
return a|(((uint64_t)d)<<32);
#endif
}
#define WANT_1_3 ((1<<23)|(1<<25)|(1<<26))
/* 23=mmx; 25=sse; 26=sse2 */
#define WANT_1_2 ((1<<0)|(1<<9)|(1<<19)|(1<<20)|(1<<23)|(1<<27)|(1<<28))
/* 0=sse3; 9=ssse3; 19=sse41; 20=sse42; 23=popcnt; 27=osxsave; 28=avx */
#define WANT_7_1 ((1<<3)|(1<<5)|(1<<8))
/* 3=bmi1; 5=avx2; 8=bmi2 */
#define WANT_XCR ((1<<1)|(1<<2))
/* 1=xmm; 2=ymm */
int supports(void)
{
uint32_t cpuidmax,id0,id1,id2;
uint32_t familymodelstepping;
uint32_t feature0,feature1,feature2,feature3;
uint64_t xcr;
cpuid0(0,&cpuidmax,&id0,&id1,&id2);
if (cpuidmax < 7) return 0;
cpuid0(1,&familymodelstepping,&feature1,&feature2,&feature3);
if (WANT_1_2 != (WANT_1_2 & feature2)) return 0;
if (WANT_1_3 != (WANT_1_3 & feature3)) return 0;
cpuid0(7,&feature0,&feature1,&feature2,&feature3);
if (WANT_7_1 != (WANT_7_1 & feature1)) return 0;
xcr = xgetbv0();
if (WANT_XCR != (WANT_XCR & xcr)) return 0;
return 1;
}