/* WARNING: auto-generated (by autogen/useint); do not edit */ #include typedef __m256i int32x8; #define int32x8_load(z) _mm256_loadu_si256((__m256i *) (z)) #define int32x8_store(z,i) _mm256_storeu_si256((__m256i *) (z),(i)) #define int32x8_broadcast _mm256_set1_epi32 #define int32x8_floatmask(y) _mm256_srli_epi32(_mm256_srai_epi32(y,31),1) #include "djbsort.h" #include "float32_sort.h" #include "crypto_int32.h" void float32_sort(float *x,long long n) { int32_t *y = (int32_t *) x; long long j; for (j = 0;j+16 <= n;j += 16) { int32x8 y0 = int32x8_load(y+j); int32x8 y1 = int32x8_load(y+j+8); y0 ^= int32x8_floatmask(y0); y1 ^= int32x8_floatmask(y1); int32x8_store(y+j,y0); int32x8_store(y+j+8,y1); } for (;j < n;++j) { int32_t yj = y[j]; yj ^= ((uint32_t) crypto_int32_negative_mask(yj)) >> 1; y[j] = yj; } djbsort_int32(y,n); for (j = 0;j+16 <= n;j += 16) { int32x8 y0 = int32x8_load(y+j); int32x8 y1 = int32x8_load(y+j+8); y0 ^= int32x8_floatmask(y0); y1 ^= int32x8_floatmask(y1); int32x8_store(y+j,y0); int32x8_store(y+j+8,y1); } for (;j < n;++j) { int32_t yj = y[j]; yj ^= ((uint32_t) crypto_int32_negative_mask(yj)) >> 1; y[j] = yj; } }