-rw-r--r-- 1412 djbsort-20260127/float64down/avx2useint64/sort.c raw
/* WARNING: auto-generated (by autogen/useint); do not edit */
#include <immintrin.h>
/* 256-bit integer vector type, used throughout this file as four 64-bit lanes. */
typedef __m256i int64x4;
/* Load 256 bits (four 64-bit values) from address z; the "loadu" form does not require alignment. */
#define int64x4_load(z) _mm256_loadu_si256((__m256i *) (z))
/* Store the 256-bit vector i to address z; the "storeu" form does not require alignment. */
#define int64x4_store(z,i) _mm256_storeu_si256((__m256i *) (z),(i))
/* Build a vector whose four 64-bit lanes all hold the given scalar. */
#define int64x4_broadcast _mm256_set1_epi64x
/*
 * Per-lane mask derived from the top bit of each 64-bit lane of y:
 *   top bit set   -> 0x8000000000000000 - 1 = 0x7fffffffffffffff
 *   top bit clear -> 0 - 0                  = 0
 * XORing a lane with this mask therefore flips its low 63 bits exactly when
 * its top bit is set, and never changes the top bit itself.
 * Note: y is expanded twice, so pass an expression without side effects.
 * NOTE(review): the `&` applied to __m256i operands presumably relies on the
 * GCC/Clang vector extensions -- confirm against the supported compilers.
 */
#define int64x4_floatmask(y) _mm256_sub_epi64((y)&int64x4_broadcast(0x8000000000000000ULL),_mm256_srli_epi64(y,63))
#include "djbsort.h"
#include "float64down_sort.h"
#include "crypto_int64.h"
void float64down_sort(double *x,long long n)
{
int64_t *y = (int64_t *) x;
long long j;
for (j = 0;j+8 <= n;j += 8) {
int64x4 y0 = int64x4_load(y+j);
int64x4 y1 = int64x4_load(y+j+4);
y0 ^= int64x4_floatmask(y0);
y1 ^= int64x4_floatmask(y1);
int64x4_store(y+j,y0 ^ int64x4_broadcast(-1));
int64x4_store(y+j+4,y1 ^ int64x4_broadcast(-1));
}
for (;j < n;++j) {
int64_t yj = y[j];
yj ^= ((uint64_t) crypto_int64_negative_mask(yj)) >> 1;
y[j] = yj ^ -1;
}
djbsort_int64(y,n);
for (j = 0;j+8 <= n;j += 8) {
int64x4 y0 = int64x4_load(y+j) ^ int64x4_broadcast(-1);
int64x4 y1 = int64x4_load(y+j+4) ^ int64x4_broadcast(-1);
y0 ^= int64x4_floatmask(y0);
y1 ^= int64x4_floatmask(y1);
int64x4_store(y+j,y0);
int64x4_store(y+j+4,y1);
}
for (;j < n;++j) {
int64_t yj = y[j] ^ -1;
yj ^= ((uint64_t) crypto_int64_negative_mask(yj)) >> 1;
y[j] = yj;
}
}