-rw-r--r-- 2526 djbsort-20260127/sortbench/bench64-x86simdsort.cc raw
/* WARNING: auto-generated (by sortbench/benchgen.py); do not edit */
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include <algorithm>
#include "cpucycles.h"
#include "x86simdsort.h"
typedef int64_t sorttype;
#define N 131072
#define TIMINGS 64
typedef long long num;
sorttype r[N] __attribute__((aligned(4096)));
sorttype perm[TIMINGS+1][N] __attribute__((aligned(4096)));
sorttype x[(TIMINGS+1)*(N+1)] __attribute__((aligned(4096)));
sorttype y[N] __attribute__((aligned(4096)));
num t[TIMINGS+1] __attribute__((aligned(4096)));
int main()
{
printf("cpucycles_version %s\n",cpucycles_version());
printf("cpucycles_implementation %s\n",cpucycles_implementation());
srandom(getpid());
for (num loop = 0;loop < 2;++loop)
for (num j = 0;j <= TIMINGS;++j)
t[j] = cpucycles();
printf("overhead");
for (num i = 0;i < TIMINGS;++i)
printf(" %lld",t[i+1]-t[i]);
printf("\n");
for (num j = 0;j <= TIMINGS;++j)
for (num i = 0;i < N;++i)
perm[j][i] = i;
num randomized = 0;
for (num npos = 0;;++npos) {
num nposlow = npos%24;
num nposhigh = npos/24;
num n = round(exp2(nposhigh+round(27.75001*nposlow)/665));
if (n > N) break;
num npad = n+!(n&1);
for (;randomized < n;++randomized)
for (num j = 0;j <= TIMINGS;++j) {
num pos = random()%(randomized+1);
sorttype tmp = perm[j][pos];
perm[j][pos] = perm[j][randomized];
perm[j][randomized] = tmp;
}
for (num i = 0;i < n;++i) {
r[i] = 0;
for (num loop = 0;loop < 8;++loop)
r[i] = (r[i]^(r[i]<<15))+random();
}
for (num j = 0;j <= TIMINGS;++j)
for (num i = 0;i < n;++i)
x[j*npad+i] = r[perm[j][i]];
num sum = 0;
for (num j = 0;j <= TIMINGS;++j)
for (num i = 0;i < n;++i)
sum += x[j*npad+i];
for (num j = 0;j <= TIMINGS;++j)
t[j] = cpucycles();
for (num j = 0;j <= TIMINGS;++j) {
sorttype *y;
t[j] = cpucycles();
y = x+j*npad;
x86simdsort::qsort(y,n,0);
}
for (num i = 0;i < n;++i)
y[i] = r[i];
std::sort(y,y+n);
for (num j = 0;j <= TIMINGS;++j)
for (num i = 0;i < n;++i)
assert(y[i] == x[j*npad+i]);
for (num j = 0;j <= TIMINGS;++j)
for (num i = 0;i < n;++i)
sum -= x[j*npad+i];
assert(sum == 0);
printf("%lld",n);
for (num i = 0;i < TIMINGS;++i)
printf(" %lld",t[i+1]-t[i]);
printf("\n");
}
return 0;
}