-rwxr-xr-x 4444 djbsort-20260127/autogen/speed raw
#!/usr/bin/env python3
print(r'''/* WARNING: auto-generated (by autogen/speed); do not edit */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <cpucycles.h> /* -lcpucycles */
#include <djbsort.h> /* -ldjbsort */
#include "limits.inc"
/*
 * Allocate len+128 zero-filled bytes on the heap.
 * Never returns NULL: the process is aborted if the allocation fails.
 * The caller releases the buffer with free().
 * (The 128 bytes of slack presumably leave room for aligned() to advance
 * the pointer; confirm against callers.)
 */
static void *callocplus(long long len)
{
  void *storage = calloc(1,len + 128);

  if (storage == NULL)
    abort();
  return storage;
}
/*
 * Round x up to the next 64-byte boundary and return the result.
 * A pointer that is already 64-byte aligned is returned unchanged;
 * otherwise the result is between 1 and 63 bytes past x, so the
 * underlying buffer must have at least that much slack.
 */
static void *aligned(void *x)
{
  unsigned long addr = (unsigned long) x;
  /* (0 - addr) mod 64: distance from addr up to the next multiple of 64 */
  unsigned long pad = (0 - addr) & 63;

  return (unsigned char *) x + pad;
}
/*
 * Sort x[0..n-1] in place into nondecreasing order.
 *
 * The sequence of positions compared depends only on n, never on the
 * values stored in x: every step is a compare-exchange between two
 * positions chosen by the loop indices.
 * Arrays with fewer than 2 elements are left untouched.
 */
static void longlong_sort(long long *x,long long n)
{
  long long top;

  if (n <= 1) return;

  /* top = largest power of 2 strictly below n (top < n <= 2*top) */
  for (top = 1;top < n - top;top *= 2)
    ;

  for (long long p = top;p > 0;p /= 2) {
    long long i;

    /* compare-exchange x[i] with x[i+p] wherever bit p of i is clear */
    for (i = 0;i + p < n;++i) {
      if (i & p) continue;
      if (x[i] > x[i + p]) {
        long long swap = x[i];
        x[i] = x[i + p];
        x[i + p] = swap;
      }
    }

    /* i deliberately carries over from one q to the next */
    i = 0;
    for (long long q = top;q > p;q /= 2) {
      while (i < n - q) {
        if ((i & p) == 0) {
          /* keep the running minimum of x[i+p] against x[i+q], x[i+q/2], ... */
          long long lo = x[i + p];
          for (long long r = q;r > p;r /= 2) {
            long long other = x[i + r];
            if (lo > other) {
              x[i + r] = lo;
              lo = other;
            }
          }
          x[i + p] = lo;
        }
        ++i;
      }
    }
  }
}
/* Number of timed intervals per measurement. t_print() averages the middle
   half of the sorted intervals (indices TIMINGS/4 .. 3*TIMINGS/4-1). */
#define TIMINGS 32 /* must be multiple of 4 */
/* Shared timestamp buffer: a measurement stores TIMINGS+1 cpucycles() readings
   here, then t_print() overwrites t[0..TIMINGS-1] with the successive
   differences t[i+1]-t[i]. */
static long long t[TIMINGS+1];
/*
 * Print one result line for the TIMINGS+1 timestamps currently stored in t[].
 *
 * op:   label printed first on the line
 * impl: implementation number, or a negative value to print "selected"
 * len:  input length printed after impl
 *
 * Output format: "op impl len iqm +d0+d1...": iqm is the rounded mean of
 * the middle half of the sorted intervals, and each d is one interval minus
 * iqm, printed with an explicit sign.
 * Side effect: t[0..TIMINGS-1] is replaced by the intervals t[i+1]-t[i].
 */
static void t_print(const char *op,long long impl,long long len)
{
  long long sorted[TIMINGS];
  long long iqm;
  long long k;

  printf("%s",op);
  if (impl < 0)
    printf(" selected");
  else
    printf(" %lld",impl);
  printf(" %lld",len);

  /* convert timestamps to intervals, keeping an unsorted copy in t[] */
  for (k = 0;k < TIMINGS;++k) {
    t[k] = t[k+1] - t[k];
    sorted[k] = t[k];
  }
  longlong_sort(sorted,TIMINGS);

  /* the middle half holds TIMINGS/2 samples: mean = 2*sum/TIMINGS, rounded */
  iqm = 0;
  for (k = TIMINGS/4;k < 3*TIMINGS/4;++k)
    iqm += sorted[k];
  iqm = (2*iqm + TIMINGS/2) / TIMINGS;

  printf(" %lld ",iqm);
  for (k = 0;k < TIMINGS;++k)
    printf("%+lld",t[k] - iqm);
  printf("\n");
  fflush(stdout);
}
/*
 * Report the cycle counter itself: print the persecond value and the
 * implementation name reported by the cpucycles library, then time
 * TIMINGS back-to-back cpucycles() calls and print them via t_print()
 * under the label "cpucycles" with length 0.
 */
static void measure_cpucycles(void)
{
  long long k;

  printf("cpucycles selected persecond %lld\n",cpucycles_persecond());
  printf("cpucycles selected implementation %s\n",cpucycles_implementation());

  /* TIMINGS+1 consecutive readings give TIMINGS intervals */
  k = 0;
  while (k <= TIMINGS) {
    t[k] = cpucycles();
    ++k;
  }

  t_print("cpucycles",-1,0);
}
#define MAXTEST_BYTES 65536
static const char *targeto = 0;
static const char *targetp = 0;
static const char *targeti = 0;
''')
# Emit one C benchmark function, measure_sort_<S>(), per djbsort primitive:
# {int, uint, float} x {32, 64} bits, each in a plain and a "down" variant.
# `measuresort` accumulates the C statements that call these functions, in
# generation order, for splicing into main().
measuresort = ''
for bytes in (4, 8):
    bits = 8 * bytes
    for family in ('int', 'uint', 'float'):
        for variant in ('', 'down'):
            S = f'{family}{bits}{variant}'
            T = f'{family}{bits}_t'
            # Map floatNN_t to the C type used for the array elements.
            # compiler support for float16 is currently spotty, hence void.
            T = {
                'float16_t': 'void',
                'float32_t': 'float',
                'float64_t': 'double',
            }.get(T, T)
            # Braces are doubled because this is an f-string; only {S}, {T}
            # and {bytes} are substituted.
            print(fr'''static void measure_sort_{S}(void)
{{
if (targeto && strcmp(targeto,"sort")) return;
if (targetp && strcmp(targetp,"{S}")) return;
void *mstorage = callocplus(MAXTEST_BYTES);
{T} *m = ({T} *) aligned(mstorage);
long long mlen;
for (long long impl = -1;impl < djbsort_numimpl_{S}();++impl) {{
void (*crypto_sort)({T} *,long long);
if (targeti && strcmp(targeti,djbsort_dispatch_{S}_implementation(impl))) continue;
if (impl >= 0) {{
crypto_sort = djbsort_dispatch_{S}(impl);
printf("sort_{S} %lld implementation %s compiler %s\n",impl,djbsort_dispatch_{S}_implementation(impl),djbsort_dispatch_{S}_compiler(impl));
}} else {{
crypto_sort = djbsort_{S};
printf("sort_{S} selected implementation %s compiler %s\n",djbsort_{S}_implementation(),djbsort_{S}_compiler());
}}
mlen = 0;
while ({bytes}*mlen <= MAXTEST_BYTES) {{
for (long long i = 0;i <= TIMINGS;++i) {{
t[i] = cpucycles();
crypto_sort(m,mlen);
}}
t_print("sort_{S}",impl,mlen);
mlen += 1+mlen/2;
}}
}}
free(mstorage);
}}
''')
            measuresort += f' measure_sort_{S}();\n'
# Emit the tail of the generated C file: the print_cpuid.inc include and main().
# The generated main() prints the djbsort version and arch, calls print_cpuid(),
# then reads up to three optional command-line arguments into targeto, targetp
# and targeti (in that order) before running measure_cpucycles(), limits() and
# every measure_sort_* call collected in `measuresort`.
# print_cpuid() and limits() presumably come from the two .inc files -- confirm.
# Braces are doubled because this is an f-string; {measuresort} is the only
# substitution.
print(fr'''#include "print_cpuid.inc"
int main(int argc,char **argv)
{{
printf("djbsort version %s\n",djbsort_version());
printf("djbsort arch %s\n",djbsort_arch());
print_cpuid();
if (*argv) ++argv;
if (*argv) {{
targeto = *argv++;
if (*argv) {{
targetp = *argv++;
if (*argv) {{
targeti = *argv++;
}}
}}
}}
measure_cpucycles();
limits();
{measuresort}
return 0;
}}''')