-rw-r--r-- 16884 djbsort-20260127/command/djbsort-speed.c raw
/* WARNING: auto-generated (by autogen/speed); do not edit */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <assert.h> #include <time.h> #include <sys/time.h> #include <sys/types.h> #include <sys/resource.h> #include <cpucycles.h> /* -lcpucycles */ #include <djbsort.h> /* -ldjbsort */ #include "limits.inc" static void *callocplus(long long len) { void *x = calloc(1,len + 128); if (!x) abort(); return x; } static void *aligned(void *x) { unsigned char *y = (unsigned char *) x; y += 63 & (-(unsigned long) x); return y; } static void longlong_sort(long long *x,long long n) { long long top,p,q,r,i; if (n < 2) return; top = 1; while (top < n - top) top += top; for (p = top;p > 0;p >>= 1) { for (i = 0;i < n - p;++i) if (!(i & p)) if (x[i] > x[i+p]) { long long t = x[i]; x[i] = x[i+p]; x[i+p] = t; } i = 0; for (q = top;q > p;q >>= 1) { for (;i < n - q;++i) { if (!(i & p)) { long long a = x[i + p]; for (r = q;r > p;r >>= 1) if (a > x[i+r]) { long long t = a; a = x[i+r]; x[i+r] = t; } x[i + p] = a; } } } } } #define TIMINGS 32 /* must be multiple of 4 */ static long long t[TIMINGS+1]; static void t_print(const char *op,long long impl,long long len) { long long tsort[TIMINGS]; long long iqm = 0; printf("%s",op); if (impl >= 0) printf(" %lld",impl); else printf(" selected"); printf(" %lld",len); for (long long i = 0;i < TIMINGS;++i) tsort[i] = t[i] = t[i+1]-t[i]; longlong_sort(tsort,TIMINGS); for (long long j = TIMINGS/4;j < 3*TIMINGS/4;++j) iqm += tsort[j]; iqm *= 2; iqm += TIMINGS/2; iqm /= TIMINGS; printf(" %lld ",iqm); for (long long i = 0;i < TIMINGS;++i) printf("%+lld",t[i]-iqm); printf("\n"); fflush(stdout); } static void measure_cpucycles(void) { printf("cpucycles selected persecond %lld\n",cpucycles_persecond()); printf("cpucycles selected implementation %s\n",cpucycles_implementation()); for (long long i = 0;i <= TIMINGS;++i) t[i] = cpucycles(); t_print("cpucycles",-1,0); } #define MAXTEST_BYTES 65536 static const char *targeto = 0; static const char *targetp = 0; static const char *targeti = 0; static void measure_sort_int32(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"int32")) return; void *mstorage = callocplus(MAXTEST_BYTES); int32_t *m = (int32_t *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_int32();++impl) { void (*crypto_sort)(int32_t *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_int32_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_int32(impl); printf("sort_int32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int32_implementation(impl),djbsort_dispatch_int32_compiler(impl)); } else { crypto_sort = djbsort_int32; printf("sort_int32 selected implementation %s compiler %s\n",djbsort_int32_implementation(),djbsort_int32_compiler()); } mlen = 0; while (4*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_int32",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_int32down(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"int32down")) return; void *mstorage = callocplus(MAXTEST_BYTES); int32_t *m = (int32_t *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_int32down();++impl) { void (*crypto_sort)(int32_t *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_int32down_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_int32down(impl); printf("sort_int32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int32down_implementation(impl),djbsort_dispatch_int32down_compiler(impl)); } else { crypto_sort = djbsort_int32down; printf("sort_int32down selected implementation %s compiler %s\n",djbsort_int32down_implementation(),djbsort_int32down_compiler()); } mlen = 0; while (4*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_int32down",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_uint32(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"uint32")) return; void *mstorage = callocplus(MAXTEST_BYTES); uint32_t *m = (uint32_t *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_uint32();++impl) { void (*crypto_sort)(uint32_t *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_uint32_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_uint32(impl); printf("sort_uint32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint32_implementation(impl),djbsort_dispatch_uint32_compiler(impl)); } else { crypto_sort = djbsort_uint32; printf("sort_uint32 selected implementation %s compiler %s\n",djbsort_uint32_implementation(),djbsort_uint32_compiler()); } mlen = 0; while (4*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_uint32",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_uint32down(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"uint32down")) return; void *mstorage = callocplus(MAXTEST_BYTES); uint32_t *m = (uint32_t *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_uint32down();++impl) { void (*crypto_sort)(uint32_t *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_uint32down_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_uint32down(impl); printf("sort_uint32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint32down_implementation(impl),djbsort_dispatch_uint32down_compiler(impl)); } else { crypto_sort = djbsort_uint32down; printf("sort_uint32down selected implementation %s compiler %s\n",djbsort_uint32down_implementation(),djbsort_uint32down_compiler()); } mlen = 0; while (4*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_uint32down",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_float32(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"float32")) return; void *mstorage = callocplus(MAXTEST_BYTES); float *m = (float *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_float32();++impl) { void (*crypto_sort)(float *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_float32_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_float32(impl); printf("sort_float32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float32_implementation(impl),djbsort_dispatch_float32_compiler(impl)); } else { crypto_sort = djbsort_float32; printf("sort_float32 selected implementation %s compiler %s\n",djbsort_float32_implementation(),djbsort_float32_compiler()); } mlen = 0; while (4*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_float32",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_float32down(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"float32down")) return; void *mstorage = callocplus(MAXTEST_BYTES); float *m = (float *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_float32down();++impl) { void (*crypto_sort)(float *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_float32down_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_float32down(impl); printf("sort_float32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float32down_implementation(impl),djbsort_dispatch_float32down_compiler(impl)); } else { crypto_sort = djbsort_float32down; printf("sort_float32down selected implementation %s compiler %s\n",djbsort_float32down_implementation(),djbsort_float32down_compiler()); } mlen = 0; while (4*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_float32down",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_int64(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"int64")) return; void *mstorage = callocplus(MAXTEST_BYTES); int64_t *m = (int64_t *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_int64();++impl) { void (*crypto_sort)(int64_t *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_int64_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_int64(impl); printf("sort_int64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int64_implementation(impl),djbsort_dispatch_int64_compiler(impl)); } else { crypto_sort = djbsort_int64; printf("sort_int64 selected implementation %s compiler %s\n",djbsort_int64_implementation(),djbsort_int64_compiler()); } mlen = 0; while (8*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_int64",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_int64down(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"int64down")) return; void *mstorage = callocplus(MAXTEST_BYTES); int64_t *m = (int64_t *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_int64down();++impl) { void (*crypto_sort)(int64_t *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_int64down_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_int64down(impl); printf("sort_int64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int64down_implementation(impl),djbsort_dispatch_int64down_compiler(impl)); } else { crypto_sort = djbsort_int64down; printf("sort_int64down selected implementation %s compiler %s\n",djbsort_int64down_implementation(),djbsort_int64down_compiler()); } mlen = 0; while (8*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_int64down",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_uint64(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"uint64")) return; void *mstorage = callocplus(MAXTEST_BYTES); uint64_t *m = (uint64_t *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_uint64();++impl) { void (*crypto_sort)(uint64_t *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_uint64_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_uint64(impl); printf("sort_uint64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint64_implementation(impl),djbsort_dispatch_uint64_compiler(impl)); } else { crypto_sort = djbsort_uint64; printf("sort_uint64 selected implementation %s compiler %s\n",djbsort_uint64_implementation(),djbsort_uint64_compiler()); } mlen = 0; while (8*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_uint64",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_uint64down(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"uint64down")) return; void *mstorage = callocplus(MAXTEST_BYTES); uint64_t *m = (uint64_t *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_uint64down();++impl) { void (*crypto_sort)(uint64_t *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_uint64down_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_uint64down(impl); printf("sort_uint64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint64down_implementation(impl),djbsort_dispatch_uint64down_compiler(impl)); } else { crypto_sort = djbsort_uint64down; printf("sort_uint64down selected implementation %s compiler %s\n",djbsort_uint64down_implementation(),djbsort_uint64down_compiler()); } mlen = 0; while (8*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_uint64down",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_float64(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"float64")) return; void *mstorage = callocplus(MAXTEST_BYTES); double *m = (double *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_float64();++impl) { void (*crypto_sort)(double *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_float64_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_float64(impl); printf("sort_float64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float64_implementation(impl),djbsort_dispatch_float64_compiler(impl)); } else { crypto_sort = djbsort_float64; printf("sort_float64 selected implementation %s compiler %s\n",djbsort_float64_implementation(),djbsort_float64_compiler()); } mlen = 0; while (8*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_float64",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } static void measure_sort_float64down(void) { if (targeto && strcmp(targeto,"sort")) return; if (targetp && strcmp(targetp,"float64down")) return; void *mstorage = callocplus(MAXTEST_BYTES); double *m = (double *) aligned(mstorage); long long mlen; for (long long impl = -1;impl < djbsort_numimpl_float64down();++impl) { void (*crypto_sort)(double *,long long); if (targeti && strcmp(targeti,djbsort_dispatch_float64down_implementation(impl))) continue; if (impl >= 0) { crypto_sort = djbsort_dispatch_float64down(impl); printf("sort_float64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float64down_implementation(impl),djbsort_dispatch_float64down_compiler(impl)); } else { crypto_sort = djbsort_float64down; printf("sort_float64down selected implementation %s compiler %s\n",djbsort_float64down_implementation(),djbsort_float64down_compiler()); } mlen = 0; while (8*mlen <= MAXTEST_BYTES) { for (long long i = 0;i <= TIMINGS;++i) { t[i] = cpucycles(); crypto_sort(m,mlen); } t_print("sort_float64down",impl,mlen); mlen += 1+mlen/2; } } free(mstorage); } #include "print_cpuid.inc" int main(int argc,char **argv) { printf("djbsort version %s\n",djbsort_version()); printf("djbsort arch %s\n",djbsort_arch()); print_cpuid(); if (*argv) ++argv; if (*argv) { targeto = *argv++; if (*argv) { targetp = *argv++; if (*argv) { targeti = *argv++; } } } measure_cpucycles(); limits(); measure_sort_int32(); measure_sort_int32down(); measure_sort_uint32(); measure_sort_uint32down(); measure_sort_float32(); measure_sort_float32down(); measure_sort_int64(); measure_sort_int64down(); measure_sort_uint64(); measure_sort_uint64down(); measure_sort_float64(); measure_sort_float64down(); return 0; }