/* WARNING: auto-generated (by autogen/speed); do not edit */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <cpucycles.h> /* -lcpucycles */
#include <djbsort.h> /* -ldjbsort */

#include "limits.inc"

/*
 * Allocate len+128 zero-initialized bytes with calloc.
 * Never returns a null pointer: the process is aborted if calloc fails.
 * The caller releases the result with free().
 */
static void *callocplus(long long len)
{
  void *block;

  block = calloc(1,len + 128);
  if (block == NULL) abort();
  return block;
}

/*
 * Return the first address at or after x that is a multiple of 64.
 * The result is x advanced by between 0 and 63 bytes.
 */
static void *aligned(void *x)
{
  unsigned long addr = (unsigned long) x;
  unsigned long pad = (-addr) & 63; /* bytes needed to reach the next multiple of 64 */

  return (unsigned char *) x + pad;
}

/*
 * Sort x[0..n-1] in place into nondecreasing order.
 *
 * Which positions get compared depends only on n, never on the values in x.
 * Arrays with fewer than two elements are left untouched.
 */
static void longlong_sort(long long *x,long long n)
{
  long long span,stride,outer,inner,pos;

  if (n < 2) return;

  /* double span from 1 until span >= n - span */
  for (span = 1;span < n - span;span += span)
    ;

  for (stride = span;stride > 0;stride >>= 1) {
    /* exchange x[pos] with x[pos+stride] wherever bit `stride` of pos is clear */
    for (pos = 0;pos < n - stride;++pos) {
      if (pos & stride) continue;
      if (x[pos] > x[pos+stride]) {
        long long tmp = x[pos];
        x[pos] = x[pos+stride];
        x[pos+stride] = tmp;
      }
    }

    /* pos deliberately carries over from one value of outer to the next */
    pos = 0;
    for (outer = span;outer > stride;outer >>= 1) {
      for (;pos < n - outer;++pos) {
        long long low;

        if (pos & stride) continue;

        /* keep the running minimum in low while walking inner down to stride */
        low = x[pos + stride];
        for (inner = outer;inner > stride;inner >>= 1) {
          if (low > x[pos+inner]) {
            long long tmp = low;
            low = x[pos+inner];
            x[pos+inner] = tmp;
          }
        }
        x[pos + stride] = low;
      }
    }
  }
}

/* Number of timed intervals per measurement; t_print() averages the middle half of them. */
#define TIMINGS 32 /* must be multiple of 4 */
/*
 * Shared timing buffer. The measure_* functions store TIMINGS+1 cpucycles()
 * readings here; t_print() then overwrites the first TIMINGS entries with the
 * differences between consecutive readings.
 */
static long long t[TIMINGS+1];

/*
 * Print one result line for the readings currently held in t[].
 *
 * op   - label printed first
 * impl - implementation index; a negative value is printed as "selected"
 * len  - length printed after the implementation
 *
 * The first TIMINGS entries of t[] are replaced by the differences between
 * consecutive readings. The line then shows the rounded mean of the middle
 * half of the sorted differences, followed by every difference as a signed
 * offset from that mean. stdout is flushed afterwards.
 */
static void t_print(const char *op,long long impl,long long len)
{
  long long sorted[TIMINGS];
  long long mean = 0;
  long long k;

  printf("%s",op);
  if (impl < 0)
    printf(" selected");
  else
    printf(" %lld",impl);
  printf(" %lld",len);

  /* convert readings to durations in place, keeping a copy for sorting */
  for (k = 0;k < TIMINGS;++k) {
    t[k] = t[k+1]-t[k];
    sorted[k] = t[k];
  }
  longlong_sort(sorted,TIMINGS);

  /* sum of the middle TIMINGS/2 values, divided by TIMINGS/2 with rounding */
  for (k = TIMINGS/4;k < 3*TIMINGS/4;++k)
    mean += sorted[k];
  mean = (2*mean + TIMINGS/2) / TIMINGS;

  printf(" %lld ",mean);
  for (k = 0;k < TIMINGS;++k)
    printf("%+lld",t[k]-mean);
  printf("\n");
  fflush(stdout);
}

/*
 * Report the cycle counter in use and the cost of reading it.
 *
 * Prints the values returned by cpucycles_persecond() and
 * cpucycles_implementation(), then takes TIMINGS+1 back-to-back cpucycles()
 * readings and reports them through t_print as "cpucycles selected 0".
 */
static void measure_cpucycles(void)
{
  long long k;

  printf("cpucycles selected persecond %lld\n",cpucycles_persecond());
  printf("cpucycles selected implementation %s\n",cpucycles_implementation());

  k = 0;
  while (k <= TIMINGS) {
    t[k] = cpucycles();
    ++k;
  }

  t_print("cpucycles",-1,0);
}

/*
 * Byte budget for each benchmark: passed to callocplus() as the buffer size
 * and used as the upper bound on (element size * array length).
 */
#define MAXTEST_BYTES 65536

/*
 * Optional filters, assigned in main() from the first three command-line
 * arguments. A null pointer means the filter is not applied.
 *   targeto - compared against "sort"
 *   targetp - compared against the type name, e.g. "int32"
 *   targeti - compared against the implementation name reported by djbsort
 */
static const char *targeto = 0;
static const char *targetp = 0;
static const char *targeti = 0;

/*
 * Time the djbsort int32 sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "int32". Otherwise it walks impl = -1 (the
 * function djbsort_int32 itself, reported as "selected") followed by every
 * index below djbsort_numimpl_int32(), skipping entries whose implementation
 * name differs from targeti when that filter is set. For each entry, array
 * lengths start at 0 and grow by 1+len/2 while 4*len <= MAXTEST_BYTES; every
 * length gets TIMINGS+1 cycle-counter readings, one before each sort call,
 * which t_print then reports.
 */
static void measure_sort_int32(void)
{
  void *raw;
  int32_t *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"int32") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (int32_t *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_int32();++impl) {
    void (*sorter)(int32_t *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_int32_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_int32;
      printf("sort_int32 selected implementation %s compiler %s\n",djbsort_int32_implementation(),djbsort_int32_compiler());
    } else {
      sorter = djbsort_dispatch_int32(impl);
      printf("sort_int32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int32_implementation(impl),djbsort_dispatch_int32_compiler(impl));
    }

    for (long long count = 0;4*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_int32",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort int32down sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "int32down". Otherwise it walks impl = -1
 * (the function djbsort_int32down itself, reported as "selected") followed by
 * every index below djbsort_numimpl_int32down(), skipping entries whose
 * implementation name differs from targeti when that filter is set. For each
 * entry, array lengths start at 0 and grow by 1+len/2 while
 * 4*len <= MAXTEST_BYTES; every length gets TIMINGS+1 cycle-counter readings,
 * one before each sort call, which t_print then reports.
 */
static void measure_sort_int32down(void)
{
  void *raw;
  int32_t *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"int32down") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (int32_t *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_int32down();++impl) {
    void (*sorter)(int32_t *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_int32down_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_int32down;
      printf("sort_int32down selected implementation %s compiler %s\n",djbsort_int32down_implementation(),djbsort_int32down_compiler());
    } else {
      sorter = djbsort_dispatch_int32down(impl);
      printf("sort_int32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int32down_implementation(impl),djbsort_dispatch_int32down_compiler(impl));
    }

    for (long long count = 0;4*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_int32down",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort uint32 sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "uint32". Otherwise it walks impl = -1 (the
 * function djbsort_uint32 itself, reported as "selected") followed by every
 * index below djbsort_numimpl_uint32(), skipping entries whose implementation
 * name differs from targeti when that filter is set. For each entry, array
 * lengths start at 0 and grow by 1+len/2 while 4*len <= MAXTEST_BYTES; every
 * length gets TIMINGS+1 cycle-counter readings, one before each sort call,
 * which t_print then reports.
 */
static void measure_sort_uint32(void)
{
  void *raw;
  uint32_t *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"uint32") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (uint32_t *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_uint32();++impl) {
    void (*sorter)(uint32_t *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_uint32_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_uint32;
      printf("sort_uint32 selected implementation %s compiler %s\n",djbsort_uint32_implementation(),djbsort_uint32_compiler());
    } else {
      sorter = djbsort_dispatch_uint32(impl);
      printf("sort_uint32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint32_implementation(impl),djbsort_dispatch_uint32_compiler(impl));
    }

    for (long long count = 0;4*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_uint32",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort uint32down sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "uint32down". Otherwise it walks impl = -1
 * (the function djbsort_uint32down itself, reported as "selected") followed by
 * every index below djbsort_numimpl_uint32down(), skipping entries whose
 * implementation name differs from targeti when that filter is set. For each
 * entry, array lengths start at 0 and grow by 1+len/2 while
 * 4*len <= MAXTEST_BYTES; every length gets TIMINGS+1 cycle-counter readings,
 * one before each sort call, which t_print then reports.
 */
static void measure_sort_uint32down(void)
{
  void *raw;
  uint32_t *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"uint32down") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (uint32_t *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_uint32down();++impl) {
    void (*sorter)(uint32_t *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_uint32down_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_uint32down;
      printf("sort_uint32down selected implementation %s compiler %s\n",djbsort_uint32down_implementation(),djbsort_uint32down_compiler());
    } else {
      sorter = djbsort_dispatch_uint32down(impl);
      printf("sort_uint32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint32down_implementation(impl),djbsort_dispatch_uint32down_compiler(impl));
    }

    for (long long count = 0;4*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_uint32down",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort float32 sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "float32". Otherwise it walks impl = -1 (the
 * function djbsort_float32 itself, reported as "selected") followed by every
 * index below djbsort_numimpl_float32(), skipping entries whose implementation
 * name differs from targeti when that filter is set. For each entry, array
 * lengths start at 0 and grow by 1+len/2 while 4*len <= MAXTEST_BYTES; every
 * length gets TIMINGS+1 cycle-counter readings, one before each sort call,
 * which t_print then reports.
 */
static void measure_sort_float32(void)
{
  void *raw;
  float *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"float32") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (float *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_float32();++impl) {
    void (*sorter)(float *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_float32_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_float32;
      printf("sort_float32 selected implementation %s compiler %s\n",djbsort_float32_implementation(),djbsort_float32_compiler());
    } else {
      sorter = djbsort_dispatch_float32(impl);
      printf("sort_float32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float32_implementation(impl),djbsort_dispatch_float32_compiler(impl));
    }

    for (long long count = 0;4*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_float32",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort float32down sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "float32down". Otherwise it walks impl = -1
 * (the function djbsort_float32down itself, reported as "selected") followed
 * by every index below djbsort_numimpl_float32down(), skipping entries whose
 * implementation name differs from targeti when that filter is set. For each
 * entry, array lengths start at 0 and grow by 1+len/2 while
 * 4*len <= MAXTEST_BYTES; every length gets TIMINGS+1 cycle-counter readings,
 * one before each sort call, which t_print then reports.
 */
static void measure_sort_float32down(void)
{
  void *raw;
  float *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"float32down") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (float *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_float32down();++impl) {
    void (*sorter)(float *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_float32down_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_float32down;
      printf("sort_float32down selected implementation %s compiler %s\n",djbsort_float32down_implementation(),djbsort_float32down_compiler());
    } else {
      sorter = djbsort_dispatch_float32down(impl);
      printf("sort_float32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float32down_implementation(impl),djbsort_dispatch_float32down_compiler(impl));
    }

    for (long long count = 0;4*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_float32down",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort int64 sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "int64". Otherwise it walks impl = -1 (the
 * function djbsort_int64 itself, reported as "selected") followed by every
 * index below djbsort_numimpl_int64(), skipping entries whose implementation
 * name differs from targeti when that filter is set. For each entry, array
 * lengths start at 0 and grow by 1+len/2 while 8*len <= MAXTEST_BYTES; every
 * length gets TIMINGS+1 cycle-counter readings, one before each sort call,
 * which t_print then reports.
 */
static void measure_sort_int64(void)
{
  void *raw;
  int64_t *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"int64") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (int64_t *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_int64();++impl) {
    void (*sorter)(int64_t *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_int64_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_int64;
      printf("sort_int64 selected implementation %s compiler %s\n",djbsort_int64_implementation(),djbsort_int64_compiler());
    } else {
      sorter = djbsort_dispatch_int64(impl);
      printf("sort_int64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int64_implementation(impl),djbsort_dispatch_int64_compiler(impl));
    }

    for (long long count = 0;8*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_int64",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort int64down sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "int64down". Otherwise it walks impl = -1
 * (the function djbsort_int64down itself, reported as "selected") followed by
 * every index below djbsort_numimpl_int64down(), skipping entries whose
 * implementation name differs from targeti when that filter is set. For each
 * entry, array lengths start at 0 and grow by 1+len/2 while
 * 8*len <= MAXTEST_BYTES; every length gets TIMINGS+1 cycle-counter readings,
 * one before each sort call, which t_print then reports.
 */
static void measure_sort_int64down(void)
{
  void *raw;
  int64_t *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"int64down") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (int64_t *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_int64down();++impl) {
    void (*sorter)(int64_t *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_int64down_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_int64down;
      printf("sort_int64down selected implementation %s compiler %s\n",djbsort_int64down_implementation(),djbsort_int64down_compiler());
    } else {
      sorter = djbsort_dispatch_int64down(impl);
      printf("sort_int64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int64down_implementation(impl),djbsort_dispatch_int64down_compiler(impl));
    }

    for (long long count = 0;8*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_int64down",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort uint64 sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "uint64". Otherwise it walks impl = -1 (the
 * function djbsort_uint64 itself, reported as "selected") followed by every
 * index below djbsort_numimpl_uint64(), skipping entries whose implementation
 * name differs from targeti when that filter is set. For each entry, array
 * lengths start at 0 and grow by 1+len/2 while 8*len <= MAXTEST_BYTES; every
 * length gets TIMINGS+1 cycle-counter readings, one before each sort call,
 * which t_print then reports.
 */
static void measure_sort_uint64(void)
{
  void *raw;
  uint64_t *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"uint64") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (uint64_t *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_uint64();++impl) {
    void (*sorter)(uint64_t *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_uint64_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_uint64;
      printf("sort_uint64 selected implementation %s compiler %s\n",djbsort_uint64_implementation(),djbsort_uint64_compiler());
    } else {
      sorter = djbsort_dispatch_uint64(impl);
      printf("sort_uint64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint64_implementation(impl),djbsort_dispatch_uint64_compiler(impl));
    }

    for (long long count = 0;8*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_uint64",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort uint64down sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "uint64down". Otherwise it walks impl = -1
 * (the function djbsort_uint64down itself, reported as "selected") followed by
 * every index below djbsort_numimpl_uint64down(), skipping entries whose
 * implementation name differs from targeti when that filter is set. For each
 * entry, array lengths start at 0 and grow by 1+len/2 while
 * 8*len <= MAXTEST_BYTES; every length gets TIMINGS+1 cycle-counter readings,
 * one before each sort call, which t_print then reports.
 */
static void measure_sort_uint64down(void)
{
  void *raw;
  uint64_t *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"uint64down") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (uint64_t *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_uint64down();++impl) {
    void (*sorter)(uint64_t *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_uint64down_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_uint64down;
      printf("sort_uint64down selected implementation %s compiler %s\n",djbsort_uint64down_implementation(),djbsort_uint64down_compiler());
    } else {
      sorter = djbsort_dispatch_uint64down(impl);
      printf("sort_uint64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint64down_implementation(impl),djbsort_dispatch_uint64down_compiler(impl));
    }

    for (long long count = 0;8*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_uint64down",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort float64 sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "float64". Otherwise it walks impl = -1 (the
 * function djbsort_float64 itself, reported as "selected") followed by every
 * index below djbsort_numimpl_float64(), skipping entries whose implementation
 * name differs from targeti when that filter is set. For each entry, array
 * lengths start at 0 and grow by 1+len/2 while 8*len <= MAXTEST_BYTES; every
 * length gets TIMINGS+1 cycle-counter readings, one before each sort call,
 * which t_print then reports.
 */
static void measure_sort_float64(void)
{
  void *raw;
  double *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"float64") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (double *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_float64();++impl) {
    void (*sorter)(double *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_float64_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_float64;
      printf("sort_float64 selected implementation %s compiler %s\n",djbsort_float64_implementation(),djbsort_float64_compiler());
    } else {
      sorter = djbsort_dispatch_float64(impl);
      printf("sort_float64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float64_implementation(impl),djbsort_dispatch_float64_compiler(impl));
    }

    for (long long count = 0;8*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_float64",impl,count);
    }
  }

  free(raw);
}

/*
 * Time the djbsort float64down sorters and print the results.
 *
 * Does nothing when targeto is set to something other than "sort" or targetp
 * is set to something other than "float64down". Otherwise it walks impl = -1
 * (the function djbsort_float64down itself, reported as "selected") followed
 * by every index below djbsort_numimpl_float64down(), skipping entries whose
 * implementation name differs from targeti when that filter is set. For each
 * entry, array lengths start at 0 and grow by 1+len/2 while
 * 8*len <= MAXTEST_BYTES; every length gets TIMINGS+1 cycle-counter readings,
 * one before each sort call, which t_print then reports.
 */
static void measure_sort_float64down(void)
{
  void *raw;
  double *buf;

  if (targeto && strcmp(targeto,"sort") != 0) return;
  if (targetp && strcmp(targetp,"float64down") != 0) return;

  raw = callocplus(MAXTEST_BYTES);
  buf = (double *) aligned(raw);

  for (long long impl = -1;impl < djbsort_numimpl_float64down();++impl) {
    void (*sorter)(double *,long long);

    if (targeti) {
      if (strcmp(targeti,djbsort_dispatch_float64down_implementation(impl)) != 0) continue;
    }

    if (impl < 0) {
      sorter = djbsort_float64down;
      printf("sort_float64down selected implementation %s compiler %s\n",djbsort_float64down_implementation(),djbsort_float64down_compiler());
    } else {
      sorter = djbsort_dispatch_float64down(impl);
      printf("sort_float64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float64down_implementation(impl),djbsort_dispatch_float64down_compiler(impl));
    }

    for (long long count = 0;8*count <= MAXTEST_BYTES;count += 1+count/2) {
      /* reading k is taken immediately before sort call k */
      for (long long k = 0;k <= TIMINGS;++k) {
        t[k] = cpucycles();
        sorter(buf,count);
      }
      t_print("sort_float64down",impl,count);
    }
  }

  free(raw);
}

#include "print_cpuid.inc"

/*
 * Entry point of the speed benchmark.
 *
 * Prints the djbsort version and architecture strings and the output of
 * print_cpuid(), then stores up to three command-line arguments (after the
 * program name) into targeto, targetp and targeti, in that order. Finally
 * runs the cycle-counter measurement, limits(), and every measure_sort_*
 * benchmark. Always returns 0.
 */
int main(int argc,char **argv)
{
  const char **filter[3];
  int k;

  printf("djbsort version %s\n",djbsort_version());
  printf("djbsort arch %s\n",djbsort_arch());
  print_cpuid();

  filter[0] = &targeto;
  filter[1] = &targetp;
  filter[2] = &targeti;

  /* skip the program name if present, then fill filters until arguments run out */
  if (*argv) ++argv;
  for (k = 0;k < 3 && *argv;++k)
    *filter[k] = *argv++;

  measure_cpucycles();
  limits();

  /* 32-bit element types */
  measure_sort_int32();
  measure_sort_int32down();
  measure_sort_uint32();
  measure_sort_uint32down();
  measure_sort_float32();
  measure_sort_float32down();

  /* 64-bit element types */
  measure_sort_int64();
  measure_sort_int64down();
  measure_sort_uint64();
  measure_sort_uint64down();
  measure_sort_float64();
  measure_sort_float64down();

  return 0;
}
