#include <immintrin.h>
#include <stdint.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Bucket thresholds, in strictly increasing order.
 * f_branch() compares its argument against N0..N10 in this order, and
 * f_avx512() loads the same eleven values into a vector for comparison.
 */
#define N0 100000000
#define N1 123456789
#define N2 234567891
#define N3 345678912
#define N4 456789123
#define N5 567891234
#define N6 678912345
#define N7 789123456
#define N8 891234567
#define N9 912345678
#define N10 999999999
/* State of the generator below; callers assign it directly to (re)seed. */
static uint64_t random_seed;

/*
 * Advances the 64-bit linear congruential generator held in random_seed and
 * returns 32 bits of the new state.
 *
 * The multiply/add wraps modulo 2^64 (unsigned arithmetic). The result is
 * bits 24..55 of the updated state: the low 24 bits are shifted out and the
 * bits above 55 are dropped by the conversion to uint32_t.
 */
static inline uint32_t next_random(void)
{
    random_seed = random_seed * 7305322493271191329ull + 2938793273570587247ull;
    /* Explicit cast: the truncation to 32 bits is intentional. */
    return (uint32_t)(random_seed >> 24);
}
/*
 * Baseline for the timing loops: hands its argument back untouched, so a
 * loop calling it measures only the cost of the surrounding loop body.
 */
static inline unsigned int f_empty(unsigned int n)
{
    const unsigned int unchanged = n;

    return unchanged;
}
/*
 * Scalar bucket lookup: returns the index of the first threshold in
 * N0..N10 that is greater than n, or 11 when n is not below any of them.
 *
 * Implemented as a plain chain of compares so that it stays a branch-based
 * reference for the vectorized variant.
 */
static inline unsigned int f_branch(unsigned int n)
{
    unsigned int bucket;

    if (n < N0) {
        bucket = 0;
    } else if (n < N1) {
        bucket = 1;
    } else if (n < N2) {
        bucket = 2;
    } else if (n < N3) {
        bucket = 3;
    } else if (n < N4) {
        bucket = 4;
    } else if (n < N5) {
        bucket = 5;
    } else if (n < N6) {
        bucket = 6;
    } else if (n < N7) {
        bucket = 7;
    } else if (n < N8) {
        bucket = 8;
    } else if (n < N9) {
        bucket = 9;
    } else if (n < N10) {
        bucket = 10;
    } else {
        bucket = 11;
    }

    return bucket;
}
/*
 * Branch-free bucket lookup using one AVX-512 compare.
 *
 * Broadcasts n to all sixteen 32-bit lanes, compares it (unsigned) against
 * the eleven thresholds N0..N10 at once, and derives the bucket index from
 * the number of thresholds that are greater than n. Returns the same value
 * as f_branch() for every 32-bit input: 0..10 for the first threshold above
 * n, or 11 when n is not below any threshold.
 */
static inline unsigned int f_avx512(unsigned int n)
{
    const __m512i value = _mm512_set1_epi32((int)n);

    /*
     * Only the population count of the mask is used, so the lane order of
     * the thresholds does not matter. The five unused lanes hold 0: an
     * unsigned "n < 0" is never true, so they never contribute a bit. Padding
     * with -1 (0xFFFFFFFF) instead would rely on "n < 0xFFFFFFFF" always
     * being true, which fails for n == 0xFFFFFFFF.
     */
    const __m512i thresholds = _mm512_set_epi32(N0, N1, N2, N3, N4, N5, N6, N7,
                                                N8, N9, N10, 0, 0, 0, 0, 0);

    /* One mask bit per lane: set where n < threshold. */
    const __mmask16 below = _mm512_cmplt_epu32_mask(value, thresholds);

    /* 11 thresholds in total; those not above n give the bucket index. */
    return 11u - (unsigned int)_popcnt32(_mm512_mask2int(below));
}
/*
 * Checks that f_avx512() agrees with f_branch() for every n in [0, N), then
 * times f_empty(), f_branch() and f_avx512() over N pseudo-random inputs
 * each, using the time-stamp counter, and prints the per-call tick counts.
 *
 * Returns 0 on success, or -1 as soon as the two lookups disagree.
 */
int main(int argc, char* argv[])
{
    const uint64_t N = 1000000000;
    uint64_t dummy, t0, t1, t2;
    double net_branch, net_avx512;

    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    printf("begin functional test...\n");
    for (uint64_t n = 0; n < N; n++) {
        /* N is below 2^32, so nothing is lost with a 32-bit unsigned int. */
        unsigned int r1 = f_branch((unsigned int)n);
        unsigned int r2 = f_avx512((unsigned int)n);
        if (r1 != r2) {
            printf("functional test failed!!!\n");
            /* %u matches the unsigned int results. */
            printf("f_branch(%" PRIu64 ")=%u f_avx512(%" PRIu64 ")=%u\n", n, r1, n, r2);
            return -1;
        }
    }
    printf("functional test passed\n");

    printf("run performance test...\n");
    /* The results are summed and printed at the end so the calls stay live. */
    dummy = 0;

    /* Each loop is reseeded identically so all three see the same inputs. */
    random_seed = 42;
    t0 = _rdtsc();
    for (uint64_t n = 0; n < N; n++) {
        dummy += f_empty(next_random());
    }
    t0 = _rdtsc() - t0;

    random_seed = 42;
    t1 = _rdtsc();
    for (uint64_t n = 0; n < N; n++) {
        dummy += f_branch(next_random());
    }
    t1 = _rdtsc() - t1;

    random_seed = 42;
    t2 = _rdtsc();
    for (uint64_t n = 0; n < N; n++) {
        dummy += f_avx512(next_random());
    }
    t2 = _rdtsc() - t2;

    printf("ticks (empty ) %f\n", (double)t0 / (double)N);
    printf("ticks (branch) %f\n", (double)t1 / (double)N);
    printf("ticks (avx512) %f\n", (double)t2 / (double)N);

    /*
     * Subtract the baseline in floating point. Doing it in uint64_t would
     * wrap around to a huge value whenever a measurement happens to come out
     * smaller than the empty-loop baseline.
     */
    net_branch = (double)t1 - (double)t0;
    net_avx512 = (double)t2 - (double)t0;
    printf("gain %f\n", net_branch / net_avx512);

    printf("dummy %" PRIu64 "\n", dummy);
    return 0;
}