// Key-Value Query-Response associative memory proof of concept.
// Copyright (C) 2025 Gubanov Sergei. All rights reserved.
#define _CRT_NONSTDC_NO_WARNINGS 1
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include <malloc.h>
#include <stdlib.h>
#ifdef __GNUC__
#define ALIGN(N) __attribute__((aligned(N)))
#else
#define ALIGN(N) __declspec(align(N))
#endif
#define CACHE_LINE_SIZE 64ull
#define THP_SIZE (2ull * 1024ull * 1024ull)
#define PI 3.14159265358979323846264338328
#define SPACE_DIM 32
typedef struct vector {
float f[SPACE_DIM];
} vector_t;
typedef struct associative_memory {
vector_t *K;
vector_t *V;
size_t N;
void *raw_pointer;
} associative_memory_t;
typedef struct associative_processor {
ALIGN(CACHE_LINE_SIZE) vector_t D;
float *KQ;
float *VR;
float *W;
associative_memory_t *memory;
void *raw_pointer;
} associative_processor_t;
static float vector_similarity(vector_t *X, vector_t *Y) {
float *x = X->f;
float *y = Y->f;
float XX = 1.0e-10f;
float YY = 1.0e-10f;
float XY = 0;
for (size_t i = 0; i < SPACE_DIM; i++) {
XX += (x[i] * x[i]);
YY += (y[i] * y[i]);
XY += (x[i] * y[i]);
}
return (XY / sqrtf(XX * YY));
}
static float get_vector_length(vector_t *X) {
float *x = X->f;
float sum2 = 0;
for (size_t i = 0; i < SPACE_DIM; i++) {
sum2 += (x[i] * x[i]);
}
return sqrtf(sum2);
}
static float set_vector_length(vector_t *X, const float new_length) {
float *x = X->f;
float sum2 = 0;
for (size_t i = 0; i < SPACE_DIM; i++) {
sum2 += (x[i] * x[i]);
}
float old_length = sqrtf(sum2);
if ((old_length != new_length) && (old_length > 0)) {
const float factor = (new_length / old_length);
for (size_t i = 0; i < SPACE_DIM; i++) {
x[i] *= factor;
}
}
}
static float scalar_product(vector_t *X, vector_t *Y) {
float *x = X->f;
float *y = Y->f;
float sum = 0;
for (size_t i = 0; i < SPACE_DIM; i++) {
sum += (x[i] * y[i]);
}
return sum;
}
static void vector_mul_add(vector_t *X, vector_t *Y, const float weight) {
float *x = X->f;
float *y = Y->f;
for (size_t i = 0; i < SPACE_DIM; i++) {
x[i] += (y[i] * weight);
}
}
static void vector_add(vector_t *X, vector_t *Y) {
float *x = X->f;
float *y = Y->f;
for (size_t i = 0; i < SPACE_DIM; i++) {
x[i] += y[i];
}
}
static void vector_sub(vector_t *X, vector_t *Y) {
float *x = X->f;
float *y = Y->f;
for (size_t i = 0; i < SPACE_DIM; i++) {
x[i] -= y[i];
}
}
static inline uint32_t random_uint32(uint64_t *seed) {
const uint64_t A = 3076458245721376007ull;
const uint64_t B = 6418069888083187241ull;
const uint64_t x = (*seed) * A + B;
const uint32_t result = ((uint32_t)(x >> 32));
(*seed) = x;
return result;
}
static inline double random_double(uint64_t *seed) {
return ((1.0 / 4294967296.0) * random_uint32(seed));
}
static void random_vector(vector_t *v, uint64_t *seed, float length) {
for (int i = 0; i < SPACE_DIM; i++) {
double p = 2 * PI * random_double(seed);
double r = random_double(seed);
v->f[i] = (float)(sin(p) * sqrt(-2 * log(r)));
}
set_vector_length(v, length);
}
static associative_memory_t *associative_memory_create(const size_t N) {
const size_t size_K = ((sizeof(vector_t) * N) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
const size_t size_V = ((sizeof(vector_t) * N) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
const size_t size_am = (sizeof(associative_memory_t) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
const size_t total_size = (THP_SIZE + size_am + size_K + size_V + THP_SIZE - 1) & ~(THP_SIZE - 1);
uintptr_t address = (uintptr_t)malloc(total_size);
if (address == 0) {
printf("associative_memory_create() Out of memory");
exit(1);
return NULL;
}
void *raw_pointer = (void *)address; address = (address + THP_SIZE - 1) & ~(THP_SIZE - 1);
associative_memory_t *memory = (associative_memory_t *)address; address += size_am;
memory->K = (vector_t *)address; address += size_K;
memory->V = (vector_t *)address; address += size_V;
memory->N = N;
memory->raw_pointer = raw_pointer;
return memory;
}
static void associative_memory_delete(associative_memory_t *memory) {
free(memory->raw_pointer);
}
static associative_processor_t *associative_processor_create(associative_memory_t *memory) {
const size_t N = memory->N;
const size_t size_KQ = ((sizeof(float) * N) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
const size_t size_VR = ((sizeof(float) * N) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
const size_t size_W = ((sizeof(float) * N) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
const size_t size_am = (sizeof(associative_processor_t) + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
const size_t total_size = (CACHE_LINE_SIZE + size_am + size_KQ + size_VR + size_W + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
uintptr_t address = (uintptr_t)malloc(total_size);
if (address == 0) {
printf("associative_processor_create() Out of memory");
exit(1);
return NULL;
}
void *raw_pointer = (void *)address;
address = (address + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
associative_processor_t *processor = (associative_processor_t *)address; address += size_am;
processor->KQ = (float *)address; address += size_KQ;
processor->VR = (float *)address; address += size_VR;
processor->W = (float *)address; address += size_W;
processor->memory = memory;
processor->raw_pointer = raw_pointer;
return processor;
}
static void associative_processor_delete(associative_processor_t *processor) {
free(processor->raw_pointer);
}
static void inference(associative_processor_t *processor, vector_t *Q, vector_t *R) {
vector_t *K = processor->memory->K;
vector_t *V = processor->memory->V;
size_t N = processor->memory->N;
float *KQ = processor->KQ;
float *VR = processor->VR;
float *W = processor->W;
vector_t *D = &processor->D;
float query_length = get_vector_length(Q);
for (size_t i = 0; i < N; i++) {
KQ[i] = scalar_product(&K[i], Q);
}
memset(R, 0, sizeof(vector_t));
for (size_t iteration = 0; iteration < 256; iteration++) {
float max_KQ_VR = 0;
for (size_t i = 0; i < N; i++) {
float VRi = scalar_product(&V[i], R);
VR[i] = VRi;
float KQ_VR = KQ[i] + VRi;
if (max_KQ_VR < KQ_VR) {
max_KQ_VR = KQ_VR;
}
}
float sum_w = 0;
for (size_t i = 0; i < N; i++) {
float w = expf(KQ[i] + VR[i] - max_KQ_VR);
sum_w += w;
W[i] = w;
}
if (!(sum_w > 0)) {
printf("sum_w == NaN\n");
exit(1);
}
memset(D, 0, sizeof(vector_t));
float norm_w = (1.0f / sum_w);
for (size_t i = 0; i < N; i++) {
float w = W[i] * norm_w;
if (w > 1.0e-20f) {
vector_mul_add(D, &V[i], w);
}
}
vector_sub(D, R);
const float dt = 1;
vector_mul_add(R, D, dt);
float D_length = get_vector_length(D);
float R_length = get_vector_length(R);
if (0) {
printf("[%" PRIu64 "] |Q|=%f, |R|=%f, |D|=%f, |D|/|R|=%f, max(exp(KQ+VR))=%e\n",
iteration, query_length, R_length, D_length, (D_length / R_length),
exp(max_KQ_VR));
}
if ((D_length * 10000) < R_length) {
if (0) {
printf("[%" PRIu64 "] |Q|=%f, |R|=%f, |D|=%f, |D|/|R|=%f, max(exp(KQ+VR))=%e\n",
iteration, query_length, R_length, D_length, (D_length / R_length),
exp(max_KQ_VR));
}
break;
}
}
}
int main(int argc, char *argv[]) {
const size_t GROUND_TRUE_SAMPLES_COUNT = 1024;
const size_t RANDOM_KEYS_SAMPLES_COUNT = (128 * 1024);
const float ground_true_vector_length = (float)sqrtf(logf(8.0f * GROUND_TRUE_SAMPLES_COUNT));
const float random_keys_vector_length = (float)sqrtf(logf(8.0f * RANDOM_KEYS_SAMPLES_COUNT));
printf("#\n");
printf("# Key-Value Query-Response associative memory proof of concept.\n");
printf("# Copyright (C) 2025 Gubanov Sergei. All rights reserved.\n");
printf("#\n");
printf("# Dimension of vector space = %d\n", SPACE_DIM);
printf("# Ground-True associative memory samples = %" PRIu64 "\n", GROUND_TRUE_SAMPLES_COUNT);
printf("# Random-Keys associative memory samples = %" PRIu64 "\n", RANDOM_KEYS_SAMPLES_COUNT);
associative_memory_t *ground_true_memory = associative_memory_create(GROUND_TRUE_SAMPLES_COUNT);
associative_processor_t *ground_true_processor = associative_processor_create(ground_true_memory);
uint64_t seed = 42;
for (size_t i = 0; i < ground_true_memory->N; i++) {
random_vector(&ground_true_memory->K[i], &seed, ground_true_vector_length);
random_vector(&ground_true_memory->V[i], &seed, ground_true_vector_length);
}
printf("#\n");
printf("# Ground-True associative memory self-verification:\n");
{
float min = 1;
float max = -1;
float sum = 0;
for (size_t i = 0; (i < ground_true_memory->N); i++) {
vector_t response;
inference(ground_true_processor, &ground_true_memory->K[i], &response);
const float s = vector_similarity(&ground_true_memory->V[i], &response);
sum += s;
if (min > s) { min = s; }
if (max < s) { max = s; }
const size_t P = (ground_true_memory->N / 16);
if ((i % P) == (P - 1)) {
printf("[%8" PRIu64 "] Similarity Min=%f Avr=%f Max=%f\n", (i + 1),
min, (sum / (i + 1)), max);
}
}
printf("# Similarity: Min=%f Avr=%f Max=%f\n", min, (sum / ground_true_memory->N), max);
}
printf("#\n");
printf("# Random-Keys associative memory sampling:\n");
associative_memory_t *random_keys_memory = associative_memory_create(RANDOM_KEYS_SAMPLES_COUNT);
for (size_t i = 0; i < random_keys_memory->N; i++) {
vector_t *key = &random_keys_memory->K[i];
vector_t *value = &random_keys_memory->V[i];
random_vector(key, &seed, random_keys_vector_length);
inference(ground_true_processor, key, value);
const size_t progress = (random_keys_memory->N / 16);
if ((i % progress) == (progress - 1)) {
printf("[%8" PRIu64 "] progress %6.2f%%\n", (i + 1), ((100.0 * (i + 1)) / random_keys_memory->N));
}
}
printf("#\n");
printf("# Random-Keys associative memory verification with respect to Ground-True memories:\n");
associative_processor_t *random_keys_processor = associative_processor_create(random_keys_memory);
{
float min = 1;
float max = -1;
float sum = 0;
for (size_t i = 0; (i < ground_true_memory->N); i++) {
vector_t response;
inference(random_keys_processor, &ground_true_memory->K[i], &response);
const float s = vector_similarity(&ground_true_memory->V[i], &response);
sum += s;
if (min > s) { min = s; }
if (max < s) { max = s; }
const size_t P = (ground_true_memory->N / 16);
if ((i % P) == (P - 1)) {
printf("[%8" PRIu64 "] Similarity Min=%f Avr=%f Max=%f\n", (i + 1),
min, (sum / (i + 1)), max);
}
}
printf("# Similarity: Min=%f Avr=%f Max=%f\n", min, (sum / ground_true_memory->N), max);
}
associative_processor_delete(ground_true_processor);
associative_processor_delete(random_keys_processor);
associative_memory_delete(ground_true_memory);
associative_memory_delete(random_keys_memory);
printf("#\n");
return 0;
}