# personalisation-engine/scripts/evaluate_system.py
import logging
from pathlib import Path

import faiss
import numpy as np
import pandas as pd
from tqdm import tqdm

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Evaluator")
DATA_DIR = Path("data")
SYNTHETIC_DATA_PATH = DATA_DIR / "synthetic" / "user_sequences.parquet"
CATALOG_PATH = DATA_DIR / "catalog" / "books_catalog.csv"
EMBEDDINGS_PATH = DATA_DIR / "embeddings_cache.npy"
INDEX_PATH = DATA_DIR / "index" / "optimized.index"


def evaluate_hit_rate(top_k=10, sample_size=1000):
    """
    Evaluate the recommender using a leave-one-out strategy.

    Metric: Hit Rate @ k.
    """
logger.info("Loading Catalog and Embeddings...")
if not CATALOG_PATH.exists() or not EMBEDDINGS_PATH.exists():
logger.error("Missing Data! Run download scripts first.")
return
df_catalog = pd.read_csv(CATALOG_PATH)
titles = df_catalog['title'].tolist()
title_to_idx = {t.lower().strip(): i for i, t in enumerate(titles)}
embeddings = np.load(EMBEDDINGS_PATH)
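    # Assumption: row i of the embeddings matrix corresponds to row i of
    # the catalog (titles[i]); the title_to_idx lookup relies on this.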
logger.info("Loading FAISS Index...")
if INDEX_PATH.exists():
index = faiss.read_index(str(INDEX_PATH))
index.nprobe = 10
else:
logger.info("Optimized index not found, building flat index on the fly...")
d = embeddings.shape[1]
index = faiss.IndexFlatIP(d)
faiss.normalize_L2(embeddings)
index.add(embeddings)
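    # Note: the prebuilt-index path presumes the optimized index was built
    # from L2-normalized embeddings; only the on-the-fly flat index
    # normalizes them here. The query vector is normalized in both cases.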
logger.info(f"Loading Synthetic Data from {SYNTHETIC_DATA_PATH}...")
df_users = pd.read_parquet(SYNTHETIC_DATA_PATH)
if len(df_users) > sample_size:
df_users = df_users.sample(sample_size, random_state=42)
logger.info(f"Evaluating on {len(df_users)} users...")
hits = 0
processed_users = 0
    for _, row in tqdm(df_users.iterrows(), total=len(df_users)):
        history = row["book_sequence"]
        if len(history) < 2:
            continue  # need at least one context book plus a held-out target

        # Hold out the most recent book as the prediction target.
        target_book = history[-1]
        context_books = history[:-1]

        # Map context titles to catalog rows, skipping titles not in the catalog.
        valid_indices = []
        for book in context_books:
            norm_title = book.lower().strip()
            if norm_title in title_to_idx:
                valid_indices.append(title_to_idx[norm_title])
        if not valid_indices:
            continue

        context_vectors = embeddings[valid_indices]

        # Exponential recency decay: the most recent context book gets
        # weight 1.0, the previous one 0.9, the one before that 0.81, ...
        n = len(valid_indices)
        decay_factor = 0.9
        weights = decay_factor ** np.arange(n - 1, -1, -1)
        weights = weights / weights.sum()
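        # Worked example: n = 3 -> raw weights [0.81, 0.9, 1.0], which
        # normalize to roughly [0.30, 0.33, 0.37].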
        user_vector = (
            np.average(context_vectors, axis=0, weights=weights)
            .reshape(1, -1)
            .astype(np.float32)
        )
        faiss.normalize_L2(user_vector)  # unit length, to match the index

        # Over-fetch so that filtering out already-read books still leaves
        # at least top_k candidates.
        search_k = top_k + len(valid_indices) + 5
        scores, indices = index.search(user_vector, search_k)

        recommended_titles = []
        seen_indices = set(valid_indices)
        for idx in indices[0]:
            # Skip FAISS's -1 padding for missing results, and books
            # already in the user's history.
            if idx < 0 or idx in seen_indices:
                continue
            recommended_titles.append(titles[idx])
            if len(recommended_titles) >= top_k:
                break

        target_norm = target_book.lower().strip()
        rec_norm = [t.lower().strip() for t in recommended_titles]
        if target_norm in rec_norm:
            hits += 1
        processed_users += 1

    if processed_users == 0:
        print("No valid users found for evaluation.")
        return

    hit_rate = hits / processed_users
print("\n" + "="*40)
print(f"EVALUATION REPORT (Sample: {processed_users} users)")
print("="*40)
print(f"Metric: Hit Rate @ {top_k}")
print(f"Score: {hit_rate:.4f} ({hit_rate*100:.2f}%)")
print("-" * 40)
print("Interpretation:")
print(f"In {hit_rate*100:.1f}% of cases, the model successfully predicted")
print("the exact next book the user would read.")
print("="*40)

if __name__ == "__main__":
    evaluate_hit_rate()