# semantic_ranker.py
"""Rank course rows by semantic similarity between their text and a query."""
from typing import Optional

import numpy as np
import pandas as pd

# Process-wide cache for the embedding model; populated on first use.
_model = None


def _lazy_model():
    """Return the shared SentenceTransformer, loading it on first call.

    The model object is cached in the module-level ``_model`` so later calls
    reuse it instead of loading it again.
    """
    global _model
    if _model is None:
        # Imported here rather than at module import time so that loading
        # this module (and the no-query fast path) does not pull in the
        # heavy embedding library until a model is actually needed.
        from sentence_transformers import SentenceTransformer

        _model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # CPU-fast
    return _model


def score_courses(
    df: pd.DataFrame,
    query: Optional[str],
    text_cols=("name", "subject"),
) -> pd.DataFrame:
    """Return a copy of *df* with a ``sem_score`` column; higher is better.

    Each row's text is built by joining the values of ``text_cols`` with
    ``" - "`` (missing values become empty strings) and is compared to
    *query* by cosine similarity of sentence embeddings.

    Args:
        df: Input rows. Never modified; a copy is returned.
        query: Free-text query. ``None``, empty, or whitespace-only means
            "no query".
        text_cols: Columns of *df* whose values form each row's text.

    Returns:
        A new DataFrame. With no query, or when *df* has no rows, the
        original row order is kept and every ``sem_score`` is ``0.0``.
        Otherwise rows are sorted by ``sem_score`` descending, and rows with
        equal scores keep their original relative order.

    Raises:
        KeyError: If a column in ``text_cols`` is missing from *df* (only
            checked when there is something to score).
    """
    if not query or not query.strip() or df.empty:
        # Nothing to compare: skip the model entirely and leave the
        # caller's frame untouched.
        unscored = df.copy()
        unscored["sem_score"] = np.zeros(len(unscored), dtype=float)
        return unscored

    # Local import for the same reason as in _lazy_model().
    from sentence_transformers import util

    model = _lazy_model()

    # astype(str) lets non-string columns (numbers, dates, ...) be joined.
    text_frame = df[list(text_cols)].fillna("").astype(str)
    corpus = [
        " - ".join(parts)
        for parts in text_frame.itertuples(index=False, name=None)
    ]

    q_emb = model.encode([query], convert_to_tensor=True, normalize_embeddings=True)
    c_emb = model.encode(corpus, convert_to_tensor=True, normalize_embeddings=True)
    sims = util.cos_sim(q_emb, c_emb).cpu().numpy().ravel()

    scored = df.copy()
    scored["sem_score"] = sims
    # mergesort is stable, so rows with equal scores keep their input order
    # (the default quicksort gives no such guarantee).
    return scored.sort_values(by="sem_score", ascending=False, kind="mergesort")