# Class_Schedule_Generator_AI / semantic_ranker.py
# cgreszes's picture
# Create semantic_ranker.py
# 2a6e646 verified
# semantic_ranker.py
from typing import Optional
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
# Process-wide cache for the embedding model; filled on first use.
_model = None


def _lazy_model():
    """Return the shared sentence-embedding model, creating it on first call.

    The model is constructed only once and stored in the module-level
    ``_model`` variable; subsequent calls return that same instance.
    """
    global _model
    if _model is not None:
        return _model

    # Small MiniLM checkpoint, chosen because it is fast on CPU.
    checkpoint = "sentence-transformers/all-MiniLM-L6-v2"
    _model = SentenceTransformer(checkpoint)
    return _model
def score_courses(df: pd.DataFrame, query: Optional[str], text_cols=("name", "subject")) -> pd.DataFrame:
    """Rank courses by semantic similarity to a free-text query.

    Each row's text is built by joining the ``text_cols`` values with
    " - " (missing values become empty strings). The query and every row
    text are embedded, and the cosine similarity between them is stored in
    a ``sem_score`` column; higher is better.

    The input frame is never modified: a copy is always returned.

    Args:
        df: Course table; must contain every column named in ``text_cols``.
        query: Free-text search string. ``None``, an empty string, or a
            whitespace-only string means "no query".
        text_cols: Names of the columns concatenated to form each row's text.

    Returns:
        A copy of ``df`` with a ``sem_score`` column added. With no query,
        or when ``df`` has no rows, rows keep their original order and
        ``sem_score`` is 0.0. Otherwise rows are sorted by ``sem_score``
        descending, with ties left in their original relative order.
        Index labels are preserved in both cases.
    """
    scored = df.copy()

    # Nothing to rank against (no query) or nothing to rank (no rows):
    # return early so the embedding model is not loaded needlessly.
    # len() is used because DataFrame truthiness is ambiguous.
    if not query or not query.strip() or len(scored) == 0:
        scored["sem_score"] = 0.0
        return scored

    model = _lazy_model()

    # astype(str) keeps str.join from raising on numeric or other
    # non-string cells; string cells are unaffected.
    corpus = (
        scored[list(text_cols)]
        .fillna("")
        .astype(str)
        .agg(" - ".join, axis=1)
        .tolist()
    )

    q_emb = model.encode([query], convert_to_tensor=True, normalize_embeddings=True)
    c_emb = model.encode(corpus, convert_to_tensor=True, normalize_embeddings=True)

    # cos_sim yields a (1, n_rows) matrix; flatten to one score per row.
    sims = util.cos_sim(q_emb, c_emb).cpu().numpy().ravel()
    scored["sem_score"] = sims

    # Best match first. mergesort is a stable sort, so rows with equal
    # scores keep their original relative order (the default quicksort
    # gives no such guarantee).
    return scored.sort_values(by="sem_score", ascending=False, kind="mergesort")