# Spaces: JJTsao/rag-movie-api (Running) - File size: 2,588 Bytes
import time
from pathlib import Path

import joblib
import torch
from app.core.config import BM25_PATH, EMBEDDING_MODEL, INTENT_MODEL
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# === Model Config ===
# Module-level cache for the sentence-embedding model. It starts as None so
# that nothing is loaded when this module is imported; load_sentence_model()
# assigns the model on its first call and returns the same object afterwards.
_sentence_model = None  # Not loaded at import time


def load_sentence_model():
    """Return the shared sentence-embedding model, building it on first use.

    On the first call this creates a ``SentenceTransformer`` for
    ``EMBEDDING_MODEL`` (on ``"cuda"`` when ``torch.cuda.is_available()`` is
    true, otherwise on ``"cpu"``), stores it in the module-level
    ``_sentence_model`` and runs two warmup ``encode`` passes separated by a
    0.5 second pause. Every later call returns the cached instance without
    loading or warming up again.

    Returns:
        The cached ``SentenceTransformer`` instance.
    """
    global _sentence_model

    # Fast path: an earlier call already built the model.
    if _sentence_model is not None:
        return _sentence_model

    print("Loading embedding model...")
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"
    _sentence_model = SentenceTransformer(EMBEDDING_MODEL, device=target_device)

    print(f"Model '{EMBEDDING_MODEL}' loaded. Performing GPU warmup...")

    # A small batch of realistic, multi-sentence queries; the intent is to
    # exercise the model once before it serves real requests.
    sample_batch = [
        "A suspenseful thriller with deep character development and moral ambiguity.",
        "Coming-of-age story with emotional storytelling and strong ensemble performances.",
        "Mind-bending sci-fi with philosophical undertones and high concept ideas.",
        "Recommend me some comedies.",
    ]

    # Two identical passes with a short pause in between; the outputs are
    # discarded because only the side effect of running the model matters.
    _sentence_model.encode(sample_batch, show_progress_bar=False)
    time.sleep(0.5)
    _sentence_model.encode(sample_batch, show_progress_bar=False)
    print("🚀 Embedding model fully warmed up.")

    return _sentence_model


def setup_intent_classifier():
    """Create the intent text-classification pipeline and warm it up.

    Builds a ``transformers`` ``pipeline("text-classification", ...)`` from
    ``INTENT_MODEL``, runs it once on each of a few sample queries (the
    predictions are discarded) and returns it. Nothing is cached here: each
    call constructs a new pipeline.

    Returns:
        The pipeline object returned by ``transformers.pipeline``.
    """
    print(f"Loading intent classifier from {INTENT_MODEL}")
    intent_pipeline = pipeline("text-classification", model=INTENT_MODEL)

    print("Warming up intent classifier...")
    sample_queries = (
        "Can you recommend a feel-good movie?",
        "Who directed The Godfather?",
        "Do you like action films?",
    )
    # One call per query, in order; results are intentionally ignored.
    for sample in sample_queries:
        intent_pipeline(sample)

    print("🤖 Classifier ready")
    return intent_pipeline


def load_bm25_files() -> tuple[dict[str, BM25Okapi], dict[str, int]]:
    """Load the movie and TV BM25 models and vocabularies from ``BM25_PATH``.

    Four files are read with ``joblib.load`` from the ``BM25_PATH`` directory,
    in this order: ``movie_bm25_model.joblib``, ``tv_bm25_model.joblib``,
    ``movie_bm25_vocab.joblib``, ``tv_bm25_vocab.joblib``.

    Returns:
        A ``(bm25_models, bm25_vocabs)`` pair. Both are dicts keyed by
        ``"movie"`` and ``"tv"``; the values are whatever objects the
        corresponding joblib files contain.
        NOTE(review): the annotation declares the vocab dict as
        ``dict[str, int]``, but each value is a loaded vocab object rather
        than an obvious ``int`` - confirm the intended type.

    Raises:
        FileNotFoundError: If any of the four files is missing. The original
            error is chained as ``__cause__`` so its path and errno stay
            visible in the traceback.
    """
    bm25_dir = Path(BM25_PATH)
    media_types = ("movie", "tv")

    # NOTE: joblib files are pickle-based, so loading executes whatever the
    # file contains. BM25_PATH must only ever point at trusted artifacts.
    try:
        bm25_models = {
            media: joblib.load(bm25_dir / f"{media}_bm25_model.joblib")
            for media in media_types
        }
        bm25_vocabs = {
            media: joblib.load(bm25_dir / f"{media}_bm25_vocab.joblib")
            for media in media_types
        }
    except FileNotFoundError as e:
        # Chain explicitly so the underlying OS error is reported as the cause
        # rather than as an unrelated error raised during handling.
        raise FileNotFoundError(f"Missing BM25 files: {e}") from e

    print("✅ BM25 files loaded")
    return bm25_models, bm25_vocabs


def embed_text(text: str) -> list[float]:
    """Embed ``text`` with the shared sentence model and return it as a list.

    The model is obtained through ``load_sentence_model()``, so the first
    call may trigger loading and warmup.

    Args:
        text: The string to encode.

    Returns:
        The result of ``encode(text)`` converted with ``.tolist()``.
    """
    encoder = load_sentence_model()
    vector = encoder.encode(text)
    return vector.tolist()