from typing import List, Dict, Union


class KeywordSearchProvider:
    """Rank a fixed list of documents against a query using TF-IDF vectors.

    The vectorizer is fitted once on ``documents`` at construction time;
    each call to :meth:`search` scores the query against that matrix with
    cosine similarity.
    """

    def __init__(self, documents: List[str]):
        """Fit a TF-IDF vectorizer on ``documents`` and keep the matrix.

        Args:
            documents: Texts to index. The list is stored by reference and
                :meth:`search` looks results up in it by position.
        """
        # Function-scope import: scikit-learn is loaded when the provider
        # is constructed rather than when this module is imported.
        from sklearn.feature_extraction.text import TfidfVectorizer

        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.vectorizer.fit_transform(documents)
        self.documents = documents

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Union[str, float]]]:
        """Return the ``top_k`` documents most similar to ``query``.

        Args:
            query: Text to compare against the indexed documents.
            top_k: Maximum number of results. Values of zero or below
                yield an empty list; values larger than the number of
                documents return every document.

        Returns:
            A list of ``{"document": <text>, "score": <float>}`` dicts in
            descending order of cosine similarity.
        """
        # The ``[-top_k:]`` slice below is only correct for positive values:
        # ``[-0:]`` selects every document, and a negative ``top_k`` turns
        # into a positive start offset. Bail out before doing any work.
        if top_k <= 0:
            return []

        from sklearn.metrics.pairwise import cosine_similarity

        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.tfidf_matrix)[0]

        # argsort is ascending, so the best matches are the last ``top_k``
        # indices; reverse them to put the highest score first.
        top_indices = similarities.argsort()[-top_k:][::-1]

        return [
            {
                "document": self.documents[idx],
                # Plain float keeps the result free of NumPy scalar types.
                "score": float(similarities[idx]),
            }
            for idx in top_indices
        ]