Spaces:
Runtime error
Runtime error
from sklearn.feature_extraction.text import TfidfVectorizer | |
def find_matches(chunks, keywords, padding=500):
    """Rank text chunks by TF-IDF similarity to a set of keywords.

    Args:
        chunks: Iterable of text strings to score.
        keywords: Iterable of keyword strings. They are joined with single
            spaces into one query document, so pass a list/tuple of words;
            a bare string would be joined character by character.
        padding: Not read by this function; retained so existing callers
            that pass it keep working.

    Returns:
        A dict mapping each chunk's positional index to its similarity
        score, ordered from the highest score to the lowest. Returns an
        empty dict when ``chunks`` is empty.
    """
    # Collapse every run of whitespace in each chunk to a single space.
    preprocessed_chunks = [' '.join(chunk.split()) for chunk in chunks]

    # Fitting TF-IDF on an empty corpus raises ValueError (empty vocabulary);
    # with no chunks there is nothing to rank, so return early instead.
    if not preprocessed_chunks:
        return {}

    # TF-IDF vectorization of the chunks (lowercased, English stop words
    # removed).
    vectorizer = TfidfVectorizer(lowercase=True, stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(preprocessed_chunks)

    # Project the keywords into the vocabulary learned from the chunks.
    keyword_vector = vectorizer.transform([' '.join(keywords)])

    # TfidfVectorizer L2-normalises rows by default, so the dot product of
    # a chunk row with the keyword row is their cosine similarity.
    cosine_similarities = (
        tfidf_matrix.dot(keyword_vector.T).toarray().flatten()
    )

    # Sort (index, score) pairs by score, best first. The sort is stable, so
    # chunks with equal scores keep their original relative order.
    ranked = sorted(
        enumerate(cosine_similarities),
        key=lambda item: item[1],
        reverse=True,
    )
    return dict(ranked)