Spaces:
Runtime error
Runtime error
File size: 775 Bytes
bac8f2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from sklearn.feature_extraction.text import TfidfVectorizer
def find_matches(chunks, keywords, padding=500):
    """Rank text chunks by TF-IDF similarity to a set of keywords.

    Args:
        chunks: Iterable of text strings to rank. Each chunk's position in
            this iterable is the index used as its key in the result.
        keywords: Either an iterable of keyword strings, or a single string
            containing the whole query. A single string is used as-is rather
            than being joined character by character.
        padding: Currently unused by this function; kept so existing callers
            that pass it keep working.

    Returns:
        A dict mapping chunk index -> similarity score, ordered from the
        highest score to the lowest. Chunks with equal scores keep their
        original relative order. An empty ``chunks`` yields an empty dict.

    Raises:
        ValueError: Propagated from scikit-learn when the chunks contain no
            usable vocabulary (for example, only English stop words).
    """
    # Collapse every run of whitespace (including newlines) to a single space.
    normalized_chunks = [' '.join(chunk.split()) for chunk in chunks]

    # Nothing to rank: return early instead of letting the vectorizer fail on
    # an empty corpus.
    if not normalized_chunks:
        return {}

    # Joining a plain string would interleave spaces between its characters,
    # producing single-letter tokens that can never match a chunk.
    if isinstance(keywords, str):
        query = keywords
    else:
        query = ' '.join(keywords)

    # Fit the vocabulary / IDF weights on the chunks, then project the query
    # into the same vector space.
    vectorizer = TfidfVectorizer(lowercase=True, stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(normalized_chunks)
    keyword_vector = vectorizer.transform([query])

    # TfidfVectorizer L2-normalises rows by default, so this dot product is
    # the cosine similarity between each chunk and the query.
    scores = tfidf_matrix.dot(keyword_vector.T).toarray().flatten()

    # sorted() is stable, so ties stay in ascending index order.
    ranked = sorted(enumerate(scores), key=lambda item: item[1], reverse=True)
    return dict(ranked)
|