from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


def prediction(file_paths, uploaded_text):
    # Read each candidate file and collect its text, then append the uploaded text last
    sentences = []
    for file_path in file_paths:
        with open(file_path, 'r') as file:
            text = file.read()
        sentences.append(text)
    sentences.append(uploaded_text)

    # Load the pre-trained BERT-based sentence-embedding model
    model_name = 'sentence-transformers/bert-base-nli-mean-tokens'
    model = SentenceTransformer(model_name)

    # Use the model to encode all texts into sentence embeddings
    sentence_embeddings = model.encode(sentences)

    # Calculate cosine similarity between the uploaded text and the rest of the sentence embeddings
    query_embedding = sentence_embeddings[-1]        # Uploaded text embedding
    candidate_embeddings = sentence_embeddings[:-1]  # Embeddings of the candidate files
    cosine_similarities = cosine_similarity([query_embedding], candidate_embeddings)[0]

    # Pair each file path with its cosine similarity to the uploaded text
    predictions = list(zip(file_paths, cosine_similarities))

    # Sort the predictions based on cosine similarity (highest to lowest)
    predictions.sort(key=lambda x: x[1], reverse=True)
    return predictions
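

# Example usage: a minimal sketch of how prediction() might be called.
# The file paths and query text below are hypothetical placeholders,
# not part of the original source.
if __name__ == "__main__":
    corpus_files = ["doc1.txt", "doc2.txt", "doc3.txt"]  # hypothetical plain-text files
    query = "Text uploaded by the user for similarity ranking."
    for path, score in prediction(corpus_files, query):
        print(f"{path}: {score:.4f}")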