USC-GPT / reranker.py
bhulston's picture
Uploading relevant documents
c755297
raw
history blame
602 Bytes
from sentence_transformers import CrossEncoder
import numpy as np
# Let's use a reranker to get better results from our semantic search
def reranker(query, matches, top_k=5, model=None):
    """Re-rank semantic-search matches against ``query`` with a cross-encoder.

    Each match's text is paired with the query and scored; the texts are then
    returned best-first, each prefixed with ``"Class: "``.

    Args:
        query: The search query; it is paired with every match text.
        matches: An object exposing a ``.matches`` sequence whose items
            support ``item["metadata"]["text"]``.
        top_k: Maximum number of results to return. ``None`` returns every
            match, ranked. Defaults to 5.
        model: Optional scorer exposing ``predict(pairs)`` that returns one
            score per ``(query, text)`` pair. When omitted, the
            ``cross-encoder/ms-marco-TinyBERT-L-2-v2`` CrossEncoder is
            constructed for this call; pass a preloaded instance to avoid
            rebuilding it on every call.

    Returns:
        A list of ``"Class: <text>"`` strings ordered from highest to lowest
        score, at most ``top_k`` long. Empty when there are no matches.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k is not None and top_k < 0:
        raise ValueError("top_k must be non-negative or None")

    docs = matches.matches
    print("matches are:", docs)

    texts = [match["metadata"]["text"] for match in docs]
    if not texts:
        # Nothing to score; skip building the model entirely.
        return []

    pairs = [(query, text) for text in texts]
    if model is None:
        model = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-2-v2', max_length=512)
    scores = model.predict(pairs)

    # argsort is ascending, so reverse it for best-first order and only then
    # truncate. (A single negative-step slice such as [::-5] would instead
    # pick every fifth entry: best, 6th best, 11th best, ...)
    ranked = np.argsort(scores)[::-1]
    if top_k is not None:
        ranked = ranked[:top_k]
    return ["Class: " + texts[index] for index in ranked]