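# NOTE: the lines below appear to be web file-viewer residue captured along
# with the source (not Python code); kept verbatim as comments.
# pushpdeep's picture
# Duplicate from pushpdeep/IR-project
# 70a4e1e
# raw
# history blame contribute delete
# No virus
# 956 Bytes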
import numpy as np
def vector_search(query, model, index, num_results=10):
    """Encode ``query`` with ``model`` and search ``index`` for similar vectors.

    Args:
        query (str or iterable of str): A single query string, or a
            collection of query strings to be searched as one batch.
        model: Encoder exposing ``encode(list_of_str)``, e.g. a
            ``sentence_transformers.SentenceTransformer``.
        index: Search index exposing ``search(vectors, k=...)``, e.g. a
            deserialized FAISS index.
        num_results (int): Number of results to request per query.

    Returns:
        tuple: ``(D, I)`` exactly as returned by ``index.search``. For a
        FAISS index these are the distances between results and query, and
        the ids of the results (one row per query).
    """
    # ``list("some text")`` would explode the string into single characters
    # and encode every character as its own query. Wrap a bare string in a
    # one-element list instead; any other iterable is materialised as before.
    sentences = [query] if isinstance(query, str) else list(query)
    embeddings = model.encode(sentences)
    # The index is searched with float32 vectors, as in the original code.
    vectors = np.array(embeddings).astype("float32")
    D, I = index.search(vectors, k=num_results)
    return D, I
def id2details(df, I, column):
    """Look up ``column`` values for each id in the first row of ``I``.

    For every id in ``I[0]``, the rows of ``df`` whose ``rid`` equals that id
    are selected and their ``column`` values collected into a list. The
    result holds one such list per id, in the order of ``I[0]``; an id with
    no matching row yields an empty list.
    """
    details = []
    for rid in I[0]:
        matching_rows = df[df.rid == rid]
        values = matching_rows[column]
        details.append(list(values))
    return details