annikwag's picture
Upload 1639 files
883557f verified
raw
history blame
No virus
842 Bytes
import faiss
"""
Semantic Search Function
"""
def search(query, model, embeddings, filtered_df, top_x=20):
    """Return the rows of ``filtered_df`` that are closest to ``query``.

    A temporary exact L2 FAISS index is built over the embeddings that belong
    to ``filtered_df`` only, the query is encoded with ``model``, and the
    nearest neighbours are looked up in that index.

    Args:
        query: Text to search for; passed to ``model.encode`` inside a
            one-element list.
        model: Object exposing ``encode(list_of_texts)``; the first element
            of its result must support ``reshape(1, -1)``.
        embeddings: 2-D array of vectors, indexed with ``filtered_df.index``.
            NOTE(review): this assumes the index labels of ``filtered_df``
            are integer row positions into ``embeddings`` - confirm at the
            call site.
        filtered_df: DataFrame whose rows are the search candidates.
        top_x: Maximum number of rows to return.

    Returns:
        A DataFrame with at most ``top_x`` rows of ``filtered_df``, in the
        order FAISS reports the neighbours. Empty when ``filtered_df`` is
        empty or ``top_x`` is not positive.
    """
    # FAISS pads its result with -1 when asked for more neighbours than the
    # index contains, and ``iloc[-1]`` would silently return the last row.
    # Never request more neighbours than there are candidates.
    k = min(top_x, len(filtered_df))
    if k <= 0:
        return filtered_df.iloc[[]]

    filtered_embeddings = embeddings[filtered_df.index]

    # Build a throw-away exact (brute-force) L2 index over the candidates only,
    # so the positions FAISS returns are positions within ``filtered_df``.
    faiss_index = faiss.IndexFlatL2(filtered_embeddings.shape[1])
    faiss_index.add(filtered_embeddings)

    # FAISS expects a 2-D batch of queries, hence the (1, dim) reshape.
    query_embedding = model.encode([query])[0].reshape(1, -1)

    _, neighbours = faiss_index.search(query_embedding, k=k)

    # Defensive: drop any -1 placeholders FAISS may still emit.
    top_positions = [int(pos) for pos in neighbours[0] if pos >= 0]
    return filtered_df.iloc[top_positions]