Spaces:

omdena-lc
/

omdena-ng-lagos-chatbot-actions-server

Runtime error

App Files Files Community

pvanand committed on Aug 25, 2023

Commit

f5d9bbe

1 Parent(s): a6f4db6

Perform vector search using FAISS

Browse files

Files changed (1) hide show

actions/search_content.py +61 -0

actions/search_content.py ADDED Viewed

	@@ -0,0 +1,61 @@

+# search_content.py
+import faiss
+import pandas as pd
+from sentence_transformers import SentenceTransformer
# Locations of the artifacts the search needs, as relative paths.

# Saved sentence-transformer model used to encode queries.
MODEL_SAVE_PATH = "all-distilroberta-v1-model.pkl"
# Serialized Faiss index that is searched with the encoded query.
FAISS_INDEX_FILE_PATH = "index.faiss"
# CSV with the question/answer pairs returned as search results.
DATA_FILE_PATH = "omdena_qna_dataset/omdena_faq_training_data.csv"
def load_transformer_model(model_file):
    """Return the sentence-transformer model stored at ``model_file``.

    The path is handed unchanged to ``SentenceTransformer.load``.
    """
    model = SentenceTransformer.load(model_file)
    return model
def load_faiss_index(filename):
    """Return the Faiss index read from ``filename`` via ``faiss.read_index``."""
    index = faiss.read_index(filename)
    return index
def load_data(file_path):
    """Load the Q&A CSV and prepare it for lookup by row id.

    Reads the CSV at ``file_path``, adds an ``id`` column copied from the
    index that ``pd.read_csv`` assigned, and builds a ``QNA`` column that
    combines each question with its answer.

    Args:
        file_path: Path to a CSV file with ``Questions`` and ``Answers``
            columns.

    Returns:
        The data frame indexed by ``id``; the ``id`` column is kept as well.

    Raises:
        KeyError: If the ``Questions`` or ``Answers`` column is missing.
    """
    data_frame = pd.read_csv(file_path)
    data_frame["id"] = data_frame.index
    # Build the combined text column-wise instead of with a row-wise
    # DataFrame.apply: this avoids creating a Series per row and also works
    # for a header-only CSV, where there are no rows to apply over.
    data_frame["QNA"] = [
        f"Question: {question}, Answer: {answer}"
        for question, answer in zip(data_frame["Questions"], data_frame["Answers"])
    ]
    return data_frame.set_index(["id"], drop=False)
def search_content(query, data_frame_indexed, transformer_model, faiss_index, k=5):
    """Search the content using a query and return up to ``k`` results.

    Args:
        query: Text to search for.
        data_frame_indexed: Data frame whose index labels match the ids
            stored in ``faiss_index`` (as produced by ``load_data``).
        transformer_model: Model whose ``encode`` turns a list of texts into
            an array of embedding vectors.
        faiss_index: Faiss index to search.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A new data frame with the matching rows, in the order returned by the
        index, plus a ``similarities`` column holding the score of each hit.
        It has fewer than ``k`` rows (possibly none) when the index cannot
        supply ``k`` neighbours.
    """
    # Encode the query and L2-normalize it in place before searching.
    query_vector = transformer_model.encode([query])
    faiss.normalize_L2(query_vector)

    # search() returns (distances, labels), one row per query vector.
    scores, labels = faiss_index.search(query_vector, k)

    # Faiss pads the label row with -1 when fewer than k neighbours exist;
    # looking up -1 with .loc would raise KeyError, so drop those slots.
    hits = [
        (label, score)
        for label, score in zip(labels[0].tolist(), scores[0].tolist())
        if label != -1
    ]
    hit_ids = [label for label, _ in hits]

    # Copy so that adding a column never writes into the caller's frame.
    results = data_frame_indexed.loc[hit_ids].copy()
    results["similarities"] = [score for _, score in hits]
    return results
# Lazily populated (model, faiss index, data frame) tuple shared by all calls.
_SEARCH_RESOURCES = None


def _get_search_resources():
    """Load the model, Faiss index and data frame once and reuse them."""
    global _SEARCH_RESOURCES
    if _SEARCH_RESOURCES is None:
        _SEARCH_RESOURCES = (
            load_transformer_model(MODEL_SAVE_PATH),
            load_faiss_index(FAISS_INDEX_FILE_PATH),
            load_data(DATA_FILE_PATH),
        )
    return _SEARCH_RESOURCES


def main_search(query):
    """Run a search for ``query`` against the configured model, index and data.

    The model, index and CSV are read from ``MODEL_SAVE_PATH``,
    ``FAISS_INDEX_FILE_PATH`` and ``DATA_FILE_PATH`` on the first call only;
    later calls reuse the loaded objects instead of reloading them from disk
    for every query.

    Args:
        query: Text to search for.

    Returns:
        The ``QNA`` column of the rows returned by ``search_content``.
    """
    transformer_model, faiss_index, data_frame_indexed = _get_search_resources()
    results = search_content(query, data_frame_indexed, transformer_model, faiss_index)
    return results["QNA"]
if __name__ == "__main__":
    # Manual smoke test: run one sample query and print the matching Q&A text.
    sample_query = "school courses"
    matches = main_search(sample_query)
    print(matches)