Jupiter-FAQ-streamlit / semantic_search.py
bhutesh65's picture
Upload 12 files
001593c verified
# semantic_search.py
import json
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Read the cleaned FAQ entries from disk (runs once, when the module is imported).
with open("cleaned_faqs.json", mode="r", encoding="utf-8") as f:
    data = json.loads(f.read())

# Question text of every FAQ entry, kept in the same order as `data` so that
# row i of `embeddings` corresponds to data[i].
questions = [entry["question"] for entry in data]

# lightweight and HuggingFace-friendly
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed all FAQ questions up front; search_faq compares queries against these.
embeddings = model.encode(questions)
def search_faq(query: str, top_k: int = 3) -> list:
    """Return the FAQ entries whose questions are most similar to *query*.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the precomputed ``embeddings`` of every FAQ
    question.

    Args:
        query: Free-text user question to look up.
        top_k: Maximum number of entries to return. Values of zero or less
            yield an empty list; values larger than the number of FAQs
            return every FAQ.

    Returns:
        Up to ``top_k`` items of ``data``, ordered from highest to lowest
        similarity score.
    """
    # A negative top_k would otherwise be interpreted by the slice below as
    # "everything except the last |top_k| matches", which is never intended.
    # Bail out before doing any embedding work.
    if top_k <= 0:
        return []

    query_embedding = model.encode([query])
    # cosine_similarity returns a (1, n_faqs) matrix; take its single row.
    scores = cosine_similarity(query_embedding, embeddings)[0]
    # argsort is ascending, so reverse it to rank the best matches first.
    top_indices = np.argsort(scores)[::-1][:top_k]
    return [data[idx] for idx in top_indices]
#----------------------------------------------------------------------------
# from rephrase_with_mistral import rephrase_with_mistral
# from semantic_search import search_faq
# api_key = "YOUR_API_KEY" # <-- Paste your actual API key here (never commit a real key to the repository)
# query = "how do I update my KYC?"
# top_faq = search_faq(query)[0]
# print("🔎 FAQ Retrieved:")
# print(top_faq['question'])
# print(top_faq['answer'])
# # Now rephrase
# print("\n💬 Rephrased Answer:")
# #print(rephrase_with_mistral(top_faq['question'], top_faq['answer'], api_key))
# print(rephrase_with_mistral(query, [top_faq], api_key)) # use a list of one FAQ