|
|
| import json
|
| import numpy as np
|
| from sentence_transformers import SentenceTransformer
|
| import faiss
|
| from pathlib import Path
|
|
|
| DATA_DIR = Path("data")
|
| EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
| TOP_K = 5
|
|
|
def load_index():
    """Read the persisted FAISS vector index from data/vector_store.index."""
    index_path = DATA_DIR / "vector_store.index"
    # faiss.read_index wants a plain string path, not a Path object.
    return faiss.read_index(str(index_path))
|
|
|
def load_metadata():
    """Load the per-chunk document metadata list/dict from data/metadata.json."""
    raw = (DATA_DIR / "metadata.json").read_text(encoding="utf-8")
    return json.loads(raw)
|
|
|
def embed_query(model, query):
    """Encode *query* into a single-row numpy embedding, L2-normalized in place.

    Normalization makes inner-product search equivalent to cosine similarity
    (assumes the index was built from normalized vectors — TODO confirm).
    """
    vectors = model.encode([query], convert_to_numpy=True)
    faiss.normalize_L2(vectors)  # mutates `vectors` in place
    return vectors
|
|
|
def search(query, top_k=TOP_K):
    """Embed *query* and return the top_k nearest documents from the index.

    Args:
        query: free-text query string.
        top_k: maximum number of results to return (default TOP_K).

    Returns:
        List of {"score": float, "doc": metadata-entry} dicts, best first.
        May contain fewer than top_k entries if the index is small.
    """
    # NOTE(review): model/index/metadata are reloaded on every call — fine for
    # a one-shot CLI, but hoist these if search() is ever called in a loop.
    model = SentenceTransformer(EMBED_MODEL)
    index = load_index()
    metadata = load_metadata()

    q_emb = embed_query(model, query)
    # FAISS requires float32 input.
    scores, indices = index.search(q_emb.astype("float32"), top_k)

    results = []
    for score, idx in zip(scores[0], indices[0]):
        # FAISS pads the result with idx == -1 when the index holds fewer
        # than top_k vectors; metadata[-1] would silently return the LAST
        # entry with a meaningless score, so skip those slots.
        if idx < 0:
            continue
        results.append({"score": float(score), "doc": metadata[idx]})
    return results
|
|
|
if __name__ == "__main__":
    # Interactive one-shot query loop: prompt, search, pretty-print results.
    user_query = input("Enter your question/query: ").strip()
    hits = search(user_query, top_k=5)
    for rank, hit in enumerate(hits, start=1):
        doc = hit["doc"]
        print(f"\n=== Result {rank} (score={hit['score']:.4f}) ===")
        print("Source:", doc["source_file"], "page:", doc["page"])
        print("Preview:", doc["text"][:800])
|
|
|