wt002 committed
Commit 6ead180 · verified · 1 Parent(s): b7ebe22

Update agent.py

Files changed (1)
  1. agent.py +72 -5
agent.py CHANGED
@@ -37,7 +37,7 @@ import re
 from langchain_community.document_loaders import TextLoader, PyMuPDFLoader
 from docx import Document as DocxDocument
 import openpyxl
-
+from io import StringIO
 
 load_dotenv()
 
@@ -312,16 +312,59 @@ for task in tasks:
 # Step 4: Set up HuggingFace Embeddings and FAISS VectorStore
 # -------------------------------
 # Initialize HuggingFace Embedding model
-embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+#embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
 
 from langchain_community.document_loaders import WikipediaLoader
 
-wiki_docs = WikipediaLoader(query="Mercedes Sosa", load_max_docs=3).load()
-all_docs = docs + wiki_docs  # `docs` is your existing list
+# -----------------------------
+# Step 1: Load CSV Questions
+# -----------------------------
+csv_path = "questions.csv"  # Change to your CSV file
+df = pd.read_csv(csv_path)
+
+docs = []
+for _, row in df.iterrows():
+    question = str(row.get("question", "")).strip()
+    if question:
+        docs.append(Document(page_content=question, metadata={"source": "csv"}))
+
+# -----------------------------
+# Step 2: Add Wikipedia Docs
+# -----------------------------
+wiki_docs = []
+for doc in docs:
+    try:
+        wiki_results = WikipediaLoader(query=doc.page_content, load_max_docs=1).load()
+        wiki_docs.extend(wiki_results)
+    except Exception as e:
+        print(f"Failed to load Wikipedia for: {doc.page_content} - {e}")
+
+all_docs = docs + wiki_docs
+
+# -----------------------------
+# Step 3: Build FAISS Index
+# -----------------------------
 vector_store = FAISS.from_documents(all_docs, embedding_model)
 vector_store.save_local("faiss_index")
 
+# -----------------------------
+# Step 4: Create Retriever Tool
+# -----------------------------
+retriever = vector_store.as_retriever()
+
+question_retriever_tool = create_retriever_tool(
+    retriever=retriever,
+    name="Question_Search",
+    description="A tool to retrieve documents related to a user's question."
+)
+
 
 # -------------------------------
@@ -336,6 +379,30 @@ question_retriever_tool = create_retriever_tool(
     description="A tool to retrieve documents related to a user's question."
 )
 
+def retriever_node(state: MessagesState):
+    """Retriever node using similarity scores for filtering"""
+    query = state["messages"][0].content
+    results = vector_store.similarity_search_with_score(query, k=4)  # top 4 matches
+
+    # Filter by score (lower is more similar; adjust threshold as needed)
+    threshold = 0.8
+    filtered = [doc for doc, score in results if score < threshold]
+
+    if not filtered:
+        example_msg = HumanMessage(content="No relevant documents found.")
+    else:
+        content = "\n\n".join(doc.page_content for doc in filtered)
+        example_msg = HumanMessage(
+            content=f"Here are relevant reference documents:\n\n{content}"
+        )
+
+    return {"messages": [sys_msg] + state["messages"] + [example_msg]}
+
 
 tools = [
 
 
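Because the index is persisted with save_local("faiss_index"), later runs can reload it instead of re-embedding every document. A minimal sketch of the reload path, assuming the same embedding_model that built the index; allow_dangerous_deserialization is required by recent langchain_community releases because the docstore is stored as a pickle:

from langchain_community.vectorstores import FAISS

# Sketch: reload the persisted index and expose it as a retriever.
vector_store = FAISS.load_local(
    "faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True,
)
retriever = vector_store.as_retriever()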
 
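The retriever_node added in the last hunk reads a MessagesState and returns a messages update, so it slots in as the entry node of a LangGraph graph. A minimal sketch of the wiring, assuming the usual StateGraph API; "assistant" is a hypothetical name for the LLM node defined elsewhere in agent.py:

from langgraph.graph import StateGraph, MessagesState, START

# Sketch: retrieval runs first, then hands the augmented message
# list to the LLM step.
builder = StateGraph(MessagesState)
builder.add_node("retriever", retriever_node)
builder.add_node("assistant", assistant)  # hypothetical LLM node
builder.add_edge(START, "retriever")
builder.add_edge("retriever", "assistant")
graph = builder.compile()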