Spaces:
Runtime error
Runtime error
Update agent.csv
Browse files
agent.py
CHANGED
@@ -21,14 +21,15 @@ from langchain.embeddings.base import Embeddings
|
|
21 |
from typing import List
|
22 |
import numpy as np
|
23 |
|
24 |
-
import
|
25 |
import uuid
|
26 |
from langchain.embeddings import HuggingFaceEmbeddings
|
27 |
from langchain.vectorstores import FAISS
|
28 |
from langchain.schema import Document
|
29 |
-
from langchain.
|
30 |
from sentence_transformers import SentenceTransformer
|
31 |
|
|
|
32 |
load_dotenv()
|
33 |
|
34 |
@tool
|
@@ -136,39 +137,53 @@ sys_msg = SystemMessage(content=system_prompt)
|
|
136 |
|
137 |
|
138 |
# -------------------------------
|
139 |
-
# Step 1: Load
|
140 |
# -------------------------------
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
# -------------------------------
|
154 |
-
# Step 2: Set up Embeddings
|
155 |
# -------------------------------
|
|
|
156 |
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
|
|
|
|
157 |
vector_store = FAISS.from_documents(docs, embedding_model)
|
158 |
|
159 |
-
# Save FAISS index locally
|
160 |
vector_store.save_local("faiss_index")
|
161 |
|
|
|
|
|
162 |
# -------------------------------
|
163 |
-
# Step 3: Create Retriever Tool
|
164 |
# -------------------------------
|
165 |
retriever = vector_store.as_retriever()
|
166 |
|
167 |
-
# Create retriever tool
|
168 |
question_retriever_tool = create_retriever_tool(
|
169 |
retriever=retriever,
|
170 |
name="Question_Search",
|
171 |
-
description="
|
172 |
)
|
173 |
|
174 |
|
|
|
21 |
from typing import List
|
22 |
import numpy as np
|
23 |
|
24 |
+
import pandas as pd
|
25 |
import uuid
|
26 |
from langchain.embeddings import HuggingFaceEmbeddings
|
27 |
from langchain.vectorstores import FAISS
|
28 |
from langchain.schema import Document
|
29 |
+
from langchain.agents import create_retriever_tool
|
30 |
from sentence_transformers import SentenceTransformer
|
31 |
|
32 |
+
|
33 |
load_dotenv()
|
34 |
|
35 |
@tool
|
|
|
137 |
|
138 |
|
139 |
# -------------------------------
|
140 |
+
# Step 1: Load documents from CSV file (max 165 rows)
|
141 |
# -------------------------------
|
142 |
+
# Standard-library imports are kept local to this section because the file's
# top-level import block is not fully visible from here.
import json
import os

# Location of the source CSV. The DOCUMENTS_CSV_PATH environment variable takes
# precedence so the script is not tied to one developer's machine; the original
# hard-coded path remains the fallback for backward compatibility.
csv_file_path = os.environ.get(
    "DOCUMENTS_CSV_PATH", "/home/wendy/Downloads/documents.csv"
)

# nrows stops parsing after the first 165 data rows instead of loading the
# whole file and discarding the remainder.
df = pd.read_csv(csv_file_path, nrows=165)

# Raise explicitly rather than assert: assertions are removed under `python -O`.
if "content" not in df.columns:
    raise ValueError("'content' column is required in the CSV file.")

# Rows without content have nothing to embed; drop them and make sure the
# remaining values are plain strings before they become page_content.
df = df.dropna(subset=["content"])
df = df.assign(content=df["content"].astype(str))

# Give every remaining row a unique identifier.
df["id"] = [str(uuid.uuid4()) for _ in range(len(df))]


def _coerce_metadata(value):
    """Return *value* as a metadata dict.

    JSON strings are decoded first (malformed JSON still raises, so bad data
    is not hidden). A dict is returned unchanged. A missing value -- ``None``
    or the NaN pandas uses for an empty cell -- becomes ``{}``.

    Raises:
        TypeError: if the value is neither a dict nor missing.
    """
    if isinstance(value, str):
        value = json.loads(value)
    if isinstance(value, dict):
        return value
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return {}
    raise TypeError(
        f"metadata must be a JSON object or empty, got {type(value).__name__}"
    )


if "metadata" in df.columns:
    df["metadata"] = df["metadata"].apply(_coerce_metadata)
else:
    df["metadata"] = [{} for _ in range(len(df))]

# Convert each row into a Document. Keys supplied by the row's own metadata are
# unpacked last, so a metadata "id" overrides the generated one (as before).
docs = [
    Document(page_content=content, metadata={"id": doc_id, **metadata})
    for content, doc_id, metadata in zip(df["content"], df["id"], df["metadata"])
]
|
162 |
|
163 |
# -------------------------------
|
164 |
+
# Step 2: Set up HuggingFace Embeddings and FAISS VectorStore
|
165 |
# -------------------------------
|
166 |
+
# Initialize HuggingFace Embedding model
|
167 |
# Name of the sentence-transformers checkpoint used to embed the documents.
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
embedding_model = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)

# Embed every entry of `docs` and build a FAISS vector store from the result.
vector_store = FAISS.from_documents(documents=docs, embedding=embedding_model)

# Write the index out under the "faiss_index" folder.
vector_store.save_local(folder_path="faiss_index")
|
174 |
|
175 |
+
#print("✅ FAISS index created and saved locally.")
|
176 |
+
|
177 |
# -------------------------------
|
178 |
+
# Step 3: Create Retriever Tool (for use in LangChain)
|
179 |
# -------------------------------
|
180 |
# Wrap the vector store in a retriever object.
retriever = vector_store.as_retriever()

# Package the retriever as a named tool.
# NOTE(review): this file imports create_retriever_tool from `langchain.agents`;
# the documented location is `langchain.tools.retriever` -- confirm the import
# resolves with the pinned langchain version.
question_retriever_tool = create_retriever_tool(
    retriever,
    "Question_Search",
    "A tool to retrieve documents related to a user's question.",
)
|
188 |
|
189 |
|