Update app.py
app.py
CHANGED
@@ -140,6 +140,23 @@ from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langgraph.graph import END, StateGraph, START
 import chromadb
+import io
+
+# Environment variables setup
+os.environ["TAVILY_API_KEY"] = "YOUR_TAVILY_API_KEY"
+os.environ["NVIDIA_API_KEY"] = "YOUR_NVIDIA_API_KEY"
+os.environ["LANGCHAIN_PROJECT"] = "RAG project"
+
+class GradeDocuments(BaseModel):
+    """Binary score for relevance check on retrieved documents."""
+    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")
+
+class GraphState(TypedDict):
+    """Represents the state of our graph."""
+    question: str
+    generation: str
+    decision: str
+    documents: List[str]
 
 def process_documents(temp_dir):
     """Process documents from the extracted zip folder."""
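This hunk only defines GradeDocuments; where it gets bound to a model is outside the diff. A rough sketch of the usual LangChain pattern, assuming a ChatNVIDIA model (the NVIDIA_API_KEY above suggests one, but the actual model used in app.py is not shown):

# Hypothetical grader wiring for GradeDocuments; the model name is an
# assumption, not taken from app.py.
from langchain_core.prompts import ChatPromptTemplate
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
structured_grader = llm.with_structured_output(GradeDocuments)

grade_prompt = ChatPromptTemplate.from_messages([
    ("system", "Is the retrieved document relevant to the question? Answer 'yes' or 'no'."),
    ("human", "Document:\n{document}\n\nQuestion: {question}"),
])
retrieval_grader = grade_prompt | structured_grader
# retrieval_grader.invoke({"document": doc_text, "question": q}).binary_score -> 'yes' or 'no'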
@@ -200,7 +217,7 @@ def setup_rag_system(temp_dir):
 
     # Setup vector store
     ids = [str(i) for i in df['chunk_id'].to_list()]
-    client = chromadb.PersistentClient(path=tempfile.mkdtemp())
+    client = chromadb.PersistentClient(path=tempfile.mkdtemp())
     vector_store = Chroma(
         client=client,
         collection_name="rag-chroma",
@@ -208,7 +225,7 @@ def setup_rag_system(temp_dir):
     )
 
     # Add documents in batches
-    batch_size = 100
+    batch_size = 100
     for i in range(0, len(list_of_documents), batch_size):
         end_idx = min(i + batch_size, len(list_of_documents))
         vector_store.add_documents(
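A side note on the changed client line: the PersistentClient path is a fresh tempfile.mkdtemp() directory, so the index is throwaway and will not survive a restart anyway. If persistence is not wanted, an in-memory client is the simpler equivalent. A minimal sketch, assuming the same embedding function app.py configures elsewhere (called embeddings here):

# Sketch: in-memory Chroma client instead of a "persistent" one rooted in a
# temp dir. `embeddings` is an assumed name for the app's embedding function.
import chromadb
from langchain_community.vectorstores import Chroma

client = chromadb.EphemeralClient()  # nothing written to disk
vector_store = Chroma(
    client=client,
    collection_name="rag-chroma",
    embedding_function=embeddings,
)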
@@ -227,7 +244,9 @@ def create_workflow(vector_store):
     """You are an assistant for responding to Request For Proposal documents for a
     bidder in the field of Data Science and Engineering. Use the following pieces
     of retrieved context to respond to the requests. If you don't know the answer,
-    just say that you don't know.
+    just say that you don't know. Provide detailed responses with specific examples
+    and capabilities where possible.
+
     Question: {question}
     Context: {context}
     Answer:"""
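The hunk shows only the prompt text; the chain built from it is out of view. A plausible minimal wiring in LCEL, where template is the string above and llm and vector_store are names assumed from the rest of app.py:

# Sketch of a standard LCEL RAG chain; `llm`, `vector_store`, and `template`
# are assumptions about the surrounding code, not part of this diff.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

prompt = ChatPromptTemplate.from_template(template)
retriever = vector_store.as_retriever()

def format_docs(docs):
    # Join retrieved chunks into the {context} slot of the prompt.
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)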
@@ -245,6 +264,31 @@ def create_workflow(vector_store):
 
     return rag_chain
 
+def preprocess_csv(csv_file):
+    """Preprocess the CSV file to ensure proper format."""
+    try:
+        # First try reading as is
+        df = pd.read_csv(csv_file.name, encoding='latin-1')
+
+        # If there's only one column and no header
+        if len(df.columns) == 1 and df.columns[0] != 'requirement':
+            # Read again with no header and assign column name
+            df = pd.read_csv(csv_file.name, encoding='latin-1', header=None, names=['requirement'])
+
+        # If there's no 'requirement' column, assume first column is requirements
+        if 'requirement' not in df.columns:
+            df = df.rename(columns={df.columns[0]: 'requirement'})
+
+        return df
+    except Exception as e:
+        # If standard CSV reading fails, try reading as plain text
+        try:
+            with open(csv_file.name, 'r', encoding='latin-1') as f:
+                requirements = f.read().strip().split('\n')
+            return pd.DataFrame({'requirement': requirements})
+        except Exception as e2:
+            raise ValueError(f"Could not process CSV file: {str(e2)}")
+
 def handle_upload(zip_file, csv_file):
     """Handle file uploads and process requirements."""
     try:
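The fallback logic in preprocess_csv is easy to smoke-test in isolation; Gradio's file wrapper only needs to expose a .name path, so a SimpleNamespace can stand in for it (a hypothetical check, not part of the commit):

# Hypothetical smoke test: a header-less single-column CSV should come back
# as a DataFrame with a 'requirement' column holding both rows.
import tempfile
from types import SimpleNamespace

with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
    f.write("Must support SSO\nMust export reports to PDF\n")
    path = f.name

df = preprocess_csv(SimpleNamespace(name=path))
assert list(df.columns) == ["requirement"]
assert len(df) == 2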
@@ -256,10 +300,8 @@ def handle_upload(zip_file, csv_file):
         with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
             zip_ref.extractall(temp_dir)
 
-        #
-        requirements_df =
-        if 'requirement' not in requirements_df.columns:
-            raise ValueError("CSV file must contain a 'requirement' column")
+        # Preprocess and read requirements CSV
+        requirements_df = preprocess_csv(csv_file)
 
         # Setup RAG system
         vector_store = setup_rag_system(temp_dir)
@@ -289,19 +331,23 @@ def handle_upload(zip_file, csv_file):
     except Exception as e:
         return pd.DataFrame([{'error': str(e)}])
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+def main():
+    """Main function to run the Gradio interface."""
+    iface = gr.Interface(
+        fn=handle_upload,
+        inputs=[
+            gr.File(label="Upload ZIP folder containing URLs", file_types=[".zip"]),
+            gr.File(label="Upload Requirements CSV", file_types=[".csv", ".txt"])
+        ],
+        outputs=gr.Dataframe(),
+        title="RAG System for RFP Analysis",
+        description="""Upload a ZIP folder containing URL documents and a CSV file with requirements to analyze.
+        The CSV file should contain requirements either as a single column or with a 'requirement' column header.""",
+        examples=[],
+        cache_examples=False
+    )
+
+    iface.launch(share=True)
 
 if __name__ == "__main__":
-
+    main()
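One note on the new main(): a Hugging Face Space already serves the app publicly, so share=True chiefly matters when running locally. For quick checks without the UI, handle_upload can also be driven directly, again via objects exposing a .name path (the file names below are assumed local fixtures, not files shipped with this Space):

# Hypothetical headless run of the whole pipeline.
from types import SimpleNamespace

result_df = handle_upload(
    SimpleNamespace(name="sample_docs.zip"),
    SimpleNamespace(name="sample_requirements.csv"),
)
print(result_df.head())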