Codequestt committed on
Commit
ec80195
·
verified ·
1 Parent(s): 33cccc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -21
app.py CHANGED
@@ -140,6 +140,23 @@ from langchain_core.pydantic_v1 import BaseModel, Field
140
  from langchain_community.tools.tavily_search import TavilySearchResults
141
  from langgraph.graph import END, StateGraph, START
142
  import chromadb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  def process_documents(temp_dir):
145
  """Process documents from the extracted zip folder."""
@@ -200,7 +217,7 @@ def setup_rag_system(temp_dir):
200
 
201
  # Setup vector store
202
  ids = [str(i) for i in df['chunk_id'].to_list()]
203
- client = chromadb.PersistentClient(path=tempfile.mkdtemp()) # Use temporary directory
204
  vector_store = Chroma(
205
  client=client,
206
  collection_name="rag-chroma",
@@ -208,7 +225,7 @@ def setup_rag_system(temp_dir):
208
  )
209
 
210
  # Add documents in batches
211
- batch_size = 100 # Smaller batch size for better memory management
212
  for i in range(0, len(list_of_documents), batch_size):
213
  end_idx = min(i + batch_size, len(list_of_documents))
214
  vector_store.add_documents(
@@ -227,7 +244,9 @@ def create_workflow(vector_store):
227
  """You are an assistant for responding to Request For Proposal documents for a
228
  bidder in the field of Data Science and Engineering. Use the following pieces
229
  of retrieved context to respond to the requests. If you don't know the answer,
230
- just say that you don't know.
 
 
231
  Question: {question}
232
  Context: {context}
233
  Answer:"""
@@ -245,6 +264,31 @@ def create_workflow(vector_store):
245
 
246
  return rag_chain
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  def handle_upload(zip_file, csv_file):
249
  """Handle file uploads and process requirements."""
250
  try:
@@ -256,10 +300,8 @@ def handle_upload(zip_file, csv_file):
256
  with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
257
  zip_ref.extractall(temp_dir)
258
 
259
- # Read requirements CSV
260
- requirements_df = pd.read_csv(csv_file.name, encoding='latin-1')
261
- if 'requirement' not in requirements_df.columns:
262
- raise ValueError("CSV file must contain a 'requirement' column")
263
 
264
  # Setup RAG system
265
  vector_store = setup_rag_system(temp_dir)
@@ -289,19 +331,23 @@ def handle_upload(zip_file, csv_file):
289
  except Exception as e:
290
  return pd.DataFrame([{'error': str(e)}])
291
 
292
- # Create and launch the Gradio interface
293
- iface = gr.Interface(
294
- fn=handle_upload,
295
- inputs=[
296
- gr.File(label="Upload ZIP folder containing URLs"),
297
- gr.File(label="Upload Requirements CSV")
298
- ],
299
- outputs=gr.Dataframe(),
300
- title="RAG System for RFP Analysis",
301
- description="Upload a ZIP folder containing URL documents and a CSV file with requirements to analyze.",
302
- examples=[],
303
- cache_examples=False
304
- )
 
 
 
 
305
 
306
  if __name__ == "__main__":
307
- iface.launch()
 
140
  from langchain_community.tools.tavily_search import TavilySearchResults
141
  from langgraph.graph import END, StateGraph, START
142
  import chromadb
143
+ import io
144
+
145
+ # Environment variables setup
146
+ os.environ["TAVILY_API_KEY"] = "YOUR_TAVILY_API_KEY"
147
+ os.environ["NVIDIA_API_KEY"] = "YOUR_NVIDIA_API_KEY"
148
+ os.environ["LANGCHAIN_PROJECT"] = "RAG project"
149
+
150
+ class GradeDocuments(BaseModel):
151
+ """Binary score for relevance check on retrieved documents."""
152
+ binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")
153
+
154
+ class GraphState(TypedDict):
155
+ """Represents the state of our graph."""
156
+ question: str
157
+ generation: str
158
+ decision: str
159
+ documents: List[str]
160
 
161
  def process_documents(temp_dir):
162
  """Process documents from the extracted zip folder."""
 
217
 
218
  # Setup vector store
219
  ids = [str(i) for i in df['chunk_id'].to_list()]
220
+ client = chromadb.PersistentClient(path=tempfile.mkdtemp())
221
  vector_store = Chroma(
222
  client=client,
223
  collection_name="rag-chroma",
 
225
  )
226
 
227
  # Add documents in batches
228
+ batch_size = 100
229
  for i in range(0, len(list_of_documents), batch_size):
230
  end_idx = min(i + batch_size, len(list_of_documents))
231
  vector_store.add_documents(
 
244
  """You are an assistant for responding to Request For Proposal documents for a
245
  bidder in the field of Data Science and Engineering. Use the following pieces
246
  of retrieved context to respond to the requests. If you don't know the answer,
247
+ just say that you don't know. Provide detailed responses with specific examples
248
+ and capabilities where possible.
249
+
250
  Question: {question}
251
  Context: {context}
252
  Answer:"""
 
264
 
265
  return rag_chain
266
 
267
+ def preprocess_csv(csv_file):
268
+ """Preprocess the CSV file to ensure proper format."""
269
+ try:
270
+ # First try reading as is
271
+ df = pd.read_csv(csv_file.name, encoding='latin-1')
272
+
273
+ # If there's only one column and no header
274
+ if len(df.columns) == 1 and df.columns[0] != 'requirement':
275
+ # Read again with no header and assign column name
276
+ df = pd.read_csv(csv_file.name, encoding='latin-1', header=None, names=['requirement'])
277
+
278
+ # If there's no 'requirement' column, assume first column is requirements
279
+ if 'requirement' not in df.columns:
280
+ df = df.rename(columns={df.columns[0]: 'requirement'})
281
+
282
+ return df
283
+ except Exception as e:
284
+ # If standard CSV reading fails, try reading as plain text
285
+ try:
286
+ with open(csv_file.name, 'r', encoding='latin-1') as f:
287
+ requirements = f.read().strip().split('\n')
288
+ return pd.DataFrame({'requirement': requirements})
289
+ except Exception as e2:
290
+ raise ValueError(f"Could not process CSV file: {str(e2)}")
291
+
292
  def handle_upload(zip_file, csv_file):
293
  """Handle file uploads and process requirements."""
294
  try:
 
300
  with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
301
  zip_ref.extractall(temp_dir)
302
 
303
+ # Preprocess and read requirements CSV
304
+ requirements_df = preprocess_csv(csv_file)
 
 
305
 
306
  # Setup RAG system
307
  vector_store = setup_rag_system(temp_dir)
 
331
  except Exception as e:
332
  return pd.DataFrame([{'error': str(e)}])
333
 
334
+ def main():
335
+ """Main function to run the Gradio interface."""
336
+ iface = gr.Interface(
337
+ fn=handle_upload,
338
+ inputs=[
339
+ gr.File(label="Upload ZIP folder containing URLs", file_types=[".zip"]),
340
+ gr.File(label="Upload Requirements CSV", file_types=[".csv", ".txt"])
341
+ ],
342
+ outputs=gr.Dataframe(),
343
+ title="RAG System for RFP Analysis",
344
+ description="""Upload a ZIP folder containing URL documents and a CSV file with requirements to analyze.
345
+ The CSV file should contain requirements either as a single column or with a 'requirement' column header.""",
346
+ examples=[],
347
+ cache_examples=False
348
+ )
349
+
350
+ iface.launch(share=True)
351
 
352
  if __name__ == "__main__":
353
+ main()