Update app.py
app.py
CHANGED
@@ -140,6 +140,23 @@ from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langgraph.graph import END, StateGraph, START
 import chromadb
+import io
+
+# Environment variables setup
+os.environ["TAVILY_API_KEY"] = "YOUR_TAVILY_API_KEY"
+os.environ["NVIDIA_API_KEY"] = "YOUR_NVIDIA_API_KEY"
+os.environ["LANGCHAIN_PROJECT"] = "RAG project"
+
+class GradeDocuments(BaseModel):
+    """Binary score for relevance check on retrieved documents."""
+    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")
+
+class GraphState(TypedDict):
+    """Represents the state of our graph."""
+    question: str
+    generation: str
+    decision: str
+    documents: List[str]
 
 def process_documents(temp_dir):
     """Process documents from the extracted zip folder."""
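This hunk only defines GradeDocuments; where it gets bound to a model is outside the diff. A rough sketch of the usual LangChain pattern, assuming a ChatNVIDIA model (the NVIDIA_API_KEY above suggests one, but the actual model used in app.py is not shown):

# Hypothetical grader wiring for GradeDocuments; the model name is an
# assumption, not taken from app.py.
from langchain_core.prompts import ChatPromptTemplate
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
structured_grader = llm.with_structured_output(GradeDocuments)

grade_prompt = ChatPromptTemplate.from_messages([
    ("system", "Is the retrieved document relevant to the question? Answer 'yes' or 'no'."),
    ("human", "Document:\n{document}\n\nQuestion: {question}"),
])
retrieval_grader = grade_prompt | structured_grader
# retrieval_grader.invoke({"document": doc_text, "question": q}).binary_score -> 'yes' or 'no'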
@@ -200,7 +217,7 @@ def setup_rag_system(temp_dir):
 
     # Setup vector store
     ids = [str(i) for i in df['chunk_id'].to_list()]
-    client = chromadb.PersistentClient(path=tempfile.mkdtemp())
+    client = chromadb.PersistentClient(path=tempfile.mkdtemp())
     vector_store = Chroma(
         client=client,
         collection_name="rag-chroma",
@@ -208,7 +225,7 @@ def setup_rag_system(temp_dir):
     )
 
     # Add documents in batches
-    batch_size = 100
+    batch_size = 100
     for i in range(0, len(list_of_documents), batch_size):
         end_idx = min(i + batch_size, len(list_of_documents))
         vector_store.add_documents(
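A side note on the changed client line: the PersistentClient path is a fresh tempfile.mkdtemp() directory, so the index is throwaway and will not survive a restart anyway. If persistence is not wanted, an in-memory client is the simpler equivalent. A minimal sketch, assuming the same embedding function app.py configures elsewhere (called embeddings here):

# Sketch: in-memory Chroma client instead of a "persistent" one rooted in a
# temp dir. `embeddings` is an assumed name for the app's embedding function.
import chromadb
from langchain_community.vectorstores import Chroma

client = chromadb.EphemeralClient()  # nothing written to disk
vector_store = Chroma(
    client=client,
    collection_name="rag-chroma",
    embedding_function=embeddings,
)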
@@ -227,7 +244,9 @@ def create_workflow(vector_store):
     """You are an assistant for responding to Request For Proposal documents for a
     bidder in the field of Data Science and Engineering. Use the following pieces
     of retrieved context to respond to the requests. If you don't know the answer,
-    just say that you don't know.
+    just say that you don't know. Provide detailed responses with specific examples
+    and capabilities where possible.
+
     Question: {question}
     Context: {context}
     Answer:"""
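The hunk shows only the prompt text; the chain built from it is out of view. A plausible minimal wiring in LCEL, where template is the string above and llm and vector_store are names assumed from the rest of app.py:

# Sketch of a standard LCEL RAG chain; `llm`, `vector_store`, and `template`
# are assumptions about the surrounding code, not part of this diff.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

prompt = ChatPromptTemplate.from_template(template)
retriever = vector_store.as_retriever()

def format_docs(docs):
    # Join retrieved chunks into the {context} slot of the prompt.
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)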
@@ -245,6 +264,31 @@ def create_workflow(vector_store):
 
     return rag_chain
 
+def preprocess_csv(csv_file):
+    """Preprocess the CSV file to ensure proper format."""
+    try:
+        # First try reading as is
+        df = pd.read_csv(csv_file.name, encoding='latin-1')
+
+        # If there's only one column and no header
+        if len(df.columns) == 1 and df.columns[0] != 'requirement':
+            # Read again with no header and assign column name
+            df = pd.read_csv(csv_file.name, encoding='latin-1', header=None, names=['requirement'])
+
+        # If there's no 'requirement' column, assume first column is requirements
+        if 'requirement' not in df.columns:
+            df = df.rename(columns={df.columns[0]: 'requirement'})
+
+        return df
+    except Exception as e:
+        # If standard CSV reading fails, try reading as plain text
+        try:
+            with open(csv_file.name, 'r', encoding='latin-1') as f:
+                requirements = f.read().strip().split('\n')
+            return pd.DataFrame({'requirement': requirements})
+        except Exception as e2:
+            raise ValueError(f"Could not process CSV file: {str(e2)}")
+
 def handle_upload(zip_file, csv_file):
     """Handle file uploads and process requirements."""
     try:
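The fallback logic in preprocess_csv is easy to smoke-test in isolation; Gradio's file wrapper only needs to expose a .name path, so a SimpleNamespace can stand in for it (a hypothetical check, not part of the commit):

# Hypothetical smoke test: a header-less single-column CSV should come back
# as a DataFrame with a 'requirement' column holding both rows.
import tempfile
from types import SimpleNamespace

with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
    f.write("Must support SSO\nMust export reports to PDF\n")
    path = f.name

df = preprocess_csv(SimpleNamespace(name=path))
assert list(df.columns) == ["requirement"]
assert len(df) == 2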
@@ -256,10 +300,8 @@ def handle_upload(zip_file, csv_file):
         with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
             zip_ref.extractall(temp_dir)
 
-        #
-        requirements_df =
-        if 'requirement' not in requirements_df.columns:
-            raise ValueError("CSV file must contain a 'requirement' column")
+        # Preprocess and read requirements CSV
+        requirements_df = preprocess_csv(csv_file)
 
         # Setup RAG system
         vector_store = setup_rag_system(temp_dir)
@@ -289,19 +331,23 @@ def handle_upload(zip_file, csv_file):
     except Exception as e:
         return pd.DataFrame([{'error': str(e)}])
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+def main():
+    """Main function to run the Gradio interface."""
+    iface = gr.Interface(
+        fn=handle_upload,
+        inputs=[
+            gr.File(label="Upload ZIP folder containing URLs", file_types=[".zip"]),
+            gr.File(label="Upload Requirements CSV", file_types=[".csv", ".txt"])
+        ],
+        outputs=gr.Dataframe(),
+        title="RAG System for RFP Analysis",
+        description="""Upload a ZIP folder containing URL documents and a CSV file with requirements to analyze.
+        The CSV file should contain requirements either as a single column or with a 'requirement' column header.""",
+        examples=[],
+        cache_examples=False
+    )
+
+    iface.launch(share=True)
 
 if __name__ == "__main__":
-
+    main()
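One note on the new main(): a Hugging Face Space already serves the app publicly, so share=True chiefly matters when running locally. For quick checks without the UI, handle_upload can also be driven directly, again via objects exposing a .name path (the file names below are assumed local fixtures, not files shipped with this Space):

# Hypothetical headless run of the whole pipeline.
from types import SimpleNamespace

result_df = handle_upload(
    SimpleNamespace(name="sample_docs.zip"),
    SimpleNamespace(name="sample_requirements.csv"),
)
print(result_df.head())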