version3

Sleeping

App Files Files Community

DrishtiSharma committed on Dec 5, 2024

Commit

ae9100f

verified ·

1 Parent(s): afdff8e

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -5

app.py CHANGED Viewed

@@ -14,7 +14,17 @@ from langchain_community.document_loaders import (
 from datetime import datetime
 import pytz
-# DocumentRAG class with environment variable support for API Key
 class DocumentRAG:
     def __init__(self):
         self.document_store = None
@@ -28,6 +38,10 @@ class DocumentRAG:
         if not self.api_key:
             raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
     def process_documents(self, uploaded_files):
         """Process uploaded files by saving them temporarily and extracting content."""
         if not self.api_key:
@@ -51,14 +65,13 @@ class DocumentRAG:
                 elif temp_file_path.endswith('.csv'):
                     loader = CSVLoader(temp_file_path)
                 else:
-                    continue
                 # Load the documents
                 try:
                     documents.extend(loader.load())
                 except Exception as e:
-                    print(f"Error loading {temp_file_path}: {str(e)}")
-                    continue
             if not documents:
                 return "No valid documents were processed. Please check your files."
@@ -77,7 +90,12 @@ class DocumentRAG:
             # Create embeddings and initialize retrieval chain
             embeddings = OpenAIEmbeddings(api_key=self.api_key)
-            self.document_store = Chroma.from_documents(documents, embeddings)
             self.qa_chain = ConversationalRetrievalChain.from_llm(
                 ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
                 self.document_store.as_retriever(search_kwargs={'k': 6}),
@@ -109,6 +127,7 @@ class DocumentRAG:
             return f"Error generating summary: {str(e)}"
     def handle_query(self, question, history):
         if not self.qa_chain:
             return history + [("System", "Please process the documents first.")]
         try:

 from datetime import datetime
 import pytz
+from langchain.chains import ConversationalRetrievalChain
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from langchain_community.vectorstores import Chroma
+from langchain_community.document_loaders import PyPDFLoader, TextLoader, CSVLoader
+import os
+import tempfile
+from datetime import datetime
+import pytz
 class DocumentRAG:
     def __init__(self):
         self.document_store = None
         if not self.api_key:
             raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
+        # Persistent directory for Chroma to avoid tenant-related errors
+        self.chroma_persist_dir = "./chroma_storage"
+        os.makedirs(self.chroma_persist_dir, exist_ok=True)
     def process_documents(self, uploaded_files):
         """Process uploaded files by saving them temporarily and extracting content."""
         if not self.api_key:
                 elif temp_file_path.endswith('.csv'):
                     loader = CSVLoader(temp_file_path)
                 else:
+                    return f"Unsupported file type: {uploaded_file.name}"
                 # Load the documents
                 try:
                     documents.extend(loader.load())
                 except Exception as e:
+                    return f"Error loading {uploaded_file.name}: {str(e)}"
             if not documents:
                 return "No valid documents were processed. Please check your files."
             # Create embeddings and initialize retrieval chain
             embeddings = OpenAIEmbeddings(api_key=self.api_key)
+            self.document_store = Chroma.from_documents(
+                documents,
+                embeddings,
+                persist_directory=self.chroma_persist_dir  # Persistent directory for Chroma
+            )
             self.qa_chain = ConversationalRetrievalChain.from_llm(
                 ChatOpenAI(temperature=0, model_name='gpt-4', api_key=self.api_key),
                 self.document_store.as_retriever(search_kwargs={'k': 6}),
             return f"Error generating summary: {str(e)}"
     def handle_query(self, question, history):
+        """Handle user queries."""
         if not self.qa_chain:
             return history + [("System", "Please process the documents first.")]
         try: