test-ciol

Sleeping

App Files Files Community

DrishtiSharma committed on Dec 5, 2024

Commit

3570753

verified ·

1 Parent(s): ec7ddd1

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -11

app.py CHANGED Viewed

@@ -28,33 +28,42 @@ class DocumentRAG:
         if not self.api_key:
             raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
-    def process_documents(self, file_paths):
         if not self.api_key:
             return "Please set the OpenAI API key in the environment variables."
-        if not file_paths:
             return "Please upload documents first."
         try:
             documents = []
-            for file_path in file_paths:
-                if file_path.name.endswith('.pdf'):
-                    loader = PyPDFLoader(file_path.name)
-                elif file_path.name.endswith('.txt'):
-                    loader = TextLoader(file_path.name)
-                elif file_path.name.endswith('.csv'):
-                    loader = CSVLoader(file_path.name)
                 else:
                     continue
                 try:
                     documents.extend(loader.load())
                 except Exception as e:
-                    print(f"Error loading {file_path.name}: {str(e)}")
                     continue
             if not documents:
                 return "No valid documents were processed. Please check your files."
             text_splitter = RecursiveCharacterTextSplitter(
                 chunk_size=1000,
                 chunk_overlap=200,
@@ -62,9 +71,11 @@ class DocumentRAG:
             )
             documents = text_splitter.split_documents(documents)
             combined_text = " ".join([doc.page_content for doc in documents])
             self.document_summary = self.generate_summary(combined_text)
             embeddings = OpenAIEmbeddings(api_key=self.api_key)
             self.document_store = Chroma.from_documents(documents, embeddings)
             self.qa_chain = ConversationalRetrievalChain.from_llm(
@@ -80,7 +91,7 @@ class DocumentRAG:
             return f"Error processing documents: {str(e)}"
     def generate_summary(self, text):
-        """Generate a summary of the uploaded documents."""
         if not self.api_key:
             return "API Key not set. Please set it in the environment variables."
         try:

         if not self.api_key:
             raise ValueError("API Key not found. Make sure to set the 'OPENAI_API_KEY' environment variable.")
+    def process_documents(self, uploaded_files):
+        """Process uploaded files by saving them temporarily and extracting content."""
         if not self.api_key:
             return "Please set the OpenAI API key in the environment variables."
+        if not uploaded_files:
             return "Please upload documents first."
         try:
             documents = []
+            for uploaded_file in uploaded_files:
+                # Save uploaded file to a temporary location
+                temp_file_path = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]).name
+                with open(temp_file_path, "wb") as temp_file:
+                    temp_file.write(uploaded_file.read())
+                # Determine the loader based on the file type
+                if temp_file_path.endswith('.pdf'):
+                    loader = PyPDFLoader(temp_file_path)
+                elif temp_file_path.endswith('.txt'):
+                    loader = TextLoader(temp_file_path)
+                elif temp_file_path.endswith('.csv'):
+                    loader = CSVLoader(temp_file_path)
                 else:
                     continue
+                # Load the documents
                 try:
                     documents.extend(loader.load())
                 except Exception as e:
+                    print(f"Error loading {temp_file_path}: {str(e)}")
                     continue
             if not documents:
                 return "No valid documents were processed. Please check your files."
+            # Split text for better processing
             text_splitter = RecursiveCharacterTextSplitter(
                 chunk_size=1000,
                 chunk_overlap=200,
             )
             documents = text_splitter.split_documents(documents)
+            # Combine text for summary
             combined_text = " ".join([doc.page_content for doc in documents])
             self.document_summary = self.generate_summary(combined_text)
+            # Create embeddings and initialize retrieval chain
             embeddings = OpenAIEmbeddings(api_key=self.api_key)
             self.document_store = Chroma.from_documents(documents, embeddings)
             self.qa_chain = ConversationalRetrievalChain.from_llm(
             return f"Error processing documents: {str(e)}"
     def generate_summary(self, text):
+        """Generate a summary of the provided text."""
         if not self.api_key:
             return "API Key not set. Please set it in the environment variables."
         try: