DocuChat_2

Sleeping

mckplus committed on Aug 17, 2023

Commit

af081bd

•

1 Parent(s): 0c14d50

Update DocuChat.py

Files changed (1) hide show

DocuChat.py CHANGED Viewed

@@ -37,23 +37,23 @@ class LangchainConversation:
     @staticmethod
     def remove_empty_lines(text):
-        lines = re.split(r'\r\n|\r|\n', text)
-        return '\n'.join([line.strip() for line in lines if line.strip()])
     def qa(self, file, query):
         # Consider chat history when processing new queries
-        chat_history_str = "\n".join([f"User: {q}\nAI: {a}" for q, a in self.chat_history])
-        # Load, split, and analyze the entire document
         loader = PyPDFLoader(file)
         documents = loader.load()
-        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, context_aware=True)  # Context-aware splitting
         texts = text_splitter.split_documents(documents)
         embeddings = OpenAIEmbeddings()
         db = Chroma.from_documents(texts, embeddings)
         retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
         qa = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
-        result = qa({"query": query + "\n" + chat_history_str})
         # Update chat history
         self.chat_history.append((query, result['result']))

     @staticmethod
     def remove_empty_lines(text):
+        lines = re.split(r'\\r\\n|\\r|\\n', text)
+        return '\\n'.join([line.strip() for line in lines if line.strip()])
     def qa(self, file, query):
         # Consider chat history when processing new queries
+        chat_history_str = "\\n".join([f"User: {q}\\nAI: {a}" for q, a in self.chat_history])
+        # Load, split, and analyze the document using the default text splitter
         loader = PyPDFLoader(file)
         documents = loader.load()
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)  # Default text splitting
         texts = text_splitter.split_documents(documents)
         embeddings = OpenAIEmbeddings()
         db = Chroma.from_documents(texts, embeddings)
         retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
         qa = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
+        result = qa({"query": query + "\\n" + chat_history_str})
         # Update chat history
         self.chat_history.append((query, result['result']))