mckplus committed on
Commit
af081bd
1 Parent(s): 0c14d50

Update DocuChat.py

Browse files
Files changed (1) hide show
  1. DocuChat.py +6 -6
DocuChat.py CHANGED
@@ -37,23 +37,23 @@ class LangchainConversation:
37
 
38
  @staticmethod
39
  def remove_empty_lines(text):
40
- lines = re.split(r'\r\n|\r|\n', text)
41
- return '\n'.join([line.strip() for line in lines if line.strip()])
42
 
43
  def qa(self, file, query):
44
  # Consider chat history when processing new queries
45
- chat_history_str = "\n".join([f"User: {q}\nAI: {a}" for q, a in self.chat_history])
46
 
47
- # Load, split, and analyze the entire document
48
  loader = PyPDFLoader(file)
49
  documents = loader.load()
50
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, context_aware=True) # Context-aware splitting
51
  texts = text_splitter.split_documents(documents)
52
  embeddings = OpenAIEmbeddings()
53
  db = Chroma.from_documents(texts, embeddings)
54
  retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
55
  qa = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
56
- result = qa({"query": query + "\n" + chat_history_str})
57
 
58
  # Update chat history
59
  self.chat_history.append((query, result['result']))
 
37
 
38
  @staticmethod
39
  def remove_empty_lines(text):
40
+ lines = re.split(r'\\r\\n|\\r|\\n', text)
41
+ return '\\n'.join([line.strip() for line in lines if line.strip()])
42
 
43
  def qa(self, file, query):
44
  # Consider chat history when processing new queries
45
+ chat_history_str = "\\n".join([f"User: {q}\\nAI: {a}" for q, a in self.chat_history])
46
 
47
+ # Load, split, and analyze the document using the default text splitter
48
  loader = PyPDFLoader(file)
49
  documents = loader.load()
50
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) # Default text splitting
51
  texts = text_splitter.split_documents(documents)
52
  embeddings = OpenAIEmbeddings()
53
  db = Chroma.from_documents(texts, embeddings)
54
  retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
55
  qa = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
56
+ result = qa({"query": query + "\\n" + chat_history_str})
57
 
58
  # Update chat history
59
  self.chat_history.append((query, result['result']))