Spaces:
Sleeping
Sleeping
Update DocuChat.py
Browse files- DocuChat.py +6 -6
DocuChat.py
CHANGED
@@ -37,23 +37,23 @@ class LangchainConversation:
|
|
37 |
|
38 |
@staticmethod
|
39 |
def remove_empty_lines(text):
|
40 |
-
lines = re.split(r'
|
41 |
-
return '
|
42 |
|
43 |
def qa(self, file, query):
|
44 |
# Consider chat history when processing new queries
|
45 |
-
chat_history_str = "
|
46 |
|
47 |
-
# Load, split, and analyze the
|
48 |
loader = PyPDFLoader(file)
|
49 |
documents = loader.load()
|
50 |
-
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0
|
51 |
texts = text_splitter.split_documents(documents)
|
52 |
embeddings = OpenAIEmbeddings()
|
53 |
db = Chroma.from_documents(texts, embeddings)
|
54 |
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
|
55 |
qa = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
|
56 |
-
result = qa({"query": query + "
|
57 |
|
58 |
# Update chat history
|
59 |
self.chat_history.append((query, result['result']))
|
|
|
37 |
|
38 |
@staticmethod
|
39 |
def remove_empty_lines(text):
|
40 |
+
lines = re.split(r'\\r\\n|\\r|\\n', text)
|
41 |
+
return '\\n'.join([line.strip() for line in lines if line.strip()])
|
42 |
|
43 |
def qa(self, file, query):
|
44 |
# Consider chat history when processing new queries
|
45 |
+
chat_history_str = "\\n".join([f"User: {q}\\nAI: {a}" for q, a in self.chat_history])
|
46 |
|
47 |
+
# Load, split, and analyze the document using the default text splitter
|
48 |
loader = PyPDFLoader(file)
|
49 |
documents = loader.load()
|
50 |
+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) # Default text splitting
|
51 |
texts = text_splitter.split_documents(documents)
|
52 |
embeddings = OpenAIEmbeddings()
|
53 |
db = Chroma.from_documents(texts, embeddings)
|
54 |
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
|
55 |
qa = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
|
56 |
+
result = qa({"query": query + "\\n" + chat_history_str})
|
57 |
|
58 |
# Update chat history
|
59 |
self.chat_history.append((query, result['result']))
|