import os
import re
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI as LangchainOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import panel as pn
from panel.template import VanillaTemplate
CSS = """
#header {
height: 0;
padding: 0;
}
.pn-busy-container {
visibility: hidden;
}
"""
pn.extension(raw_css=[CSS])
# Custom CSS to use Lato font
lato_font_style = """
.bk, .bk-root {
font-family: 'Lato', sans-serif !important;
}
"""
# Create a custom template
template = VanillaTemplate(
raw_css=[lato_font_style],
css_files=['https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap']
)
# Set global sizing mode
pn.config.sizing_mode = 'stretch_width'
# Panel extension
pn.extension()
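# DocuChat: a Panel chat UI that answers questions about an uploaded PDF via a
# LangChain RetrievalQA pipeline (PyPDF loading, character chunking, OpenAI
# embeddings, Chroma similarity search).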
class LangchainConversation:
    def __init__(self):
        self.file_input = pn.widgets.FileInput(height=45)
        self.openaikey = pn.widgets.PasswordInput(value="", placeholder="Enter your OpenAI API Key here...", height=45)
        self.chatbox = pn.widgets.ChatBox(height=300, primary_name="User")
        self.chatbox.param.watch(self._chat, 'value')
        self.chat_history = []  # Chat history to store previous queries and responses
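    # Callback fired whenever the ChatBox value changes: it reads the latest user
    # message, stores the API key in the environment, and routes PDF uploads
    # through the retrieval pipeline; non-PDF uploads are deleted and get an
    # error reply.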
    def _chat(self, event):
        user_message = event.new[-1]
        user_text = user_message.get("User")  # renamed from `input` to avoid shadowing the builtin
        if user_text is None:
            return
        os.environ["OPENAI_API_KEY"] = self.openaikey.value
        if self.file_input.value is not None:
            file_path = "/.cache/temp.pdf"
            self.file_input.save(file_path)
            # Check if the uploaded file is a PDF
            if self.file_input.filename.lower().endswith('.pdf'):
                prompt_text = self.remove_empty_lines(user_text)
                if prompt_text:
                    result = self.qa(file=file_path, query=prompt_text)
                    self.chatbox.append({"AI": result})
            else:
                # Delete the non-PDF file from cache
                os.remove(file_path)
                # Display an error message in the chatbox
                self.chatbox.append({"AI": "Error: Only PDF files are allowed. Please upload a valid PDF file."})
    @staticmethod
    def remove_empty_lines(text):
        # Split on any newline style and drop blank lines
        lines = re.split(r'\r\n|\r|\n', text)
        return '\n'.join([line.strip() for line in lines if line.strip()])
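    # Retrieval pipeline: load the PDF, split it into ~1000-character chunks,
    # embed the chunks with OpenAI embeddings, index them in an ephemeral Chroma
    # store, and run a "stuff" RetrievalQA chain over the top-3 similar chunks.
    # Prior turns are appended to the query so the chain sees some conversation
    # context.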
    def qa(self, file, query):
        # Consider chat history when processing new queries
        chat_history_str = "\n".join([f"User: {q}\nAI: {a}" for q, a in self.chat_history])
        # Load, split, and analyze the document using the default text splitter
        loader = PyPDFLoader(file)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)  # Default text splitting
        texts = text_splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings()
        db = Chroma.from_documents(texts, embeddings)
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
        qa = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
        result = qa({"query": query + "\n" + chat_history_str})
        # Update chat history
        self.chat_history.append((query, result['result']))
        return result['result']
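    # Builds the page layout (credit line, file/key inputs, chat box), attaches
    # it to the Vanilla template defined above, and returns the servable template.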
    def view(self):
        custom_link_style = {  # currently unused; kept from the original styling setup
            'color': '#1b9aaa',
            'font-weight': 'bold'
        }
        layout = pn.Column(
            pn.pane.Markdown("""
            Built by <a href="https://www.mckenzielloydsmith.com/home?utm_source=HuggingFace&utm_medium=PDF+Analyzer" target="_blank">McKenzie</a>.
            """),
            pn.Row(self.file_input, self.openaikey), self.chatbox
        )
        # Add layout to the template
        template.main.append(layout)
        # Serve the template
        return template.servable()
langchain_conversation = LangchainConversation()
langchain_conversation.view()
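# Usage note (assumption, not part of the original app): as a Panel app, this
# module would typically be launched with `panel serve DocuChat.py`; the
# template.servable() call above is what registers the page with the server.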