import os
import re
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI as LangchainOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import panel as pn
from panel.template import VanillaTemplate
CSS = """
#header {
height: 0;
padding: 0;
}
.pn-busy-container {
visibility: hidden;
}
"""
pn.extension(raw_css=[CSS])
# Custom CSS to use Lato font
lato_font_style = """
.bk, .bk-root {
font-family: 'Lato', sans-serif !important;
}
"""
# Create a custom template
template = VanillaTemplate(
raw_css=[lato_font_style],
css_files=['https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap']
)
# Set global sizing mode
pn.config.sizing_mode = 'stretch_width'
# Panel extension
pn.extension()
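# DocuChat: a Panel chat UI that answers questions about an uploaded PDF via a
# LangChain RetrievalQA pipeline (PyPDF loading, character chunking, OpenAI
# embeddings, Chroma similarity search).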
class LangchainConversation:
    def __init__(self):
        self.file_input = pn.widgets.FileInput(height=45)
        self.openaikey = pn.widgets.PasswordInput(value="", placeholder="Enter your OpenAI API Key here...", height=45)
        self.chatbox = pn.widgets.ChatBox(height=300, primary_name="User")
        self.chatbox.param.watch(self._chat, 'value')
        self.chat_history = []  # Chat history to store previous queries and responses
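    # Callback fired whenever the ChatBox value changes: it reads the latest user
    # message, stores the API key in the environment, and routes PDF uploads
    # through the retrieval pipeline; non-PDF uploads are deleted and get an
    # error reply.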
    def _chat(self, event):
        user_message = event.new[-1]
        user_text = user_message.get("User")  # renamed from `input` to avoid shadowing the builtin
        if user_text is None:
            return
        os.environ["OPENAI_API_KEY"] = self.openaikey.value
        if self.file_input.value is not None:
            file_path = "/.cache/temp.pdf"
            self.file_input.save(file_path)
            # Check if the uploaded file is a PDF
            if self.file_input.filename.lower().endswith('.pdf'):
                prompt_text = self.remove_empty_lines(user_text)
                if prompt_text:
                    result = self.qa(file=file_path, query=prompt_text)
                    self.chatbox.append({"AI": result})
            else:
                # Delete the non-PDF file from cache
                os.remove(file_path)
                # Display an error message in the chatbox
                self.chatbox.append({"AI": "Error: Only PDF files are allowed. Please upload a valid PDF file."})
    @staticmethod
    def remove_empty_lines(text):
        # Split on any newline style and drop blank lines
        lines = re.split(r'\r\n|\r|\n', text)
        return '\n'.join([line.strip() for line in lines if line.strip()])
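    # Retrieval pipeline: load the PDF, split it into ~1000-character chunks,
    # embed the chunks with OpenAI embeddings, index them in an ephemeral Chroma
    # store, and run a "stuff" RetrievalQA chain over the top-3 similar chunks.
    # Prior turns are appended to the query so the chain sees some conversation
    # context.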
    def qa(self, file, query):
        # Consider chat history when processing new queries
        chat_history_str = "\n".join([f"User: {q}\nAI: {a}" for q, a in self.chat_history])
        # Load, split, and analyze the document using the default text splitter
        loader = PyPDFLoader(file)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)  # Default text splitting
        texts = text_splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings()
        db = Chroma.from_documents(texts, embeddings)
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
        qa = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
        result = qa({"query": query + "\n" + chat_history_str})
        # Update chat history
        self.chat_history.append((query, result['result']))
        return result['result']
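    # Builds the page layout (credit line, file/key inputs, chat box), attaches
    # it to the Vanilla template defined above, and returns the servable template.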
    def view(self):
        custom_link_style = {  # currently unused; kept from the original styling setup
            'color': '#1b9aaa',
            'font-weight': 'bold'
        }
        layout = pn.Column(
            pn.pane.Markdown("""
            Built by <a href="https://www.mckenzielloydsmith.com/home?utm_source=HuggingFace&utm_medium=PDF+Analyzer" target="_blank">McKenzie</a>.
            """),
            pn.Row(self.file_input, self.openaikey), self.chatbox
        )
        # Add layout to the template
        template.main.append(layout)
        # Serve the template
        return template.servable()
langchain_conversation = LangchainConversation()
langchain_conversation.view()
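# Usage note (assumption, not part of the original app): as a Panel app, this
# module would typically be launched with `panel serve DocuChat.py`; the
# template.servable() call above is what registers the page with the server.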