DocuChat_2

Sleeping

App Files Files Community

DocuChat_2 / DocuChat.py

mckplus

Update DocuChat.py

23237f8 about 1 year ago

raw

history blame

No virus

3.81 kB

	import os
	import re
	from langchain.chains import RetrievalQA
	from langchain.llms import OpenAI as LangchainOpenAI
	from langchain.document_loaders import PyPDFLoader
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.vectorstores import Chroma
	import panel as pn

	# Load the Lato web font. Google Fonts serves a stylesheet at this URL, so it
	# is registered as an external CSS file. raw_css only accepts CSS text; an
	# HTML <link> tag placed there is not valid CSS and the font would never load.
	lato_font_link = "https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap"
	pn.config.css_files.append(lato_font_link)

	# Custom CSS: apply Lato to the Bokeh/Panel widgets and style the author link.
	pn.config.raw_css.append("""
	.bk, .bk-root, .bk-widget {
	font-family: 'Lato', sans-serif !important;
	}
	.mckenzie-link a {
	font-weight: bold;
	color: #1b9aaa;
	}
	""")

	# Make every component stretch to the available width by default.
	pn.config.sizing_mode = 'stretch_width'

	# Initialise the Panel extension.
	pn.extension()

	class LangchainConversation:
	    """Panel chat UI that answers questions about an uploaded PDF.

	    The instance owns three widgets (a file input, a password field for the
	    OpenAI API key and a chat box) plus a list of ``(query, answer)`` tuples
	    recording earlier exchanges. Every change of the chat box value is routed
	    to :meth:`_chat`, which runs a retrieval-QA chain over the uploaded PDF.
	    """

	    # Location the uploaded PDF is written to before it is loaded.
	    _PDF_PATH = "/.cache/temp.pdf"

	    def __init__(self):
	        """Create the widgets and subscribe to chat box updates."""
	        self.file_input = pn.widgets.FileInput(height=45)
	        self.openaikey = pn.widgets.PasswordInput(value="", placeholder="Enter your OpenAI API Key here...", height=45)
	        self.chatbox = pn.widgets.ChatBox(height=300, primary_name="User")
	        self.chatbox.param.watch(self._chat, 'value')
	        # (query, answer) tuples of previous exchanges, oldest first.
	        self.chat_history = []

	    def _chat(self, event):
	        """React to a change of the chat box value.

	        Only acts when the newest message was written by "User"; messages
	        appended by this class under the "AI" key re-trigger the watcher and
	        are ignored here.

	        Args:
	            event: Change event whose ``new`` attribute is the list of chat
	                messages; each message is a dict keyed by speaker name.
	        """
	        messages = event.new
	        if not messages:
	            # Nothing to answer (e.g. the chat was cleared).
	            return
	        user_text = messages[-1].get("User")
	        if user_text is None:
	            return

	        # NOTE(review): os.environ is process-wide, so the key is shared by
	        # everything running in this process. An empty field is skipped so it
	        # does not overwrite a key that is already configured.
	        api_key = self.openaikey.value
	        if api_key:
	            os.environ["OPENAI_API_KEY"] = api_key

	        prompt_text = self.remove_empty_lines(user_text)
	        if not prompt_text:
	            return

	        if self.file_input.value is None:
	            # Without an upload there is no document to query.
	            self.chatbox.append({"AI": "Please upload a PDF before asking a question."})
	            return

	        self.file_input.save(self._PDF_PATH)
	        answer = self.qa(file=self._PDF_PATH, query=prompt_text)
	        self.chatbox.append({"AI": answer})

	    @staticmethod
	    def remove_empty_lines(text):
	        """Return *text* with blank lines dropped and each line stripped.

	        Lines are split on the usual line boundaries (``\\n``, ``\\r\\n``,
	        ``\\r`` and the others recognised by ``str.splitlines``) and the
	        surviving lines are re-joined with a single newline character.

	        Args:
	            text: Raw text typed by the user.

	        Returns:
	            The cleaned text; an empty string if every line was blank.
	        """
	        stripped = (line.strip() for line in text.splitlines())
	        return "\n".join(line for line in stripped if line)

	    def qa(self, file, query):
	        """Answer *query* against the PDF stored at *file*.

	        The PDF is loaded, split into chunks, embedded into a Chroma store
	        and queried through a "stuff" RetrievalQA chain using the three most
	        similar chunks. Earlier exchanges are appended to the query text, and
	        the new exchange is recorded in ``self.chat_history``.

	        Args:
	            file: Path of the PDF to analyse.
	            query: The user's question.

	        Returns:
	            The chain's answer (the ``'result'`` entry of its output).
	        """
	        # Earlier exchanges, rendered one "User:/AI:" pair per two lines.
	        history = "\n".join(f"User: {q}\nAI: {a}" for q, a in self.chat_history)

	        # NOTE(review): the document is re-loaded and re-embedded on every
	        # question; caching the retriever per upload would presumably avoid
	        # repeated embedding calls - confirm before changing.
	        documents = PyPDFLoader(file).load()
	        splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
	        chunks = splitter.split_documents(documents)
	        db = Chroma.from_documents(chunks, OpenAIEmbeddings())
	        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
	        chain = RetrievalQA.from_chain_type(llm=LangchainOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)

	        # Only append the history when there is one, to avoid a dangling newline.
	        full_query = f"{query}\n{history}" if history else query
	        answer = chain({"query": full_query})['result']

	        self.chat_history.append((query, answer))
	        return answer

	    def view(self):
	        """Build the page layout, mark it as servable and return it."""
	        layout = pn.Column(
	            pn.pane.Markdown("""
	# DocuChat
	AI-Powered Query Engine for Document Insights (powered by LangChain & OpenAI)
	## How it works:
	1) Upload a PDF
	2) Enter your OpenAI API key (get one via [OpenAI](https://platform.openai.com/account))
	3) Type a question and your document will get analyzed for an answer

	Built by <span class="mckenzie-link">[McKenzie](https://www.mckenzielloydsmith.com/home?utm_source=HuggingFace&utm_medium=PDF+Analyzer)</span>.
	"""),
	            pn.Row(self.file_input, self.openaikey), self.chatbox
	        ).servable()
	        return layout

	# Build the app once when the module is loaded. view() marks its layout as
	# servable, so it is called for that side effect and the returned layout is
	# not kept.
	langchain_conversation = LangchainConversation()
	langchain_conversation.view()