"""DocuChat: a Panel chat UI that answers questions about an uploaded PDF.

The user uploads a PDF and supplies an OpenAI API key; each chat message is
answered by a LangChain ``RetrievalQA`` chain built over the PDF's contents.
"""
import os
import re

from langchain.chains import RetrievalQA
from langchain.llms import OpenAI as LangchainOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import panel as pn
from panel.template import VanillaTemplate

# Custom CSS to use Lato font
lato_font_style = """
.bk, .bk-root {
    font-family: 'Lato', sans-serif !important;
}
"""

# Create a custom template
template = VanillaTemplate(
    raw_css=[lato_font_style],
    css_files=['https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap'],
)

# Set global sizing mode
pn.config.sizing_mode = 'stretch_width'

# Panel extension
pn.extension()

# Intro text shown above the widgets.
# NOTE(review): heading/list line breaks were reconstructed so that each
# heading and list item sits on its own line -- confirm the intended rendering.
_INTRO_MARKDOWN = """
# DocuChat
AI-Powered Query Engine for Document Insights (powered by LangChain & OpenAI)

## How it works:
1) Upload a PDF
2) Enter your OpenAI API key (get one via [OpenAI](https://platform.openai.com/account))
3) Type a question and your document will get analyzed for an answer

Built by McKenzie.
"""


class LangchainConversation:
    """Chat UI state plus the question-answering pipeline behind it."""

    # Where the uploaded PDF is written before it is loaded by PyPDFLoader.
    _PDF_PATH = "/.cache/temp.pdf"

    def __init__(self):
        self.file_input = pn.widgets.FileInput(height=45)
        self.openaikey = pn.widgets.PasswordInput(
            value="",
            placeholder="Enter your OpenAI API Key here...",
            height=45,
        )
        self.chatbox = pn.widgets.ChatBox(height=300, primary_name="User")
        # Every change to the chat log (including our own AI replies) fires _chat.
        self.chatbox.param.watch(self._chat, 'value')
        # (query, answer) pairs from previous turns, oldest first.
        self.chat_history = []

    def _chat(self, event):
        """Answer the newest user message in the chat box, if there is one.

        Args:
            event: Param watcher event whose ``new`` attribute is the chat
                log, a list of ``{sender_name: text}`` dicts.
        """
        # An empty log has no last message to inspect.
        if not event.new:
            return

        user_message = event.new[-1]
        user_text = user_message.get("User")
        # The newest entry is not from the user (e.g. the AI reply we just
        # appended re-triggers this watcher), so there is nothing to answer.
        if user_text is None:
            return

        os.environ["OPENAI_API_KEY"] = self.openaikey.value

        # Without an uploaded document there is nothing to query.
        if self.file_input.value is None:
            return

        self.file_input.save(self._PDF_PATH)
        prompt_text = self.remove_empty_lines(user_text)
        if prompt_text:
            result = self.qa(file=self._PDF_PATH, query=prompt_text)
            self.chatbox.append({"AI": result})

    @staticmethod
    def remove_empty_lines(text: str) -> str:
        """Strip each line of *text* and drop lines that are blank.

        Lines are split on ``\\r\\n``, ``\\r`` or ``\\n`` and re-joined with
        ``\\n``.

        Args:
            text: Raw user input.

        Returns:
            The non-blank lines, stripped, joined by a single newline; an
            empty string when every line is blank.
        """
        lines = re.split(r"\r\n|\r|\n", text)
        return "\n".join(line.strip() for line in lines if line.strip())

    def qa(self, file: str, query: str) -> str:
        """Answer *query* from the PDF at *file* and record the exchange.

        Args:
            file: Path of the PDF to load.
            query: The user's question.

        Returns:
            The ``'result'`` entry of the RetrievalQA chain's output.
        """
        # Consider chat history when processing new queries
        chat_history_str = "\n".join(
            f"User: {q}\nAI: {a}" for q, a in self.chat_history
        )

        # Load, split, and analyze the document using the default text splitter
        loader = PyPDFLoader(file)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_documents(documents)

        # NOTE: the document is re-split and re-embedded on every call.
        embeddings = OpenAIEmbeddings()
        db = Chroma.from_documents(texts, embeddings)
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
        chain = RetrievalQA.from_chain_type(
            llm=LangchainOpenAI(),
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

        # Only append the history separator when there is history to append.
        full_query = f"{query}\n{chat_history_str}" if chat_history_str else query
        result = chain({"query": full_query})
        answer = result['result']

        # Update chat history
        self.chat_history.append((query, answer))
        return answer

    def view(self):
        """Add the app layout to the module-level template and mark it servable.

        Returns:
            The value returned by ``template.servable()``.
        """
        custom_link_style = {
            'color': '#1b9aaa',
            'font-weight': 'bold',
        }

        layout = pn.Column(
            pn.pane.Markdown(_INTRO_MARKDOWN, styles=custom_link_style),
            pn.Row(self.file_input, self.openaikey),
            self.chatbox,
        )

        # Add layout to the template
        template.main.append(layout)

        # Serve the template
        return template.servable()


langchain_conversation = LangchainConversation()
langchain_conversation.view()