# HuggingFace Spaces status banner ("Spaces: Sleeping") captured during page
# extraction — not part of the application code.
# Standard library
import os
import re

# Third-party: Panel UI toolkit
import panel as pn
from panel.template import VanillaTemplate

# Third-party: LangChain retrieval-QA components
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI as LangchainOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
# Hide the template header and the busy-spinner container.
CSS = """
#header {
height: 0;
padding: 0;
}
.pn-busy-container {
visibility: hidden;
}
"""
pn.extension(raw_css=[CSS])

# Custom CSS to render everything in the Lato font.
lato_font_style = """
.bk, .bk-root {
font-family: 'Lato', sans-serif !important;
}
"""

# Template that pulls Lato from Google Fonts and applies the override above.
template = VanillaTemplate(
    raw_css=[lato_font_style],
    css_files=['https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap']
)

# Widgets stretch to the available width by default.
# NOTE: the original called pn.extension() a second time here with no
# arguments; that call was redundant (the configured call above already
# initialized the extension), so it has been removed.
pn.config.sizing_mode = 'stretch_width'
class LangchainConversation:
    """Panel chat UI that answers questions about an uploaded PDF.

    Wires a FileInput (PDF), a PasswordInput (OpenAI API key), and a ChatBox
    to a LangChain RetrievalQA pipeline (OpenAI embeddings + Chroma).
    """

    def __init__(self):
        self.file_input = pn.widgets.FileInput(height=45)
        self.openaikey = pn.widgets.PasswordInput(
            value="", placeholder="Enter your OpenAI API Key here...", height=45
        )
        self.chatbox = pn.widgets.ChatBox(height=300, primary_name="User")
        # Re-run QA every time a new message is appended to the chatbox.
        self.chatbox.param.watch(self._chat, 'value')
        # (query, answer) pairs from earlier turns, folded into later prompts.
        self.chat_history = []

    def _chat(self, event):
        """Handle a new chatbox entry: run QA over the uploaded PDF.

        The watcher also fires for the AI's own replies (which have no
        "User" key), so those events are ignored.
        """
        user_message = event.new[-1]
        # Renamed from `input` to avoid shadowing the builtin.
        user_text = user_message.get("User")
        if user_text is None:
            return
        os.environ["OPENAI_API_KEY"] = self.openaikey.value
        if self.file_input.value is None:
            return
        # NOTE(review): writes to the filesystem root; assumes the host
        # container permits this — confirm, else switch to tempfile.
        file_path = "/.cache/temp.pdf"
        self.file_input.save(file_path)
        # Only PDF uploads are supported by PyPDFLoader.
        if self.file_input.filename.lower().endswith('.pdf'):
            prompt_text = self.remove_empty_lines(user_text)
            if prompt_text:
                result = self.qa(file=file_path, query=prompt_text)
                self.chatbox.append({"AI": result})
        else:
            # Delete the non-PDF file from cache and surface an error.
            os.remove(file_path)
            self.chatbox.append({"AI": "Error: Only PDF files are allowed. Please upload a valid PDF file."})

    @staticmethod
    def remove_empty_lines(text):
        """Strip each line of *text*, drop blank lines, rejoin with newlines.

        Fixes from the original: the method took no ``self``/``staticmethod``
        marker, so calling it via the instance raised TypeError, and the
        doubled backslashes (``r'\\r\\n'``, ``'\\n'.join``) matched/emitted
        literal backslash sequences instead of real newlines.
        """
        lines = re.split(r'\r\n|\r|\n', text)
        return '\n'.join(line.strip() for line in lines if line.strip())

    def qa(self, file, query):
        """Answer *query* against the PDF at *file*, using chat history.

        Builds an ephemeral Chroma index over 1000-character chunks of the
        PDF and runs a "stuff" RetrievalQA chain on the top-3 similar
        chunks. Records (query, answer) in ``self.chat_history`` and
        returns the answer string.
        """
        # Fold earlier turns into the prompt so follow-ups have context
        # (joined with real newlines, not literal "\n" sequences).
        chat_history_str = "\n".join(
            f"User: {q}\nAI: {a}" for q, a in self.chat_history
        )
        # Load and split the document with the default character splitter.
        loader = PyPDFLoader(file)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings()
        db = Chroma.from_documents(texts, embeddings)
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
        qa_chain = RetrievalQA.from_chain_type(
            llm=LangchainOpenAI(),
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )
        result = qa_chain({"query": query + "\n" + chat_history_str})
        # Update chat history for subsequent turns.
        self.chat_history.append((query, result['result']))
        return result['result']

    def view(self):
        """Assemble the page layout, attach it to the template, and serve it."""
        layout = pn.Column(
            pn.pane.Markdown("""
Built by <a href="https://www.mckenzielloydsmith.com/home?utm_source=HuggingFace&utm_medium=PDF+Analyzer" target="_blank">McKenzie</a>.
"""),
            pn.Row(self.file_input, self.openaikey),
            self.chatbox,
        )
        # (The original defined an unused `custom_link_style` dict here; removed.)
        template.main.append(layout)
        return template.servable()
# Instantiate the app and mount its layout on the served template.
conversation_app = LangchainConversation()
conversation_app.view()