# "Spaces: Sleeping" — Hugging Face Space status banner captured along with the
# source; kept as a comment so the file remains valid Python.
import os | |
import re | |
from langchain.chains import RetrievalQA | |
from langchain.llms import OpenAI as LangchainOpenAI | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.embeddings import OpenAIEmbeddings | |
from langchain.vectorstores import Chroma | |
import panel as pn | |
# --- Global Panel appearance configuration -------------------------------

# Pull the Lato web font in from Google Fonts.
lato_font_link = "<link href='https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap' rel='stylesheet'>"
pn.config.raw_css.append(lato_font_link)

# Force Lato onto all Bokeh/Panel widgets and style the author-credit link.
lato_css = """
.bk, .bk-root, .bk-widget {
    font-family: 'Lato', sans-serif !important;
}
.mckenzie-link a {
    font-weight: bold;
    color: #1b9aaa;
}
"""
pn.config.raw_css.append(lato_css)

# Widgets stretch to fill the available horizontal space by default.
pn.config.sizing_mode = 'stretch_width'

# Initialize the Panel extension (must run before widgets are created).
pn.extension()
class LangchainConversation:
    """Panel chat UI that answers questions about an uploaded PDF.

    Wires together a PDF ``FileInput``, an OpenAI-key ``PasswordInput`` and a
    ``ChatBox``; each user message triggers a LangChain ``RetrievalQA`` run
    over the uploaded document, with prior exchanges appended for context.
    """

    def __init__(self):
        # Widgets: PDF upload, API-key entry, and the chat transcript.
        self.file_input = pn.widgets.FileInput(height=45)
        self.openaikey = pn.widgets.PasswordInput(
            value="", placeholder="Enter your OpenAI API Key here...", height=45
        )
        self.chatbox = pn.widgets.ChatBox(height=300, primary_name="User")
        # Run _chat whenever a new message is appended to the chat box.
        self.chatbox.param.watch(self._chat, 'value')
        # List of (query, answer) tuples fed back into later prompts.
        self.chat_history = []

    def _chat(self, event):
        """React to a new chatbox entry: run QA on the PDF and post the answer.

        Parameters
        ----------
        event : param.parameterized.Event
            Watcher event whose ``new`` value is the full message list; only
            the most recent entry is inspected.
        """
        user_message = event.new[-1]
        user_input = user_message.get("User")
        if user_input is None:
            # The latest entry is not from the user (e.g. our own AI reply).
            return
        os.environ["OPENAI_API_KEY"] = self.openaikey.value
        if self.file_input.value is not None:
            # Persist the upload so PyPDFLoader can read it from disk.
            self.file_input.save("/.cache/temp.pdf")
        prompt_text = self.remove_empty_lines(user_input)
        if prompt_text:
            result = self.qa(file="/.cache/temp.pdf", query=prompt_text)
            self.chatbox.append({"AI": result})

    def remove_empty_lines(self, text):
        """Return *text* with blank lines removed and each line stripped.

        Bug fixes versus the original: the method lacked its ``self``
        parameter (so calling it through the instance passed the instance as
        ``text``), and the doubled backslashes made the regex match literal
        backslash-r / backslash-n sequences instead of real line breaks.
        """
        lines = re.split(r'\r\n|\r|\n', text)
        return '\n'.join(line.strip() for line in lines if line.strip())

    def qa(self, file, query):
        """Answer *query* from the PDF at *file* via a RetrievalQA chain.

        The document is loaded, split into 1000-character chunks, embedded
        with OpenAI embeddings and indexed in an in-memory Chroma store; the
        top-3 similar chunks are "stuffed" into the prompt. Previous
        (query, answer) pairs are appended to the prompt for context.

        Returns the model's answer string.
        """
        # Serialize prior exchanges so the model sees conversational context.
        chat_history_str = "\n".join(
            f"User: {q}\nAI: {a}" for q, a in self.chat_history
        )
        # Load and chunk the document with the default character splitter.
        loader = PyPDFLoader(file)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_documents(documents)
        # Embed and index the chunks, then retrieve the 3 most similar.
        embeddings = OpenAIEmbeddings()
        db = Chroma.from_documents(texts, embeddings)
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
        qa = RetrievalQA.from_chain_type(
            llm=LangchainOpenAI(),
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )
        result = qa({"query": query + "\n" + chat_history_str})
        # Remember this exchange so later queries get context.
        self.chat_history.append((query, result['result']))
        return result['result']

    def view(self):
        """Build the app layout, mark it servable, and return it."""
        layout = pn.Column(
            pn.pane.Markdown("""
            # DocuChat
            AI-Powered Query Engine for Document Insights (powered by LangChain & OpenAI)
            ## How it works:
            1) Upload a PDF
            2) Enter your OpenAI API key (get one via [OpenAI](https://platform.openai.com/account))
            3) Type a question and your document will get analyzed for an answer
            Built by <span class="mckenzie-link">[McKenzie](https://www.mckenzielloydsmith.com/home?utm_source=HuggingFace&utm_medium=PDF+Analyzer)</span>.
            """),
            pn.Row(self.file_input, self.openaikey),
            self.chatbox,
        ).servable()
        return layout
# Instantiate the conversation UI and register its layout for serving
# (`view()` calls `.servable()`, which `panel serve` picks up).
conversation_app = LangchainConversation()
conversation_app.view()