# chatbot_rag / app.py — Gradio RAG chatbot (Hugging Face Space by jamesthong,
# commit a971189). The lines above were HF web-page residue, not Python code.
# import bs4
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
import gradio as gr
# Remote Hugging Face Inference endpoint used as the chat LLM for every query.
repo_id = "HuggingFaceH4/zephyr-7b-beta"
llm = HuggingFaceEndpoint(
    # Low temperature => near-deterministic answers.
    # NOTE(review): `max_length` is likely meant to be `max_new_tokens` for
    # HuggingFaceEndpoint — confirm against langchain_huggingface docs.
    repo_id=repo_id, max_length=128, temperature=0.1
)
def web_load(path):
    """Fetch the page at *path* (a URL) and return it as LangChain documents.

    `web_paths` expects an iterable of URLs, hence the one-element tuple.
    """
    # (Removed long-dead commented-out bs4 SoupStrainer filtering.)
    return WebBaseLoader(web_paths=(path,)).load()
def pdf_load(path):
    """Load the PDF at *path* and return its pages as LangChain documents.

    NOTE(review): `load_and_split` applies PyPDFLoader's default splitter,
    separate from the splitter used later in `vector_store` — presumably
    intentional, but worth confirming.
    """
    pdf_loader = PyPDFLoader(path)
    return pdf_loader.load_and_split()
def vector_store(path):
    """Build a FAISS vector store from a PDF file or a web page.

    Parameters
    ----------
    path : str
        A local path ending in ".pdf", or a URL starting with "http"/"www".

    Returns
    -------
    tuple
        (FAISS vector store or None, status message for the UI).
    """
    if path.endswith(".pdf"):
        docs = pdf_load(path)
    # BUG FIX: the original tested `path.startswith("http" or "www")`, which
    # evaluates to `path.startswith("http")` and silently ignored "www..."
    # URLs. str.startswith accepts a tuple of prefixes.
    elif path.startswith(("http", "www")):
        docs = web_load(path)
    else:
        # Previously fell through to a NameError on `docs`; return a readable
        # status to the Gradio "Progress" textbox instead.
        return None, "Unsupported source: provide a .pdf file or a URL."
    # Overlapping chunks preserve context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = FAISS.from_documents(
        documents=splits,
        embedding=HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
    )
    return vectorstore, "Done setup! You may proceed to Chatbot. "
def invoke(user_input, retriever):
    """Answer *user_input* via a RAG chain over *retriever* using the module LLM."""
    # Zephyr chat template: system turn carrying retrieved context, then the
    # user's question, then the assistant's turn for the model to complete.
    prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:
{context}
</s>
<|user|>
{question}
</s>
<|assistant|>
"""
    qa_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    # The retriever fills {context}; the raw question passes through unchanged.
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | qa_prompt
        | llm
        | StrOutputParser()
    )
    return chain.invoke(user_input)
def rag_chatbot(vectorstore, user_input, chat_history):
    """Handle one chat turn: retrieve context, generate an answer, log it.

    Returns ("", updated history) so Gradio clears the message textbox.
    """
    answer = invoke(user_input, vectorstore.as_retriever())
    chat_history.append((user_input, answer))
    return "", chat_history
def source(radio, source1, source2):
    """Return the active source for the selected type.

    "website" selects *source1* (the URL textbox); "PDF" selects *source2*
    (the uploaded file). Any other selection yields None, matching the
    original implicit fall-through.
    """
    return {"website": source1, "PDF": source2}.get(radio)
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    # Per-session holder for the FAISS store built in the Setup tab.
    vectorstore = gr.State()
    with gr.Tab("Setup"):
        # NOTE(review): "ULR" is a typo for "URL" in user-facing text — left
        # untouched here because it is a runtime string.
        gr.Markdown("Input a website ULR or upload a PDF file")
        with gr.Row():
            source1 = gr.Textbox(label="Input website",)
            source2 = gr.Files(label="Upload a PDF file", file_count="single", file_types=["pdf"])
        radio = gr.Radio(["website", "PDF"], label="Select type of source", interactive=True)
        # Read-only display of whichever source is currently selected.
        path = gr.Textbox(label="Path of source", visible=True, interactive=False)
        # Keep `path` in sync whenever the selector or either source changes.
        radio.change(fn=source, inputs=[radio,source1,source2], outputs=path)
        source1.change(fn=source, inputs=[radio,source1,source2], outputs=path)
        source2.change(fn=source, inputs=[radio,source1,source2], outputs=path)
        done = gr.Textbox(label="Progress", interactive=False)
        setup_btn = gr.Button("Initialize vectorstore")
        # Builds the store and reports status into `done`.
        setup_btn.click(fn=vector_store, inputs=[path], outputs=[vectorstore, done])
    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        with gr.Row():
            clear = gr.ClearButton([msg, chatbot], icon="https://img.icons8.com/?size=100&id=Xnx8cxDef16O&format=png&color=000000")
            send_btn = gr.Button("Send", variant='primary', icon="https://img.icons8.com/?size=100&id=g8ltXTwIfJ1n&format=png&color=000000")
        # Enter key and Send button both trigger one chat turn.
        msg.submit(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])
        send_btn.click(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.launch()