# NOTE(review): removed Hugging Face Spaces page-scrape artifact
# ("Spaces: / Sleeping / Sleeping") — it was not valid Python.
# import bs4 | |
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader | |
from langchain_community.vectorstores import FAISS | |
from langchain_core.runnables import RunnablePassthrough | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint | |
from langchain.prompts import PromptTemplate | |
import gradio as gr | |
# Hosted LLM shared by every chat request (serverless HF Inference endpoint).
repo_id = "HuggingFaceH4/zephyr-7b-beta"
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    # `max_new_tokens` is the generation-length field HuggingFaceEndpoint
    # declares; `max_length` is not a recognized parameter and was being
    # forwarded blindly to the endpoint.
    max_new_tokens=128,
    temperature=0.1,
)
def web_load(path):
    """Fetch one web page and return it as a list of LangChain Documents.

    Args:
        path: URL of the page to load.

    Returns:
        list: the Documents produced by ``WebBaseLoader.load()``.
    """
    # Removed the commented-out bs4.SoupStrainer filter (dead code); the
    # whole page is loaded as-is.
    loader = WebBaseLoader(web_paths=(path,))
    docs = loader.load()
    return docs
def pdf_load(path):
    """Load the PDF at *path* and return it split into per-page Documents."""
    return PyPDFLoader(path).load_and_split()
def vector_store(path):
    """Build a FAISS vectorstore from a PDF file or a web page.

    Args:
        path: filesystem path ending in ".pdf", or a URL starting with
            "http" or "www".

    Returns:
        tuple: (FAISS vectorstore, progress message for the UI).

    Raises:
        ValueError: if *path* is neither a PDF path nor a recognizable URL.
    """
    if path.endswith(".pdf"):
        docs = pdf_load(path)
    elif path.startswith(("http", "www")):
        # BUG FIX: the original `path.startswith("http" or "www")` evaluates
        # to `path.startswith("http")`, so "www..." URLs never matched.
        # `str.startswith` accepts a tuple of prefixes.
        docs = web_load(path)
    else:
        # Previously `docs` was left unbound here and the code crashed later
        # with a confusing NameError; fail fast with an actionable message.
        raise ValueError(
            f"Unsupported source {path!r}: expected a .pdf file or an http/www URL"
        )
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = splitter.split_documents(docs)
    vectorstore = FAISS.from_documents(
        documents=splits,
        embedding=HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
    )
    return vectorstore, "Done setup! You may proceed to Chatbot. "
def invoke(user_input, retriever):
    """Run one RAG turn: retrieve context, fill the prompt, query the LLM."""
    # Zephyr-style chat template; {context} is filled by the retriever and
    # {question} by the raw user input.
    template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:
{context}
</s>
<|user|>
{question}
</s>
<|assistant|>
    """
    zephyr_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    # LCEL pipeline: retrieval feeds {context}, the question passes through.
    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | zephyr_prompt
        | llm
        | StrOutputParser()
    )
    return chain.invoke(user_input)
def rag_chatbot(vectorstore, user_input, chat_history):
    """Answer *user_input* against *vectorstore* and append to chat history.

    Returns ("", chat_history) so the Gradio textbox is cleared and the
    Chatbot component re-renders with the new (question, answer) pair.
    """
    # Robustness: the Setup tab may not have been run yet, in which case the
    # gr.State is still None — tell the user instead of raising
    # AttributeError on .as_retriever().
    if vectorstore is None:
        chat_history.append(
            (user_input, "Please initialize the vectorstore in the Setup tab first.")
        )
        return "", chat_history
    retriever = vectorstore.as_retriever()
    answer = invoke(user_input, retriever)
    chat_history.append((user_input, answer))
    return "", chat_history
def source(radio, source1, source2):
    """Return the active source for the selected radio option.

    "website" selects the URL textbox value, "PDF" the uploaded file;
    any other selection (including None) yields None, as before.
    """
    selected = {"website": source1, "PDF": source2}
    return selected.get(radio)
# ---------------------------------------------------------------------------
# Gradio UI: a "Setup" tab that builds the vectorstore and a "Chatbot" tab
# that chats against it. The vectorstore is shared across tabs via gr.State.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # Cross-tab state holding the FAISS vectorstore built on the Setup tab.
    vectorstore = gr.State()
    with gr.Tab("Setup"):
        # Typo fix: "ULR" -> "URL" in the user-facing instructions.
        gr.Markdown("Input a website URL or upload a PDF file")
        with gr.Row():
            source1 = gr.Textbox(label="Input website")
            source2 = gr.Files(label="Upload a PDF file", file_count="single", file_types=["pdf"])
        radio = gr.Radio(["website", "PDF"], label="Select type of source", interactive=True)
        # Read-only echo of whichever source is currently selected; this is
        # what gets passed to vector_store().
        path = gr.Textbox(label="Path of source", visible=True, interactive=False)
        # Keep `path` in sync with the radio choice and with either input.
        radio.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        source1.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        source2.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        done = gr.Textbox(label="Progress", interactive=False)
        setup_btn = gr.Button("Initialize vectorstore")
        setup_btn.click(fn=vector_store, inputs=[path], outputs=[vectorstore, done])
    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        with gr.Row():
            clear = gr.ClearButton([msg, chatbot], icon="https://img.icons8.com/?size=100&id=Xnx8cxDef16O&format=png&color=000000")
            send_btn = gr.Button("Send", variant='primary', icon="https://img.icons8.com/?size=100&id=g8ltXTwIfJ1n&format=png&color=000000")
        # Enter in the textbox and the Send button trigger the same handler.
        msg.submit(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])
        send_btn.click(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.launch()